
Commit a1c709e

change unordered_set API: no hash, equal fields

Use static methods prefixed by the type T instead. This enables inlining the hot hashtable parts and prevents corrupting the table by swapping in different hash or equal methods at runtime. They really need to be declared and defined statically, just as in C++, where we must declare them for the template. Fixes GH #21
1 parent 2df4f76 commit a1c709e

25 files changed: +244 -207 lines
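The shape of the change, in user code, is roughly as follows. This is a minimal usage sketch, not code from the repository: the key typedef `intkey`, the `uset_intkey_*` names, and the POD setup follow CTL's usual `JOIN`-based naming conventions but have not been verified against this commit.

```c
#include <stddef.h>

typedef int intkey;

/* The hash and equal methods are now declared statically *before* the
 * container include, named <T>_hash and <T>_equal, so the hashtable can
 * call (and inline) them directly instead of going through the removed
 * self->hash / self->equal function pointers. */
static inline size_t intkey_hash(intkey *a)             { return (size_t)*a; }
static inline int    intkey_equal(intkey *a, intkey *b) { return *a == *b; }

#define POD
#define T intkey
#include <ctl/unordered_set.h>

int main(void)
{
    /* previously: uset_intkey_init(intkey_hash, intkey_equal) */
    uset_intkey a = uset_intkey_init();
    uset_intkey_insert(&a, 42);
    uset_intkey_free(&a);
    return 0;
}
```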

README.md

Lines changed: 2 additions & 0 deletions

```diff
@@ -832,6 +832,8 @@ List of added, changed. removed features:
 * algorithm: Added shuffle, iter_swap, reverse, reverse_range,
   lexicographical_compare, is_sorted, is_sorted_until.
   Requires now INCLUDE_ALGORITHM
+* unordered_set and children: removed hash and equal init args, and fields.
+  They must be now declared statically beforehand as `T_hash` and `T_equal`.
 * array: Added difference, intersection, symmetric_difference, assign_range.
 * set: Added includes, includes_range.
 * string: Added find_if, find_if_not, find_if_range, find_if_not_range, includes,
```

api.lst

Lines changed: 1 addition & 1 deletion

```diff
@@ -343,7 +343,7 @@ ctl/unordered_set.h: max_bucket_count (A *self)
 ctl/unordered_set.h: load_factor (A *self)
 ctl/unordered_set.h: _reserve (A *self, const size_t new_size)
 ctl/unordered_set.h: reserve (A *self, size_t desired_count)
-ctl/unordered_set.h: init (size_t (*_hash)(T *), int (*_equal)(T *, T *))
+ctl/unordered_set.h: init (void)
 ctl/unordered_set.h: init_from (A *copy)
 ctl/unordered_set.h: rehash (A *self, size_t desired_count)
 ctl/unordered_set.h: _rehash (A *self, size_t count)
```

ctl/bits/integral.h

Lines changed: 48 additions & 49 deletions

```diff
@@ -1,4 +1,5 @@
-/* Type utilities, to apply default equal, compare, hash methods for intergral types.
+/* Type utilities, to apply default equal, compare for integral types.
+   And hash methods.
    See MIT LICENSE.
 */
 
@@ -14,7 +15,47 @@ _define_integral_compare(long)
 #undef _define_integral_compare
 */
 
+#include <string.h>
+
+#ifndef CTL_HASH_DEFAULTS
+#define CTL_HASH_DEFAULTS
+static inline uint32_t ctl_int32_hash(uint32_t key)
+{
+    key = ((key >> 16) ^ key) * 0x45d9f3b;
+    key = ((key >> 16) ^ key) * 0x45d9f3b;
+    key = (key >> 16) ^ key;
+    return key;
+}
+/* FNV1a. Eventually wyhash or o1hash */
+static inline size_t ctl_string_hash(const char* key)
+{
+    size_t h;
+    h = 2166136261u;
+    for (unsigned i = 0; i < strlen((char *)key); i++)
+    {
+        h ^= (unsigned char)key[i];
+        h *= 16777619;
+    }
+    return h;
+}
+
 #if defined(POD) && !defined(NOT_INTEGRAL)
+static inline int JOIN(T, equal)(T *a, T *b)
+{
+    return *a == *b;
+}
+#endif
+
+#endif //CTL_HASH_DEFAULTS
+
+#if defined(POD) && !defined(NOT_INTEGRAL)
+
+#ifdef CTL_USET
+static inline size_t _JOIN(A, _default_integral_hash)(T *a)
+{
+    return ctl_int32_hash((uint32_t)*a);
+}
+#endif //USET
 
 static inline int _JOIN(A, _default_integral_compare3)(T *a, T *b)
 {
@@ -34,30 +75,6 @@ static inline int _JOIN(A, _default_integral_equal)(T *a, T *b)
     */
 }
 
-static inline size_t _JOIN(A, _default_integral_hash)(T *a)
-{
-    return (size_t)*a;
-}
-
-#include <string.h>
-
-#if defined str || defined u8string || defined charp || defined u8ident || defined ucharp
-
-static inline size_t _JOIN(A, _default_string_hash)(T *key)
-{
-    size_t h;
-    /* FNV1a, not wyhash */
-    h = 2166136261u;
-    for (unsigned i = 0; i < strlen((char *)key); i++)
-    {
-        h ^= (unsigned char)key[i];
-        h *= 16777619;
-    }
-    return h;
-}
-
-#endif
-
 #define CTL_STRINGIFY_HELPER(n) #n
 #define CTL_STRINGIFY(n) CTL_STRINGIFY_HELPER(n)
 #define _strEQcc(s1c, s2c) !strcmp(s1c "", s2c "")
@@ -83,47 +100,29 @@ static inline bool _JOIN(A, _type_is_integral)(void)
            _strEQcc(CTL_STRINGIFY(T), "llong");
 }
 
-// not C++
-#ifndef __cplusplus
-#define __set_str_hash(self, t) \
-    { \
-        typeof(t) tmp = (x); \
-        if (__builtin_types_compatible_p(typeof(t), char *)) \
-            self->hash = _JOIN(A, _default_string_hash); \
-        else if (__builtin_types_compatible_p(typeof(t), unsigned char *)) \
-            self->hash = _JOIN(A, _default_string_hash); \
-    }
-#else
-#define __set_str_hash(self, t) self->hash = _JOIN(A, _default_string_hash)
-#endif
-
 static inline void _JOIN(A, _set_default_methods)(A *self)
 {
 #if !defined CTL_STR
 #if defined str || defined u8string || defined charp || defined u8ident || defined ucharp
     {
-#ifdef CTL_USET
-        if (!self->hash)
-            __set_str_hash(self, T);
-#else
+#ifndef CTL_USET
         if (!self->compare)
             self->compare = str_key_compare;
-#endif
         if (!self->equal)
             self->equal = str_equal;
+#endif
     }
     else
 #endif
 #endif
-#ifdef CTL_USET
-    if (!self->hash)
-        self->hash = _JOIN(A, _default_integral_hash);
-#else
+#ifndef CTL_USET
     if (!self->compare)
        self->compare = _JOIN(A, _default_integral_compare);
-#endif
    if (!self->equal)
        self->equal = _JOIN(A, _default_integral_equal);
+#else
+    (void)self;
+#endif
 }
 
 #else
```
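The new `ctl_string_hash` helper is a plain 32-bit FNV-1a loop. The standalone driver below exercises it in isolation; the hash body is copied from the hunk above, while the `main` around it is illustrative only.

```c
#include <stdio.h>
#include <string.h>
#include <stddef.h>

/* FNV-1a, as added in ctl/bits/integral.h above */
static inline size_t ctl_string_hash(const char *key)
{
    size_t h = 2166136261u;              /* 32-bit FNV offset basis */
    for (unsigned i = 0; i < strlen((char *)key); i++)
    {
        h ^= (unsigned char)key[i];      /* xor in the next byte... */
        h *= 16777619;                   /* ...then multiply by the FNV prime */
    }
    return h;
}

int main(void)
{
    const char *keys[] = { "", "a", "unordered_set" };
    for (size_t i = 0; i < sizeof keys / sizeof keys[0]; i++)
        printf("%-14s -> %zx\n", keys[i], ctl_string_hash(keys[i]));
    return 0;
}
```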

ctl/unordered_set.h

Lines changed: 26 additions & 37 deletions

```diff
@@ -118,8 +118,6 @@ typedef struct A
     float max_load_factor;
     void (*free)(T *);
     T (*copy)(T *);
-    size_t (*hash)(T *);
-    int (*equal)(T *, T *);
 #if CTL_USET_SECURITY_COLLCOUNTING == 4
     bool is_sorted_vector;
 #elif CTL_USET_SECURITY_COLLCOUNTING == 5
@@ -144,11 +142,11 @@ static inline size_t JOIN(A, bucket_count)(A *self)
 static inline size_t JOIN(I, index)(A *self, T value)
 {
 #ifdef CTL_USET_GROWTH_POWER2
-    return self->hash(&value) & self->bucket_max;
+    return JOIN(T, hash)(&value) & self->bucket_max;
 #elif __WORDSIZE == 127
-    return ((uint64_t) self->hash(&value) * ((uint64_t) self->bucket_max + 1)) >> 32;
+    return ((uint64_t) JOIN(T, hash)(&value) * ((uint64_t) self->bucket_max + 1)) >> 32;
 #else
-    return self->hash(&value) % (self->bucket_max + 1);
+    return JOIN(T, hash)(&value) % (self->bucket_max + 1);
 #endif
 }
 
@@ -322,10 +320,12 @@ JOIN(I, range)(A* container, I* begin, I* end)
 }
 */
 
+// needed for algorithm
 static inline int JOIN(A, _equal)(A *self, T *a, T *b)
 {
-    ASSERT(self->equal || !"equal undefined");
-    return self->equal(a, b);
+    //ASSERT(JOIN(T, equal) || !"equal undefined");
+    (void)self;
+    return JOIN(T, equal)(a, b);
 }
 
 static inline A JOIN(A, init_from)(A *copy);
@@ -518,15 +518,15 @@ static inline B **JOIN(A, _bucket_hash)(A *self, size_t hash)
 static inline B **JOIN(A, _bucket)(A *self, T value)
 {
     const size_t hash = JOIN(I, index)(self, value);
-    //LOG ("_bucket %lx %% %lu => %zu\n", self->hash(&value), self->bucket_max + 1, hash);
+    //LOG ("_bucket %lx %% %lu => %zu\n", JOIN(T, hash)(&value), self->bucket_max + 1, hash);
     return &self->buckets[hash];
 }
 #endif
 
 static inline size_t JOIN(A, bucket)(A *self, T value)
 {
     const size_t hash = JOIN(I, index)(self, value);
-    //LOG ("bucket %lx %% %lu => %zu\n", self->hash(&value), self->bucket_max + 1, hash);
+    //LOG ("bucket %lx %% %lu => %zu\n", JOIN(T, hash)(&value), self->bucket_max + 1, hash);
     return hash;
 }
 
@@ -613,12 +613,10 @@ static inline void JOIN(A, reserve)(A *self, size_t desired_count)
         JOIN(A, _rehash)(self, new_size);
 }
 
-static inline A JOIN(A, init)(size_t (*_hash)(T *), int (*_equal)(T *, T *))
+static inline A JOIN(A, init)(void)
 {
     static A zero;
     A self = zero;
-    self.hash = _hash;
-    self.equal = _equal;
 #ifdef POD
     self.copy = JOIN(A, implicit_copy);
     _JOIN(A, _set_default_methods)(&self);
@@ -633,24 +631,15 @@ static inline A JOIN(A, init)(size_t (*_hash)(T *), int (*_equal)(T *, T *))
 
 static inline A JOIN(A, init_from)(A *copy)
 {
-    static A zero;
-    A self = zero;
-#ifdef POD
-    self.copy = JOIN(A, implicit_copy);
-#else
-    self.free = JOIN(T, free);
-    self.copy = JOIN(T, copy);
-#endif
-    self.hash = copy->hash;
-    self.equal = copy->equal;
-    return self;
+    (void)copy;
+    return JOIN(A, init)();
 }
 
 static inline void JOIN(A, rehash)(A *self, size_t desired_count)
 {
     if (desired_count == (self->bucket_max + 1))
         return;
-    A rehashed = JOIN(A, init)(self->hash, self->equal);
+    A rehashed = JOIN(A, init)();
     JOIN(A, reserve)(&rehashed, desired_count);
     if (LIKELY(self->buckets && self->size)) // if desired_count 0
     {
@@ -681,7 +670,7 @@ static inline void JOIN(A, _rehash)(A *self, size_t count)
     // we do allow shrink here
     if (count == self->bucket_max + 1)
         return;
-    A rehashed = JOIN(A, init)(self->hash, self->equal);
+    A rehashed = JOIN(A, init)();
     //LOG("_rehash %zu => %zu\n", self->size, count);
     JOIN(A, _reserve)(&rehashed, count);
 
@@ -714,7 +703,7 @@ static inline B *JOIN(A, find_node)(A *self, T value)
     if (self->size)
     {
 #ifdef CTL_USET_CACHED_HASH
-        size_t hash = self->hash(&value);
+        size_t hash = JOIN(T, hash)(&value);
         B **buckets = JOIN(A, _bucket_hash)(self, hash);
 #else
         B **buckets = JOIN(A, _bucket)(self, value);
@@ -739,7 +728,7 @@ static inline B *JOIN(A, find_node)(A *self, T value)
             if (n->cached_hash != hash)
                 continue;
 #endif
-            if (self->equal(&value, &n->value))
+            if (JOIN(T, equal)(&value, &n->value))
             {
 #if 0 // not yet
                 // speedup subsequent read accesses?
@@ -802,7 +791,7 @@ static inline B **JOIN(A, push_cached)(A *self, T *value)
 #endif
 
 #ifdef CTL_USET_CACHED_HASH
-    size_t hash = self->hash(value);
+    size_t hash = JOIN(T, hash)(value);
     B **buckets = JOIN(A, _bucket_hash)(self, hash);
     JOIN(B, push)(buckets, JOIN(B, init_cached)(*value, hash));
 #else
@@ -899,7 +888,7 @@ static inline I JOIN(A, emplace_hint)(I *pos, T *value)
     if (!JOIN(I, done)(pos))
     {
 #ifdef CTL_USET_CACHED_HASH
-        size_t hash = self->hash(value);
+        size_t hash = JOIN(T, hash)(value);
         B **buckets = JOIN(A, _bucket_hash)(self, hash);
 #else
         B **buckets = JOIN(A, _bucket)(self, *value);
@@ -924,7 +913,7 @@ static inline I JOIN(A, emplace_hint)(I *pos, T *value)
             if (n->cached_hash != hash)
                 continue;
 #endif
-            if (self->equal(value, &n->value))
+            if (JOIN(T, equal)(value, &n->value))
             {
                 FREE_VALUE(self, *value);
                 return JOIN(I, iter)(self, n);
@@ -1060,7 +1049,7 @@ static inline void JOIN(A, _linked_erase)(A *self, B **bucket, B *n, B *prev, B
 static inline void JOIN(A, erase)(A *self, T value)
 {
 #ifdef CTL_USET_CACHED_HASH
-    size_t hash = self->hash(&value);
+    size_t hash = JOIN(T, hash)(&value);
     B **buckets = JOIN(A, _bucket_hash)(self, hash);
 #else
     B **buckets = JOIN(A, _bucket)(self, value);
@@ -1078,7 +1067,7 @@ static inline void JOIN(A, erase)(A *self, T value)
                 continue;
             }
 #endif
-            if (self->equal(&value, &n->value))
+            if (JOIN(T, equal)(&value, &n->value))
             {
                 JOIN(A, _linked_erase)(self, buckets, n, prev, next);
                 break;
@@ -1115,7 +1104,7 @@ static inline size_t JOIN(A, erase_if)(A *self, int (*_match)(T *))
 static inline A JOIN(A, copy)(A *self)
 {
     // LOG ("copy\norig size: %lu\n", self->size);
-    A other = JOIN(A, init)(self->hash, self->equal);
+    A other = JOIN(A, init)();
     JOIN(A, _reserve)(&other, self->bucket_max + 1);
     foreach (A, self, it)
     {
@@ -1154,7 +1143,7 @@ static inline void JOIN(A, erase_generic)(A* self, GI *range)
 
 static inline A JOIN(A, union)(A *a, A *b)
 {
-    A self = JOIN(A, init)(a->hash, a->equal);
+    A self = JOIN(A, init)();
     JOIN(A, _reserve)(&self, 1 + MAX(a->bucket_max, b->bucket_max));
     foreach (A, a, it1)
         JOIN(A, insert)(&self, self.copy(it1.ref));
@@ -1182,7 +1171,7 @@ static inline A JOIN(A, union_range)(I *r1, GI *r2)
 
 static inline A JOIN(A, intersection)(A *a, A *b)
 {
-    A self = JOIN(A, init)(a->hash, a->equal);
+    A self = JOIN(A, init)();
     foreach (A, a, it)
         if (JOIN(A, find_node)(b, *it.ref))
             JOIN(A, insert)(&self, self.copy(it.ref));
@@ -1192,7 +1181,7 @@ static inline A JOIN(A, intersection)(A *a, A *b)
 static inline A JOIN(A, intersection_range)(I *r1, GI *r2)
 {
     A *a = r1->container;
-    A self = JOIN(A, init)(a->hash, a->equal);
+    A self = JOIN(A, init)();
     void (*next2)(struct I*) = r2->vtable.next;
     T* (*ref2)(struct I*) = r2->vtable.ref;
     int (*done2)(struct I*) = r2->vtable.done;
@@ -1214,7 +1203,7 @@ static inline A JOIN(A, intersection_range)(I *r1, GI *r2)
 
 static inline A JOIN(A, difference)(A *a, A *b)
 {
-    A self = JOIN(A, init)(a->hash, a->equal);
+    A self = JOIN(A, init)();
     foreach (A, a, it)
         if (!JOIN(A, find_node)(b, *it.ref))
             JOIN(A, insert)(&self, self.copy(it.ref));
```
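The hot-path change above is that `JOIN(T, hash)(&value)` now names a file-scope function directly instead of loading a function pointer from the struct. The token pasting behind that is the usual two-level `##` idiom; the sketch below re-creates `JOIN`/`PASTE` from scratch for illustration (CTL defines its own versions), so only the expansion behaviour is the point here.

```c
#include <stdio.h>
#include <stddef.h>

/* Two-level paste: JOIN expands its arguments first, then PASTE glues them. */
#define PASTE(a, b) a##_##b
#define JOIN(a, b) PASTE(a, b)

typedef int digi;

/* Illustrative hash; any static <T>_hash with this signature would do. */
static inline size_t digi_hash(digi *a) { return (size_t)*a * 2654435761u; }

#define T digi

int main(void)
{
    digi x = 42;
    /* JOIN(T, hash)(&x) expands to digi_hash(&x) at preprocessing time,
     * which the compiler is free to inline, unlike the old self->hash call. */
    printf("%zu\n", JOIN(T, hash)(&x));
    return 0;
}
```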
