Skip to content

Commit 1281133

Browse files
committed
change unordered_set API, no hash, equal fields
Use static methods, prefixed by the type T (`T_hash` and `T_equal`). This enables inlining the hot hashtable parts, and prevents corrupting the table by changing the hash or equal methods. They must be declared and defined statically beforehand, just as in C++, where they must be declared for the template. Fixes GH #21
1 parent ce9c162 commit 1281133

25 files changed

+244
-208
lines changed

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -832,6 +832,8 @@ List of added, changed, removed features:
832832
* algorithm: Added shuffle, iter_swap, reverse, reverse_range,
833833
lexicographical_compare, is_sorted, is_sorted_until.
834834
Requires now INCLUDE_ALGORITHM
835+
* unordered_set and children: removed hash and equal init args, and fields.
836+
They must now be declared statically beforehand as `T_hash` and `T_equal`.
835837
* array: Added difference, intersection, symmetric_difference, assign_range.
836838
* set: Added includes, includes_range.
837839
* string: Added find_if, find_if_not, find_if_range, find_if_not_range, includes,

api.lst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -343,7 +343,7 @@ ctl/unordered_set.h: max_bucket_count (A *self)
343343
ctl/unordered_set.h: load_factor (A *self)
344344
ctl/unordered_set.h: _reserve (A *self, const size_t new_size)
345345
ctl/unordered_set.h: reserve (A *self, size_t desired_count)
346-
ctl/unordered_set.h: init (size_t (*_hash)(T *), int (*_equal)(T *, T *))
346+
ctl/unordered_set.h: init (void)
347347
ctl/unordered_set.h: init_from (A *copy)
348348
ctl/unordered_set.h: rehash (A *self, size_t desired_count)
349349
ctl/unordered_set.h: _rehash (A *self, size_t count)

ctl/bits/integral.h

Lines changed: 48 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
/* Type utilities, to apply default equal, compare, hash methods for intergral types.
1+
/* Type utilities, to apply default equal and compare methods for integral types.
2+
And hash methods.
23
See MIT LICENSE.
34
*/
45

@@ -14,7 +15,47 @@ _define_integral_compare(long)
1415
#undef _define_integral_compare
1516
*/
1617

18+
#include <string.h>
19+
20+
#ifndef CTL_HASH_DEFAULTS
21+
#define CTL_HASH_DEFAULTS
22+
static inline uint32_t ctl_int32_hash(uint32_t key)
23+
{
24+
key = ((key >> 16) ^ key) * 0x45d9f3b;
25+
key = ((key >> 16) ^ key) * 0x45d9f3b;
26+
key = (key >> 16) ^ key;
27+
return key;
28+
}
29+
/* FNV1a. Eventually wyhash or o1hash */
30+
static inline size_t ctl_string_hash(const char* key)
31+
{
32+
size_t h;
33+
h = 2166136261u;
34+
for (unsigned i = 0; i < strlen((char *)key); i++)
35+
{
36+
h ^= (unsigned char)key[i];
37+
h *= 16777619;
38+
}
39+
return h;
40+
}
41+
1742
#if defined(POD) && !defined(NOT_INTEGRAL)
43+
static inline int JOIN(T, equal)(T *a, T *b)
44+
{
45+
return *a == *b;
46+
}
47+
#endif
48+
49+
#endif //CTL_HASH_DEFAULTS
50+
51+
#if defined(POD) && !defined(NOT_INTEGRAL)
52+
53+
#ifdef CTL_USET
54+
static inline size_t _JOIN(A, _default_integral_hash)(T *a)
55+
{
56+
return ctl_int32_hash((uint32_t)*a);
57+
}
58+
#endif //USET
1859

1960
static inline int _JOIN(A, _default_integral_compare3)(T *a, T *b)
2061
{
@@ -34,30 +75,6 @@ static inline int _JOIN(A, _default_integral_equal)(T *a, T *b)
3475
*/
3576
}
3677

37-
static inline size_t _JOIN(A, _default_integral_hash)(T *a)
38-
{
39-
return (size_t)*a;
40-
}
41-
42-
#include <string.h>
43-
44-
#if defined str || defined u8string || defined charp || defined u8ident || defined ucharp
45-
46-
static inline size_t _JOIN(A, _default_string_hash)(T *key)
47-
{
48-
size_t h;
49-
/* FNV1a, not wyhash */
50-
h = 2166136261u;
51-
for (unsigned i = 0; i < strlen((char *)key); i++)
52-
{
53-
h ^= (unsigned char)key[i];
54-
h *= 16777619;
55-
}
56-
return h;
57-
}
58-
59-
#endif
60-
6178
#define CTL_STRINGIFY_HELPER(n) #n
6279
#define CTL_STRINGIFY(n) CTL_STRINGIFY_HELPER(n)
6380
#define _strEQcc(s1c, s2c) !strcmp(s1c "", s2c "")
@@ -83,47 +100,29 @@ static inline bool _JOIN(A, _type_is_integral)(void)
83100
_strEQcc(CTL_STRINGIFY(T), "llong");
84101
}
85102

86-
// not C++
87-
#ifndef __cplusplus
88-
#define __set_str_hash(self, t) \
89-
{ \
90-
typeof(t) tmp = (x); \
91-
if (__builtin_types_compatible_p(typeof(t), char *)) \
92-
self->hash = _JOIN(A, _default_string_hash); \
93-
else if (__builtin_types_compatible_p(typeof(t), unsigned char *)) \
94-
self->hash = _JOIN(A, _default_string_hash); \
95-
}
96-
#else
97-
#define __set_str_hash(self, t) self->hash = _JOIN(A, _default_string_hash)
98-
#endif
99-
100103
static inline void _JOIN(A, _set_default_methods)(A *self)
101104
{
102105
#if !defined CTL_STR
103106
#if defined str || defined u8string || defined charp || defined u8ident || defined ucharp
104107
{
105-
#ifdef CTL_USET
106-
if (!self->hash)
107-
__set_str_hash(self, T);
108-
#else
108+
#ifndef CTL_USET
109109
if (!self->compare)
110110
self->compare = str_key_compare;
111-
#endif
112111
if (!self->equal)
113112
self->equal = str_equal;
113+
#endif
114114
}
115115
else
116116
#endif
117117
#endif
118-
#ifdef CTL_USET
119-
if (!self->hash)
120-
self->hash = _JOIN(A, _default_integral_hash);
121-
#else
118+
#ifndef CTL_USET
122119
if (!self->compare)
123120
self->compare = _JOIN(A, _default_integral_compare);
124-
#endif
125121
if (!self->equal)
126122
self->equal = _JOIN(A, _default_integral_equal);
123+
#else
124+
(void)self;
125+
#endif
127126
}
128127

129128
#else

ctl/unordered_set.h

Lines changed: 26 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -118,8 +118,6 @@ typedef struct A
118118
float max_load_factor;
119119
void (*free)(T *);
120120
T (*copy)(T *);
121-
size_t (*hash)(T *);
122-
int (*equal)(T *, T *);
123121
#if CTL_USET_SECURITY_COLLCOUNTING == 4
124122
bool is_sorted_vector;
125123
#elif CTL_USET_SECURITY_COLLCOUNTING == 5
@@ -144,11 +142,11 @@ static inline size_t JOIN(A, bucket_count)(A *self)
144142
static inline size_t JOIN(I, index)(A *self, T value)
145143
{
146144
#ifdef CTL_USET_GROWTH_POWER2
147-
return self->hash(&value) & self->bucket_max;
145+
return JOIN(T, hash)(&value) & self->bucket_max;
148146
#elif __WORDSIZE == 127
149-
return ((uint64_t) self->hash(&value) * ((uint64_t) self->bucket_max + 1)) >> 32;
147+
return ((uint64_t) JOIN(T, hash)(&value) * ((uint64_t) self->bucket_max + 1)) >> 32;
150148
#else
151-
return self->hash(&value) % (self->bucket_max + 1);
149+
return JOIN(T, hash)(&value) % (self->bucket_max + 1);
152150
#endif
153151
}
154152

@@ -322,10 +320,12 @@ JOIN(I, range)(A* container, I* begin, I* end)
322320
}
323321
*/
324322

323+
// needed for algorithm
325324
static inline int JOIN(A, _equal)(A *self, T *a, T *b)
326325
{
327-
ASSERT(self->equal || !"equal undefined");
328-
return self->equal(a, b);
326+
//ASSERT(JOIN(T, equal) || !"equal undefined");
327+
(void)self;
328+
return JOIN(T, equal)(a, b);
329329
}
330330

331331
static inline A JOIN(A, init_from)(A *copy);
@@ -518,15 +518,15 @@ static inline B **JOIN(A, _bucket_hash)(A *self, size_t hash)
518518
static inline B **JOIN(A, _bucket)(A *self, T value)
519519
{
520520
const size_t hash = JOIN(I, index)(self, value);
521-
//LOG ("_bucket %lx %% %lu => %zu\n", self->hash(&value), self->bucket_max + 1, hash);
521+
//LOG ("_bucket %lx %% %lu => %zu\n", JOIN(T, hash)(&value), self->bucket_max + 1, hash);
522522
return &self->buckets[hash];
523523
}
524524
#endif
525525

526526
static inline size_t JOIN(A, bucket)(A *self, T value)
527527
{
528528
const size_t hash = JOIN(I, index)(self, value);
529-
//LOG ("bucket %lx %% %lu => %zu\n", self->hash(&value), self->bucket_max + 1, hash);
529+
//LOG ("bucket %lx %% %lu => %zu\n", JOIN(T, hash)(&value), self->bucket_max + 1, hash);
530530
return hash;
531531
}
532532

@@ -613,12 +613,10 @@ static inline void JOIN(A, reserve)(A *self, size_t desired_count)
613613
JOIN(A, _rehash)(self, new_size);
614614
}
615615

616-
static inline A JOIN(A, init)(size_t (*_hash)(T *), int (*_equal)(T *, T *))
616+
static inline A JOIN(A, init)(void)
617617
{
618618
static A zero;
619619
A self = zero;
620-
self.hash = _hash;
621-
self.equal = _equal;
622620
#ifdef POD
623621
self.copy = JOIN(A, implicit_copy);
624622
_JOIN(A, _set_default_methods)(&self);
@@ -633,24 +631,16 @@ static inline A JOIN(A, init)(size_t (*_hash)(T *), int (*_equal)(T *, T *))
633631

634632
static inline A JOIN(A, init_from)(A *copy)
635633
{
636-
static A zero;
637-
A self = zero;
638-
#ifdef POD
639-
self.copy = JOIN(A, implicit_copy);
640-
#else
641-
self.free = JOIN(T, free);
642-
self.copy = JOIN(T, copy);
643-
#endif
644-
self.hash = copy->hash;
645-
self.equal = copy->equal;
634+
A self = JOIN(A, init)();
635+
JOIN(A, _reserve)(&self, copy->bucket_max + 1);
646636
return self;
647637
}
648638

649639
static inline void JOIN(A, rehash)(A *self, size_t desired_count)
650640
{
651641
if (desired_count == (self->bucket_max + 1))
652642
return;
653-
A rehashed = JOIN(A, init)(self->hash, self->equal);
643+
A rehashed = JOIN(A, init)();
654644
JOIN(A, reserve)(&rehashed, desired_count);
655645
if (LIKELY(self->buckets && self->size)) // if desired_count 0
656646
{
@@ -681,7 +671,7 @@ static inline void JOIN(A, _rehash)(A *self, size_t count)
681671
// we do allow shrink here
682672
if (count == self->bucket_max + 1)
683673
return;
684-
A rehashed = JOIN(A, init)(self->hash, self->equal);
674+
A rehashed = JOIN(A, init)();
685675
//LOG("_rehash %zu => %zu\n", self->size, count);
686676
JOIN(A, _reserve)(&rehashed, count);
687677

@@ -714,7 +704,7 @@ static inline B *JOIN(A, find_node)(A *self, T value)
714704
if (self->size)
715705
{
716706
#ifdef CTL_USET_CACHED_HASH
717-
size_t hash = self->hash(&value);
707+
size_t hash = JOIN(T, hash)(&value);
718708
B **buckets = JOIN(A, _bucket_hash)(self, hash);
719709
#else
720710
B **buckets = JOIN(A, _bucket)(self, value);
@@ -739,7 +729,7 @@ static inline B *JOIN(A, find_node)(A *self, T value)
739729
if (n->cached_hash != hash)
740730
continue;
741731
#endif
742-
if (self->equal(&value, &n->value))
732+
if (JOIN(T, equal)(&value, &n->value))
743733
{
744734
#if 0 // not yet
745735
// speedup subsequent read accesses?
@@ -802,7 +792,7 @@ static inline B **JOIN(A, push_cached)(A *self, T *value)
802792
#endif
803793

804794
#ifdef CTL_USET_CACHED_HASH
805-
size_t hash = self->hash(value);
795+
size_t hash = JOIN(T, hash)(value);
806796
B **buckets = JOIN(A, _bucket_hash)(self, hash);
807797
JOIN(B, push)(buckets, JOIN(B, init_cached)(*value, hash));
808798
#else
@@ -899,7 +889,7 @@ static inline I JOIN(A, emplace_hint)(I *pos, T *value)
899889
if (!JOIN(I, done)(pos))
900890
{
901891
#ifdef CTL_USET_CACHED_HASH
902-
size_t hash = self->hash(value);
892+
size_t hash = JOIN(T, hash)(value);
903893
B **buckets = JOIN(A, _bucket_hash)(self, hash);
904894
#else
905895
B **buckets = JOIN(A, _bucket)(self, *value);
@@ -924,7 +914,7 @@ static inline I JOIN(A, emplace_hint)(I *pos, T *value)
924914
if (n->cached_hash != hash)
925915
continue;
926916
#endif
927-
if (self->equal(value, &n->value))
917+
if (JOIN(T, equal)(value, &n->value))
928918
{
929919
FREE_VALUE(self, *value);
930920
return JOIN(I, iter)(self, n);
@@ -1060,7 +1050,7 @@ static inline void JOIN(A, _linked_erase)(A *self, B **bucket, B *n, B *prev, B
10601050
static inline void JOIN(A, erase)(A *self, T value)
10611051
{
10621052
#ifdef CTL_USET_CACHED_HASH
1063-
size_t hash = self->hash(&value);
1053+
size_t hash = JOIN(T, hash)(&value);
10641054
B **buckets = JOIN(A, _bucket_hash)(self, hash);
10651055
#else
10661056
B **buckets = JOIN(A, _bucket)(self, value);
@@ -1078,7 +1068,7 @@ static inline void JOIN(A, erase)(A *self, T value)
10781068
continue;
10791069
}
10801070
#endif
1081-
if (self->equal(&value, &n->value))
1071+
if (JOIN(T, equal)(&value, &n->value))
10821072
{
10831073
JOIN(A, _linked_erase)(self, buckets, n, prev, next);
10841074
break;
@@ -1115,7 +1105,7 @@ static inline size_t JOIN(A, erase_if)(A *self, int (*_match)(T *))
11151105
static inline A JOIN(A, copy)(A *self)
11161106
{
11171107
// LOG ("copy\norig size: %lu\n", self->size);
1118-
A other = JOIN(A, init)(self->hash, self->equal);
1108+
A other = JOIN(A, init)();
11191109
JOIN(A, _reserve)(&other, self->bucket_max + 1);
11201110
foreach (A, self, it)
11211111
{
@@ -1154,7 +1144,7 @@ static inline void JOIN(A, erase_generic)(A* self, GI *range)
11541144

11551145
static inline A JOIN(A, union)(A *a, A *b)
11561146
{
1157-
A self = JOIN(A, init)(a->hash, a->equal);
1147+
A self = JOIN(A, init)();
11581148
JOIN(A, _reserve)(&self, 1 + MAX(a->bucket_max, b->bucket_max));
11591149
foreach (A, a, it1)
11601150
JOIN(A, insert)(&self, self.copy(it1.ref));
@@ -1182,7 +1172,7 @@ static inline A JOIN(A, union_range)(I *r1, GI *r2)
11821172

11831173
static inline A JOIN(A, intersection)(A *a, A *b)
11841174
{
1185-
A self = JOIN(A, init)(a->hash, a->equal);
1175+
A self = JOIN(A, init)();
11861176
foreach (A, a, it)
11871177
if (JOIN(A, find_node)(b, *it.ref))
11881178
JOIN(A, insert)(&self, self.copy(it.ref));
@@ -1192,7 +1182,7 @@ static inline A JOIN(A, intersection)(A *a, A *b)
11921182
static inline A JOIN(A, intersection_range)(I *r1, GI *r2)
11931183
{
11941184
A *a = r1->container;
1195-
A self = JOIN(A, init)(a->hash, a->equal);
1185+
A self = JOIN(A, init)();
11961186
void (*next2)(struct I*) = r2->vtable.next;
11971187
T* (*ref2)(struct I*) = r2->vtable.ref;
11981188
int (*done2)(struct I*) = r2->vtable.done;
@@ -1214,7 +1204,7 @@ static inline A JOIN(A, intersection_range)(I *r1, GI *r2)
12141204

12151205
static inline A JOIN(A, difference)(A *a, A *b)
12161206
{
1217-
A self = JOIN(A, init)(a->hash, a->equal);
1207+
A self = JOIN(A, init)();
12181208
foreach (A, a, it)
12191209
if (!JOIN(A, find_node)(b, *it.ref))
12201210
JOIN(A, insert)(&self, self.copy(it.ref));

0 commit comments

Comments
 (0)