-
Notifications
You must be signed in to change notification settings - Fork 148
Expand file tree
/
Copy pathhashtable.h
More file actions
410 lines (379 loc) · 12.8 KB
/
hashtable.h
File metadata and controls
410 lines (379 loc) · 12.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
/*= -*- c-basic-offset: 4; indent-tabs-mode: nil; -*-
*
* hashtable.h -- a generic open addressing hashtable.
*
* Copyright (C) 2003 by Donovan Baarda <[email protected]>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation; either version 2.1 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
/** \file hashtable.h
* A generic open addressing hashtable.
*
* This is a minimal hashtable containing pointers to arbitrary entries with
* configurable hashtable size and support for custom hash() and cmp() methods.
* The cmp() method can either be a simple comparison between two keys, or can
* be against a special match object containing additional mutable state. This
* allows for things like deferred and cached evaluation of costly comparison
* data. The hash() function doesn't need to avoid clustering behaviour.
*
* It uses open addressing with quadratic probing for collisions. The
* MurmurHash3 finalization function is optionally used on the hash() output to
* avoid clustering and can be disabled by setting HASHTABLE_NMIX32. There is
* no support for removing entries, only adding them. Multiple entries with the
* same key can be added, and you can use a fancy cmp() function to find
* particular entries by more than just their key. There is an iterator for
* iterating through all entries in the hashtable. There are optional
* NAME_find() find/match/hashcmp/entrycmp stats counters that can be disabled
* by defining HASHTABLE_NSTATS. There is an optional simple k=1 bloom filter
* for speed that can be disabled by defining HASHTABLE_NBLOOM.
*
* The types and methods of the hashtable and its contents are specified by
* using \#define parameters set to their basenames (the prefixes for the *_t
* type and *_func() methods) before doing \#include "hashtable.h". This
* produces static inline type-safe methods that are either application
* optimized for speed or wrappers around void* implementation methods for
* compactness.
*
* \param ENTRY - the entry type basename.
*
* \param KEY - optional key type basename (default: ENTRY).
*
* \param MATCH - optional match type basename (default: KEY).
*
* \param NAME - optional hashtable type basename (default: ENTRY_hashtable).
*
* Example: \code
* typedef ... mykey_t;
* int mykey_hash(mykey_t const *e);
* int mykey_cmp(mykey_t *e, mykey_t const *o);
*
* typedef struct myentry {
* mykey_t key; // Inherit from mykey_t.
* ...extra entry value data...
* } myentry_t;
* void myentry_init(myentry_t *e, ...);
*
* #define ENTRY myentry
* #define KEY mykey
* #include "hashtable.h"
*
* hashtable_t *t;
* myentry_t entries[300];
* mykey_t k;
* myentry_t *e;
*
* t = myentry_hashtable_new(300);
* myentry_init(&entries[5], ...);
* myentry_hashtable_add(t, &entries[5]);
* k = ...;
* e = myentry_hashtable_find(t, &k);
*
* int i;
* for (e = myentry_hashtable_iter(t, &i); e != NULL;
* e = myentry_hashtable_next(t, &i))
* ...
*
* myentry_hashtable_free(t);
* \endcode
*
* The mykey_hash() and mykey_cmp() fuctions will typically take pointers to
* mykey/myentry instances the same as the pointers stored in the hashtable.
* However it is also possible for them to take "match objects" that are a
* "subclass" of the entry type that contain additional state for complicated
* comparision operations.
*
* Example: \code
* typedef struct mymatch {
* mykey_t key; // Inherit from mykey_t;
* ...extra match criteria and state data...
* } mymatch_t;
* int mymatch_cmp(mymatch_t *m, myentry_t const *e);
*
* #define ENTRY myentry
* #define KEY mykey
* #define MATCH mymatch
* #include "hashtable.h"
*
* ...
* mymatch_t m;
*
* t = myentry_hashtable_new(300);
* ...
* m = ...;
* e = myentry_hashtable_find(t, &m);
* \endcode
*
* The mymatch_cmp() function is only called for finding hashtable entries and
* can mutate the mymatch_t object for doing things like deferred and cached
* evaluation of expensive match data. It can also access the whole myentry_t
* object to match against more than just the key. */
#ifndef HASHTABLE_H
# define HASHTABLE_H
# include <stdbool.h>
/** The hashtable type. */
typedef struct hashtable {
int size; /**< Size of allocated hashtable. */
int count; /**< Number of entries in hashtable. */
unsigned tmask; /**< Mask to get the hashtable index. */
# ifndef HASHTABLE_NBLOOM
unsigned bshift; /**< Shift to get the bloomfilter index. */
# endif
# ifndef HASHTABLE_NSTATS
/* The following are for accumulating NAME_find() stats. */
long find_count; /**< The count of finds tried. */
long match_count; /**< The count of matches found. */
long hashcmp_count; /**< The count of hash compares done. */
long entrycmp_count; /**< The count of entry compares done. */
# endif
# ifndef HASHTABLE_NBLOOM
unsigned char *kbloom; /**< Bloom filter of hash keys with k=1. */
# endif
void **etable; /**< Table of pointers to entries. */
unsigned ktable[]; /**< Table of hash keys. */
} hashtable_t;
/* void* implementations for the type-safe static inline wrappers below. */
hashtable_t *_hashtable_new(int size);
void _hashtable_free(hashtable_t *t);
# ifndef HASHTABLE_NBLOOM
static inline void hashtable_setbloom(hashtable_t *t, unsigned const h)
{
/* Use upper bits for a "different hash". */
unsigned const i = h >> t->bshift;
t->kbloom[i / 8] |= (unsigned char)(1 << (i % 8));
}
static inline bool hashtable_getbloom(hashtable_t *t, unsigned const h)
{
/* Use upper bits for a "different hash". */
unsigned const i = h >> t->bshift;
return (t->kbloom[i / 8] >> (i % 8)) & 1;
}
# endif
/** MurmurHash3 finalization mix function. */
static inline unsigned mix32(unsigned h)
{
h ^= h >> 16;
h *= 0x85ebca6b;
h ^= h >> 13;
h *= 0xc2b2ae35;
h ^= h >> 16;
return h;
}
/** Ensure hash's are never zero. */
static inline unsigned nozero(unsigned h)
{
return h ? h : (unsigned)-1;
}
#endif /* !HASHTABLE_H */
/* If ENTRY is defined, define type-dependent static inline methods. */
#ifdef ENTRY
# include <assert.h>
# include <stddef.h>
# define _JOIN2(x, y) x##y
# define _JOIN(x, y) _JOIN2(x, y)
# ifndef KEY
# define KEY ENTRY
# endif
# ifndef MATCH
# define MATCH KEY
# endif
# ifndef NAME
# define NAME _JOIN(ENTRY, _hashtable)
# endif
# define ENTRY_t _JOIN(ENTRY, _t) /**< The entry type. */
# define KEY_t _JOIN(KEY, _t) /**< The key type. */
# define MATCH_t _JOIN(MATCH, _t) /**< The match type. */
# define KEY_hash _JOIN(KEY, _hash) /**< The key hash(k) method. */
# define MATCH_cmp _JOIN(MATCH, _cmp) /**< The match cmp(m, e) method. */
/* The names for all the hashtable methods. */
# define NAME_new _JOIN(NAME, _new)
# define NAME_free _JOIN(NAME, _free)
# define NAME_stats_init _JOIN(NAME, _stats_init)
# define NAME_add _JOIN(NAME, _add)
# define NAME_find _JOIN(NAME, _find)
# define NAME_iter _JOIN(NAME, _iter)
# define NAME_next _JOIN(NAME, _next)
/* Modified hash() with/without mix32() and reserving zero for empty buckets. */
# ifdef HASHTABLE_NMIX32
# define _KEY_HASH(k) nozero(KEY_hash((KEY_t *)k))
# else
# define _KEY_HASH(k) nozero(mix32(KEY_hash((KEY_t *)k)))
# endif
/* Loop macro for probing table t for key hash hk, iterating with index i and
entry hash h, terminating at an empty bucket. */
# define _for_probe(t, hk, i, h) \
unsigned const *const ktable = t->ktable;\
unsigned const tmask = t->tmask;\
unsigned i, s, h;\
for (i = hk & tmask, s = 0; (h = ktable[i]); i = (i + ++s) & tmask)
/* Conditional macro for incrementing stats counters. */
# ifndef HASHTABLE_NSTATS
# define _stats_inc(c) (c++)
# else
# define _stats_inc(c)
# endif
/** Allocate and initialize a hashtable instance.
*
* The provided size is used as an indication of the number of entries you wish
* to add, but the allocated size will probably be larger depending on the
* implementation to enable optimisations or avoid degraded performance. It may
* be possible to fill the table beyond the requested size, but performance can
* start to degrade badly if it is over filled.
*
* \param size - The desired minimum size of the hash table.
*
* \return The initialized hashtable instance or NULL if it failed. */
static inline hashtable_t *NAME_new(int size)
{
return _hashtable_new(size);
}
/** Destroy and free a hashtable instance.
*
* This will free the hashtable, but will not free the entries in the
* hashtable. If you want to free the entries too, use a hashtable iterator to
* free the the entries first.
*
* \param *t - The hashtable to destroy and free. */
static inline void NAME_free(hashtable_t *t)
{
_hashtable_free(t);
}
/** Initialize hashtable stats counters.
*
* This will reset all the stats counters for the hashtable,
*
* \param *t - The hashtable to initializ stats for. */
static inline void NAME_stats_init(hashtable_t *t)
{
# ifndef HASHTABLE_NSTATS
t->find_count = t->match_count = t->hashcmp_count = t->entrycmp_count = 0;
# endif
}
/** Add an entry to a hashtable.
*
* This doesn't use MATCH_cmp() or do any checks for existing copies or
* instances, so it will add duplicates. If you want to avoid adding
* duplicates, use NAME_find() to check for existing entries first.
*
* \param *t - The hashtable to add to.
*
* \param *e - The entry object to add.
*
* \return The added entry, or NULL if the table is full. */
static inline ENTRY_t *NAME_add(hashtable_t *t, ENTRY_t *e)
{
unsigned he = _KEY_HASH(e);
assert(e != NULL);
if (t->count + 1 == t->size)
return NULL;
# ifndef HASHTABLE_NBLOOM
hashtable_setbloom(t, he);
# endif
_for_probe(t, he, i, h);
t->count++;
t->ktable[i] = he;
return t->etable[i] = e;
}
/** Find an entry in a hashtable.
*
* Uses MATCH_cmp() to find the first matching entry in the table in the same
* hash() bucket.
*
* \param *t - The hashtable to search.
*
* \param *m - The key or match object to search for.
*
* \return The first found entry, or NULL if nothing was found. */
static inline ENTRY_t *NAME_find(hashtable_t *t, MATCH_t *m)
{
assert(m != NULL);
unsigned hm = _KEY_HASH(m);
ENTRY_t *e;
_stats_inc(t->find_count);
# ifndef HASHTABLE_NBLOOM
if (!hashtable_getbloom(t, hm))
return NULL;
# endif
_for_probe(t, hm, i, he) {
_stats_inc(t->hashcmp_count);
if (hm == he) {
_stats_inc(t->entrycmp_count);
if (!MATCH_cmp(m, e = t->etable[i])) {
_stats_inc(t->match_count);
return e;
}
}
}
/* Also count the compare for the empty bucket. */
_stats_inc(t->hashcmp_count);
return NULL;
}
static inline ENTRY_t *NAME_next(hashtable_t *t, int *i);
/** Initialize a iteration and return the first entry.
*
* This works together with NAME_next() for iterating through all entries in a
* hashtable.
*
* Example: \code
* for (e = NAME_iter(t, &i); e != NULL; e = NAME_next(t, &i))
* ...
* \endcode
*
* \param *t - the hashtable to iterate over.
*
* \param *i - the int iterator index to initialize.
*
* \return The first entry or NULL if the hashtable is empty. */
static inline ENTRY_t *NAME_iter(hashtable_t *t, int *i)
{
assert(t != NULL);
assert(i != NULL);
*i = 0;
return NAME_next(t, i);
}
/** Get the next entry from a hashtable iterator or NULL when finished.
*
* This works together with NAME_iter() for iterating through all entries in a
* hashtable.
*
* \param *t - the hashtable to iterate over.
*
* \param *i - the int iterator index to use.
*
* \return The next entry or NULL if the iterator is finished. */
static inline ENTRY_t *NAME_next(hashtable_t *t, int *i)
{
assert(t != NULL);
assert(i != NULL);
ENTRY_t *e = NULL;
while ((*i < t->size) && !(e = t->etable[(*i)++])) ;
return e;
}
# undef ENTRY
# undef KEY
# undef MATCH
# undef NAME
# undef ENTRY_t
# undef KEY_t
# undef MATCH_t
# undef KEY_hash
# undef MATCH_cmp
# undef NAME_new
# undef NAME_free
# undef NAME_stats_init
# undef NAME_add
# undef NAME_find
# undef NAME_iter
# undef NAME_next
# undef _KEY_HASH
#endif /* ENTRY */