diff options
Diffstat (limited to 'hashtable.h')
-rw-r--r-- | hashtable.h | 143 |
1 files changed, 143 insertions, 0 deletions
diff --git a/hashtable.h b/hashtable.h new file mode 100644 index 0000000..19a3897 --- /dev/null +++ b/hashtable.h @@ -0,0 +1,143 @@ +/** + * C hashtable implementation + * + * Copyright (C) 2021 Wojtek Kosior + * Redistribution terms are gathered in the `copyright' file. + */ + +/* + * https://git.koszko.org/C-hashtable + * Note that this version is likely to be more up-to-date than the one linked. + * + * This is a separate chaining hashtable for general use. It's not + * universal: it uses malloc() and free(), so it requires a standard + * library to function and it's for single-threaded use only. It does, + * however, have one advantage: it rehashes automatically, both when + * it grows in size and when it shrinks, while retaining O(1) access + * time. A normal hashtable with rehashing would have amortized O(1) + * access time, but there would be single access with O(n) time + * complexity for each rehashing. In this hashtable, rehashing is done + * in parts. For example, a ht_add(), aside from adding an entry, + * might also rehash 4 other entries from old table to the new one and + * leave the rest unrehashed. + * Of course, it is assumed that a good hash function is provided + * by the programmer. If not, accesses may still degenerate to O(n). + * Hence, this hashtable is not secure against DoS attacks. + */ + +#ifndef HASHTABLE_H +#define HASHTABLE_H + +#include <sys/types.h> /* for ssize_t */ + +/* These are possible return values of some ht_ functions (see below). */ +#define HT_OK 0 +#define HT_NO_MEM -1 +#define HT_KEY_PRESENT -2 +#define HT_KEY_ABSENT -3 + +typedef struct +{ + /* All members are considered implementation details, except for "entries", + * which can be read, but should not be modified by external code. + */ + size_t entries; + + /* + * tab[] is where entries (chains of entries) are stored. + * When rehashing, newtab[] is also used. + */ + struct ht_node **tab, **newtab; + + /* sizes of tab[] and newtab[], obviously */ + size_t tab_size, new_size; + + size_t (*hashfunc)(const void* key); + int (*cmpfunc)(const void* key1, const void *key2); + + /* + * When no rehashing is taking place, rehashing_position is -1 (#define'd as + * NOT_REHASHING in hashtable.c). At any other time, rehashing_position is the + * lowest not yet rehashed position in the smaller table. + */ + ssize_t rehashing_position; +} hashtable_t; + +/* + * All int functions return 0 (#define'd as HT_OK) on success and in + * case of failure they return error codes, as described below. + */ + +/* May fail with HT_NO_MEM. */ +int ht_init(hashtable_t *ht, + size_t (*hash)(const void* key), + int (*cmp)(const void* key1, const void *key2)); + +/* May fail with HT_NO_MEM and HT_KEY_PRESENT. */ +int ht_add(hashtable_t *ht, const void *key, const void *val); + +/* + * May fail with HT_NO_MEM. If key was not yet present in hashtable, *oldkey and + * *oldval are not modified. Otherwise, just-replaced pair is stored in them. + */ +int ht_set(hashtable_t *ht, const void *key, const void *val, + void **oldkey, void **oldval); + +/* + * If present, the looked for pair is stored in *storedkey and *val. Otherwise, + * they're not modified and HT_KEY_ABSENT is returned. storedkey and/or val can + * be NULL. + */ +int ht_get(hashtable_t *ht, const void *key, void **storedkey, void **val); + +/* + * Works like ht_get() but is thread-safe with regard to other calls to + * ht_get_threadsafe() on the same hashtable. Note that the hash and compare + * functions supplied to the hashtable also have to be thread-safe (that + * requirement is of course met for those used by ht_string_init()). + */ +int ht_get_threadsafe(hashtable_t *ht, const void *key, + void **storedkey, void **val); + +/* Works like the above but also removes the pair from ht if found. */ +int ht_rem(hashtable_t *ht, const void *key, void **storedkey, void **val); + +/* + * De-initializes the hashtable freeing all its structures. The programmer is + * responsible for freeing keys and values if they were allocated from the heap + * (see ht_map_destroy() below). + */ +void ht_destroy(hashtable_t *ht); + +/* Calls ht_finish_resizing(), then maps through ht. */ +void ht_map(hashtable_t *ht, void *arg, + void (*mapfunc)(const void *key, void *val, void *arg)); + +/* + * It might be tempting to use ht_map() to free() all keys and values stored in + * ht and then call ht_destroy(). If you think about it, ht_map() would leave + * hashtable in a broken state - with keys being deallocated. Depending on the + * implementation, ht_destroy() could cope with that, but we'd rather not + * guarrantee anything, so here's another function just for that - mapping + * through entries and destroying the hashtable immediately after, explicitly + * allowing the mapping function to deallocate keys. + */ +void ht_map_destroy(hashtable_t *ht, void *arg, + void (*mapfunc)(void *key, void *val, void *arg)); + +/* + * If hashtable is in the process of being rehashed, this function + * processes it to the end. Otherwise - it does nothing. + */ +void ht_finish_resizing(hashtable_t *ht); + +/* Included, since strings are commonly used as keys. */ +size_t ht_string_hash(const char *key); + +/* + * May fail with HT_NO_MEM. Initializes ht for use with string keys + * (using ht_string_hash() and strcmp()). + */ +int ht_string_init(hashtable_t *ht); + +#endif /* HASHTABLE_H */ |