aboutsummaryrefslogtreecommitdiff
path: root/hashtable.h
diff options
context:
space:
mode:
authorWojtek Kosior <wk@koszkonutek-tmp.pl.eu.org>2021-04-30 18:47:09 +0200
committerWojtek Kosior <wk@koszkonutek-tmp.pl.eu.org>2021-04-30 18:47:09 +0200
commit35a201cc8ef0c3f5b2df88d2e528aabee1048348 (patch)
tree902dae955480e19f4498dbe4964619fc91d09b06 /hashtable.h
downloadxml-backup-restore-35a201cc8ef0c3f5b2df88d2e528aabee1048348.tar.gz
xml-backup-restore-35a201cc8ef0c3f5b2df88d2e528aabee1048348.zip
Initial/Final commitHEADmaster
Diffstat (limited to 'hashtable.h')
-rw-r--r--hashtable.h106
1 files changed, 106 insertions, 0 deletions
diff --git a/hashtable.h b/hashtable.h
new file mode 100644
index 0000000..34456cc
--- /dev/null
+++ b/hashtable.h
@@ -0,0 +1,106 @@
+// https://libregit.org/koszko/C_hashtable
+// Note that this version might or might not be more up-to-date than
+// the one linked.
+
+// This is a separate chaining hashtable for general use. It's not
+// universal: it uses malloc() and free(), so it requires a standard
+// library to function and it's for single-threaded use only. It does,
+// however, have one advantage: it rehashes automatically, both when
+// it grows in size and when it shrinks, while retaining O(1) access
+// time. A normal hashtable with rehashing would have amortized O(1)
+// access time, but there would be single access with O(n) time
+// complexity for each rehashing. In this hashtable, rehashing is done
+// in parts. For example, a ht_add(), aside from adding an entry,
+// might also rehash 4 other entries from old table to the new one and
+// leave the rest unrehashed.
+// Of course, it is assumed that a good hash function is provided
+// by the programmer. If not, accesses may still degenerate to O(n).
+// Hence, this hashtable is not secure against DoS attacks.
+
+#include <sys/types.h> // for ssize_t
+
+// These are possible return values of some ht_ functions (see below).
+#define HT_OK 0
+#define HT_NO_MEM -1
+#define HT_KEY_PRESENT -2
+#define HT_KEY_ABSENT -3
+
+typedef struct
+{
+ // All members are considered implementation details, except for
+ // "entries", which can be read, but should not be modified by
+ // external code.
+ size_t entries;
+
+ // tab[] is where entries (chains of entries) are stored.
+ // When rehashing, newtab[] is also used.
+ struct ht_node **tab, **newtab;
+
+ // sizes of tab[] and newtab[], obviously
+ size_t tab_size, new_size;
+
+ size_t (*hashfunc)(const void* key);
+ int (*cmpfunc)(const void* key1, const void *key2);
+
+ // When no rehashing is taking place, rehashing_position is -1
+ // (#define'd as NOT_REHASHING in hashtable.c). At any other time,
+ // rehashing_position is the lowest not yet rehashed position in
+ // the smaller table.
+ ssize_t rehashing_position;
+} hashtable_t;
+
+// All int functions return 0 (#define'd as HT_OK) on success and in
+// case of failure they return error codes, as described below.
+
+// May fail with HT_NO_MEM.
+int ht_init(hashtable_t *ht,
+ size_t (*hash)(const void* key),
+ int (*cmp)(const void* key1, const void *key2));
+
+// May fail with HT_NO_MEM and HT_KEY_PRESENT.
+int ht_add(hashtable_t *ht, const void *key, const void *val);
+
+// May fail with HT_NO_MEM. If key was not yet present in hashtable,
+// *oldkey and *oldval are not modified. Otherwise, just-replaced pair
+// is stored in them.
+int ht_set(hashtable_t *ht, const void *key, const void *val,
+ void **oldkey, void **oldval);
+
+// If present, the looked for pair is stored in *storedkey and *val.
+// Otherwise, they're not modified and HT_KEY_ABSENT is returned.
+// storedkey and/or val can be NULL.
+int ht_get(hashtable_t *ht, const void *key, void **storedkey, void **val);
+
+// Works like the above but also removes the pair from ht if found.
+int ht_rem(hashtable_t *ht, const void *key, void **storedkey, void **val);
+
+// De-initializes the hashtable freeing all its structures.
+// The programmer is responsible for freeing keys and values if they
+// were allocated from the heap (see ht_map_destroy() below).
+void ht_destroy(hashtable_t *ht);
+
+// Calls ht_finish_resizing(), then maps through ht.
+void ht_map(hashtable_t *ht, void *arg,
+ void (*mapfunc)(const void *key, void *val, void *arg));
+
+// It might be tempting to use ht_map() to free() all keys and values
+// stored in ht and then call ht_destroy(). If you think about it,
+// ht_map() would leave hashtable in a broken state - with keys being
+// deallocated. Depending on the implementation, ht_destroy() could
+// cope with that, but we'd rather not guarrantee anything, so here's
+// another function just for that - mapping through entries and
+// destroying the hashtable immediately after, explicitly allowing
+// the mapping function to deallocate keys.
+void ht_map_destroy(hashtable_t *ht, void *arg,
+ void (*mapfunc)(void *key, void *val, void *arg));
+
+// If hashtable is in the process of being rehashed, this function
+// processes it to the end. Otherwise - it does nothing.
+void ht_finish_resizing(hashtable_t *ht);
+
+// Included, since strings are commonly used as keys
+size_t ht_string_hash(const char *key);
+
+// May fail with HT_NO_MEM. Initializes ht for use with string keys
+// (using ht_string_hash() and strcmp()).
+int ht_string_init(hashtable_t *ht);