aboutsummaryrefslogtreecommitdiff
path: root/hashtable.h
diff options
context:
space:
mode:
Diffstat (limited to 'hashtable.h')
-rw-r--r--hashtable.h143
1 files changed, 143 insertions, 0 deletions
diff --git a/hashtable.h b/hashtable.h
new file mode 100644
index 0000000..19a3897
--- /dev/null
+++ b/hashtable.h
@@ -0,0 +1,143 @@
+/**
+ * C hashtable implementation
+ *
+ * Copyright (C) 2021 Wojtek Kosior
+ * Redistribution terms are gathered in the `copyright' file.
+ */
+
+/*
+ * https://git.koszko.org/C-hashtable
+ * Note that this version is likely to be more up-to-date than the one linked.
+ *
+ * This is a separate chaining hashtable for general use. It's not
+ * universal: it uses malloc() and free(), so it requires a standard
+ * library to function and it's for single-threaded use only. It does,
+ * however, have one advantage: it rehashes automatically, both when
+ * it grows in size and when it shrinks, while retaining O(1) access
+ * time. A normal hashtable with rehashing would have amortized O(1)
+ * access time, but there would be single access with O(n) time
+ * complexity for each rehashing. In this hashtable, rehashing is done
+ * in parts. For example, a ht_add(), aside from adding an entry,
+ * might also rehash 4 other entries from old table to the new one and
+ * leave the rest unrehashed.
+ * Of course, it is assumed that a good hash function is provided
+ * by the programmer. If not, accesses may still degenerate to O(n).
+ * Hence, this hashtable is not secure against DoS attacks.
+ */
+
+#ifndef HASHTABLE_H
+#define HASHTABLE_H
+
+#include <sys/types.h> /* for ssize_t */
+
+/* These are possible return values of some ht_ functions (see below). */
+#define HT_OK 0
+#define HT_NO_MEM -1
+#define HT_KEY_PRESENT -2
+#define HT_KEY_ABSENT -3
+
+typedef struct
+{
+ /* All members are considered implementation details, except for "entries",
+ * which can be read, but should not be modified by external code.
+ */
+ size_t entries;
+
+ /*
+ * tab[] is where entries (chains of entries) are stored.
+ * When rehashing, newtab[] is also used.
+ */
+ struct ht_node **tab, **newtab;
+
+ /* sizes of tab[] and newtab[], obviously */
+ size_t tab_size, new_size;
+
+ size_t (*hashfunc)(const void* key);
+ int (*cmpfunc)(const void* key1, const void *key2);
+
+ /*
+ * When no rehashing is taking place, rehashing_position is -1 (#define'd as
+ * NOT_REHASHING in hashtable.c). At any other time, rehashing_position is the
+ * lowest not yet rehashed position in the smaller table.
+ */
+ ssize_t rehashing_position;
+} hashtable_t;
+
+/*
+ * All int functions return 0 (#define'd as HT_OK) on success and in
+ * case of failure they return error codes, as described below.
+ */
+
+/* May fail with HT_NO_MEM. */
+int ht_init(hashtable_t *ht,
+ size_t (*hash)(const void* key),
+ int (*cmp)(const void* key1, const void *key2));
+
+/* May fail with HT_NO_MEM and HT_KEY_PRESENT. */
+int ht_add(hashtable_t *ht, const void *key, const void *val);
+
+/*
+ * May fail with HT_NO_MEM. If key was not yet present in hashtable, *oldkey and
+ * *oldval are not modified. Otherwise, just-replaced pair is stored in them.
+ */
+int ht_set(hashtable_t *ht, const void *key, const void *val,
+ void **oldkey, void **oldval);
+
+/*
+ * If present, the looked for pair is stored in *storedkey and *val. Otherwise,
+ * they're not modified and HT_KEY_ABSENT is returned. storedkey and/or val can
+ * be NULL.
+ */
+int ht_get(hashtable_t *ht, const void *key, void **storedkey, void **val);
+
+/*
+ * Works like ht_get() but is thread-safe with regard to other calls to
+ * ht_get_threadsafe() on the same hashtable. Note that the hash and compare
+ * functions supplied to the hashtable also have to be thread-safe (that
+ * requirement is of course met for those used by ht_string_init()).
+ */
+int ht_get_threadsafe(hashtable_t *ht, const void *key,
+ void **storedkey, void **val);
+
+/* Works like the above but also removes the pair from ht if found. */
+int ht_rem(hashtable_t *ht, const void *key, void **storedkey, void **val);
+
+/*
+ * De-initializes the hashtable freeing all its structures. The programmer is
+ * responsible for freeing keys and values if they were allocated from the heap
+ * (see ht_map_destroy() below).
+ */
+void ht_destroy(hashtable_t *ht);
+
+/* Calls ht_finish_resizing(), then maps through ht. */
+void ht_map(hashtable_t *ht, void *arg,
+ void (*mapfunc)(const void *key, void *val, void *arg));
+
+/*
+ * It might be tempting to use ht_map() to free() all keys and values stored in
+ * ht and then call ht_destroy(). If you think about it, ht_map() would leave
+ * hashtable in a broken state - with keys being deallocated. Depending on the
+ * implementation, ht_destroy() could cope with that, but we'd rather not
+ * guarrantee anything, so here's another function just for that - mapping
+ * through entries and destroying the hashtable immediately after, explicitly
+ * allowing the mapping function to deallocate keys.
+ */
+void ht_map_destroy(hashtable_t *ht, void *arg,
+ void (*mapfunc)(void *key, void *val, void *arg));
+
+/*
+ * If hashtable is in the process of being rehashed, this function
+ * processes it to the end. Otherwise - it does nothing.
+ */
+void ht_finish_resizing(hashtable_t *ht);
+
+/* Included, since strings are commonly used as keys. */
+size_t ht_string_hash(const char *key);
+
+/*
+ * May fail with HT_NO_MEM. Initializes ht for use with string keys
+ * (using ht_string_hash() and strcmp()).
+ */
+int ht_string_init(hashtable_t *ht);
+
+#endif /* HASHTABLE_H */