aboutsummaryrefslogtreecommitdiff
path: root/hashtable.h
blob: 34456cc93a728ea64a039ef8dfe316f0697d88c0 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
// https://libregit.org/koszko/C_hashtable
// Note that this version might or might not be more up-to-date than
// the one linked.

// This is a separate chaining hashtable for general use. It's not
// universal: it uses malloc() and free(), so it requires a standard
// library to function and it's for single-threaded use only. It does,
// however, have one advantage: it rehashes automatically, both when
// it grows in size and when it shrinks, while retaining O(1) access
// time. A normal hashtable with rehashing would have amortized O(1)
// access time, but there would be single access with O(n) time
// complexity for each rehashing. In this hashtable, rehashing is done
// in parts. For example, a ht_add(), aside from adding an entry,
// might also rehash 4 other entries from old table to the new one and
// leave the rest unrehashed.
// Of course, it is assumed that a good hash function is provided
// by the programmer. If not, accesses may still degenerate to O(n).
// Hence, this hashtable is not secure against DoS attacks.

#include <sys/types.h> // for ssize_t

// These are possible return values of some ht_ functions (see below).
#define HT_OK           0
#define HT_NO_MEM      -1
#define HT_KEY_PRESENT -2
#define HT_KEY_ABSENT  -3

typedef struct
{
  // All members are considered implementation details, except for
  // "entries", which can be read, but should not be modified by
  // external code.
  size_t entries;

  // tab[] is where entries (chains of entries) are stored.
  // When rehashing, newtab[] is also used.
  struct ht_node **tab, **newtab;

  // sizes of tab[] and newtab[], obviously
  size_t tab_size, new_size;

  size_t (*hashfunc)(const void* key);
  int (*cmpfunc)(const void* key1, const void *key2);

  // When no rehashing is taking place, rehashing_position is -1
  // (#define'd as NOT_REHASHING in hashtable.c). At any other time,
  // rehashing_position is the lowest not yet rehashed position in
  // the smaller table.
  ssize_t rehashing_position;
} hashtable_t;

// All int functions return 0 (#define'd as HT_OK) on success and in
// case of failure they return error codes, as described below.

// May fail with HT_NO_MEM.
int ht_init(hashtable_t *ht,
	    size_t (*hash)(const void* key),
	    int (*cmp)(const void* key1, const void *key2));

// May fail with HT_NO_MEM and HT_KEY_PRESENT.
int ht_add(hashtable_t *ht, const void *key, const void *val);

// May fail with HT_NO_MEM. If key was not yet present in hashtable,
// *oldkey and *oldval are not modified. Otherwise, just-replaced pair
// is stored in them.
int ht_set(hashtable_t *ht, const void *key, const void *val,
	   void **oldkey, void **oldval);

// If present, the looked for pair is stored in *storedkey and *val.
// Otherwise, they're not modified and HT_KEY_ABSENT is returned.
// storedkey and/or val can be NULL.
int ht_get(hashtable_t *ht, const void *key, void **storedkey, void **val);

// Works like the above but also removes the pair from ht if found.
int ht_rem(hashtable_t *ht, const void *key, void **storedkey, void **val);

// De-initializes the hashtable freeing all its structures.
// The programmer is responsible for freeing keys and values if they
// were allocated from the heap (see ht_map_destroy() below).
void ht_destroy(hashtable_t *ht);

// Calls ht_finish_resizing(), then maps through ht.
void ht_map(hashtable_t *ht, void *arg,
	    void (*mapfunc)(const void *key, void *val, void *arg));

// It might be tempting to use ht_map() to free() all keys and values
// stored in ht and then call ht_destroy(). If you think about it,
// ht_map() would leave hashtable in a broken state - with keys being
// deallocated. Depending on the implementation, ht_destroy() could
// cope with that, but we'd rather not guarrantee anything, so here's
// another function just for that - mapping through entries and
// destroying the hashtable immediately after, explicitly allowing
// the mapping function to deallocate keys.
void ht_map_destroy(hashtable_t *ht, void *arg,
		    void (*mapfunc)(void *key, void *val, void *arg));

// If hashtable is in the process of being rehashed, this function
// processes it to the end. Otherwise - it does nothing.
void ht_finish_resizing(hashtable_t *ht);

// Included, since strings are commonly used as keys
size_t ht_string_hash(const char *key);

// May fail with HT_NO_MEM. Initializes ht for use with string keys
// (using ht_string_hash() and strcmp()).
int ht_string_init(hashtable_t *ht);