aboutsummaryrefslogtreecommitdiff
path: root/hashtable.h
blob: 19a3897b7db104535849c20099dc8079160f283d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
/**
 * C hashtable implementation
 *
 * Copyright (C) 2021 Wojtek Kosior
 * Redistribution terms are gathered in the `copyright' file.
 */

/*
 * https://git.koszko.org/C-hashtable
 * Note that this version is likely to be more up-to-date than the one linked.
 *
 * This is a separate chaining hashtable for general use. It's not
 * universal: it uses malloc() and free(), so it requires a standard
 * library to function and it's for single-threaded use only. It does,
 * however, have one advantage: it rehashes automatically, both when
 * it grows in size and when it shrinks, while retaining O(1) access
 * time. A normal hashtable with rehashing would have amortized O(1)
 * access time, but there would be single access with O(n) time
 * complexity for each rehashing. In this hashtable, rehashing is done
 * in parts. For example, a ht_add(), aside from adding an entry,
 * might also rehash 4 other entries from old table to the new one and
 * leave the rest unrehashed.
 * Of course, it is assumed that a good hash function is provided
 * by the programmer. If not, accesses may still degenerate to O(n).
 * Hence, this hashtable is not secure against DoS attacks.
 */

#ifndef HASHTABLE_H
#define HASHTABLE_H

#include <sys/types.h> /* for ssize_t */

/* These are possible return values of some ht_ functions (see below). */
#define HT_OK           0
#define HT_NO_MEM      -1
#define HT_KEY_PRESENT -2
#define HT_KEY_ABSENT  -3

typedef struct
{
  /* All members are considered implementation details, except for "entries",
   * which can be read, but should not be modified by external code.
   */
  size_t entries;

  /*
   * tab[] is where entries (chains of entries) are stored.
   * When rehashing, newtab[] is also used.
   */
  struct ht_node **tab, **newtab;

  /* sizes of tab[] and newtab[], obviously */
  size_t tab_size, new_size;

  size_t (*hashfunc)(const void* key);
  int (*cmpfunc)(const void* key1, const void *key2);

  /*
   * When no rehashing is taking place, rehashing_position is -1 (#define'd as
   * NOT_REHASHING in hashtable.c). At any other time, rehashing_position is the
   * lowest not yet rehashed position in the smaller table.
   */
  ssize_t rehashing_position;
} hashtable_t;

/*
 * All int functions return 0 (#define'd as HT_OK) on success and in
 * case of failure they return error codes, as described below.
 */

/* May fail with HT_NO_MEM. */
int ht_init(hashtable_t *ht,
	    size_t (*hash)(const void* key),
	    int (*cmp)(const void* key1, const void *key2));

/* May fail with HT_NO_MEM and HT_KEY_PRESENT. */
int ht_add(hashtable_t *ht, const void *key, const void *val);

/*
 * May fail with HT_NO_MEM. If key was not yet present in hashtable, *oldkey and
 * *oldval are not modified. Otherwise, just-replaced pair is stored in them.
 */
int ht_set(hashtable_t *ht, const void *key, const void *val,
	   void **oldkey, void **oldval);

/*
 * If present, the looked for pair is stored in *storedkey and *val. Otherwise,
 * they're not modified and HT_KEY_ABSENT is returned. storedkey and/or val can
 * be NULL.
 */
int ht_get(hashtable_t *ht, const void *key, void **storedkey, void **val);

/*
 * Works like ht_get() but is thread-safe with regard to other calls to
 * ht_get_threadsafe() on the same hashtable. Note that the hash and compare
 * functions supplied to the hashtable also have to be thread-safe (that
 * requirement is of course met for those used by ht_string_init()).
 */
int ht_get_threadsafe(hashtable_t *ht, const void *key,
		      void **storedkey, void **val);

/* Works like the above but also removes the pair from ht if found. */
int ht_rem(hashtable_t *ht, const void *key, void **storedkey, void **val);

/*
 * De-initializes the hashtable freeing all its structures. The programmer is
 * responsible for freeing keys and values if they were allocated from the heap
 * (see ht_map_destroy() below).
 */
void ht_destroy(hashtable_t *ht);

/* Calls ht_finish_resizing(), then maps through ht. */
void ht_map(hashtable_t *ht, void *arg,
	    void (*mapfunc)(const void *key, void *val, void *arg));

/*
 * It might be tempting to use ht_map() to free() all keys and values stored in
 * ht and then call ht_destroy(). If you think about it, ht_map() would leave
 * hashtable in a broken state - with keys being deallocated. Depending on the
 * implementation, ht_destroy() could cope with that, but we'd rather not
 * guarrantee anything, so here's another function just for that - mapping
 * through entries and destroying the hashtable immediately after, explicitly
 * allowing the mapping function to deallocate keys.
 */
void ht_map_destroy(hashtable_t *ht, void *arg,
		    void (*mapfunc)(void *key, void *val, void *arg));

/*
 * If hashtable is in the process of being rehashed, this function
 * processes it to the end. Otherwise - it does nothing.
 */
void ht_finish_resizing(hashtable_t *ht);

/* Included, since strings are commonly used as keys. */
size_t ht_string_hash(const char *key);

/*
 * May fail with HT_NO_MEM. Initializes ht for use with string keys
 * (using ht_string_hash() and strcmp()).
 */
int ht_string_init(hashtable_t *ht);

#endif /* HASHTABLE_H */