aboutsummaryrefslogtreecommitdiff
/*
  Copyright 2019 Wojciech Kosior
  
  This is free and unencumbered software released into the public domain.

  Anyone is free to copy, modify, publish, use, compile, sell, or
  distribute this software, either in source code form or as a compiled
  binary, for any purpose, commercial or non-commercial, and by any
  means.

  In jurisdictions that recognize copyright laws, the author or authors
  of this software dedicate any and all copyright interest in the
  software to the public domain. We make this dedication for the benefit
  of the public at large and to the detriment of our heirs and
  successors. We intend this dedication to be an overt act of
  relinquishment in perpetuity of all present and future rights to this
  software under copyright law.

  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
  OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  OTHER DEALINGS IN THE SOFTWARE.

  For more information, please refer to <http://unlicense.org/>
*/


// https://libregit.org/koszko/C_hashtable

// This is a separate chaining hashtable for general use. It's not
// universal: it uses malloc() and free(), so it requires a standard
// library to function and it's for single-threaded use only. It does,
// however, have one advantage: it rehashes automatically, both when
// it grows in size and when it shrinks, while retaining O(1) access
// time. A normal hashtable with rehashing would have amortized O(1)
// access time, but there would be single access with O(n) time
// complexity for each rehashing. In this hashtable, rehashing is done
// in parts. For example, a ht_add(), aside from adding an entry,
// might also rehash 4 other entries from old table to the new one and
// leave the rest unrehashed.
// Of course, it is assumed that a good hash function is provided
// by the programmer. If not, accesses may still degenerate to O(n).

#include <sys/types.h> // for ssize_t

// These are possible retrn values of some ht_ functions (see below).
#define HT_OK           0
#define HT_NO_MEM      -1
#define HT_KEY_PRESENT -2
#define HT_KEY_ABSENT  -3

// Struct used to store the pair. Implementation detail.
struct ht_node
{
  void *key;
  void *val;
  struct ht_node *next;
};

typedef struct
{
  // All members are considered implementation details, except for
  // "entries", which can be read, but should not be modified by
  // external code.
  size_t entries;

  // tab[] is where entries (chains of entries) are stored.
  // When rehashing, newtab[] is also used.
  struct ht_node **tab, **newtab;

  // sizes of tab[] and newtab[], obviously
  size_t tab_size, new_size;

  size_t (*hashfunc)(void* key);
  int (*cmpfunc)(void* key1, void *key2);

  // When no rehashing is taking place, rehashing_position is -1
  // (#define'd as NOT_REHASHING in hashtable.c). At any other time,
  // rehashing_position is the lowest not yet rehashed position in
  // the smaller table.
  ssize_t rehashing_position;
} hashtable_t;

// All int functions return 0 (#define'd as HT_OK) on success and in
// case of failure they return error codes, as described below.

// May fail with HT_NO_MEM.
int ht_init(hashtable_t *ht,
	    size_t (*hash)(void* key),
	    int (*cmp)(void* key1, void *key2));

// May fail with HT_NO_MEM and HT_KEY_PRESENT.
int ht_add(hashtable_t *ht, void *key, void *val);

// May fail with HT_NO_MEM. If key was not yet present in hashtable,
// *oldkey and *oldval are not modified. Otherwise, just-replaced pair
// is stored in them.
int ht_set(hashtable_t *ht, void *key, void *val,
	   void **oldkey, void **oldval);

// If present, the looked for pair is stored in *storedkey and *val.
// Otherwise, they're not modified and HT_KEY_ABSENT is returned.
// storedkey and/or val can be NULL.
int ht_get(hashtable_t *ht, void *key, void **storedkey, void **val);

// Works like the above but also removes the pair from ht if found.
int ht_rem(hashtable_t *ht, void *key, void **storedkey, void **val);

// De-initializes the hashtable freeing all it's structures.
// The programmer is responsible for freeing keys and values if they
// were allocated from the heap (see ht_map_destroy() below).
void ht_destroy(hashtable_t *ht);

// Calls ht_finish_resizing(), then maps through ht.
void ht_map(hashtable_t *ht, void *arg,
	    void (*mapfunc)(void *key, void *val, void *arg));

// It might be tempting to use ht_map() to free() all keys and values
// stored in ht and then call ht_destroy(). If you think about it,
// ht_map() would leave hashtable in a broken state - with keys being
// deallocated. Depending on the implementation, ht_destroy() could
// cope with that, but we'd rather not guarrantee anything, so here's
// another function just for that - mapping through entries and
// destroying the hashtable immediately after, explicitly allowing
// the mapping function to dealocate keys.
void ht_map_destroy(hashtable_t *ht, void *arg,
		    void (*mapfunc)(void *key, void *val, void *arg));

// If hashtable is in the process of being rehashed, this function
// processes it to the end. Otherwise - it does nothing.
void ht_finish_resizing(hashtable_t *ht);

// Included, since strings are commonly used as keys
size_t ht_string_hash(const char *key);

// May fail with HT_NO_MEM. Initializes ht for use with string keys
// (using ht_string_hash() and strcmp()).
int ht_string_init(hashtable_t *ht);