// https://libregit.org/koszko/C_hashtable
// Note that this version might or might not be more up-to-date than
// the one linked.

// GENERAL INFO

// You might want to read the beginning of hashtable.h first.

// In some places "rehashing" and in other ones "resizing" seemed to
// be the right word to use. They mean more or less the same.

// Functions starting with ht_ are part of the API. Internal functions
// are declared static. I also made some of them inline (either
// because they were extremely short or only called from 1 place).

// Hashtable size is always a power of 2.

// When the hashtable is ¾ full, a new, 2x bigger table is allocated
// and whenever one of 4 basic operations (adding, removing, setting,
// getting) occurs, 4 slots are being rehashed from old table into 8
// slots in new table. Similarly, when hashtable is ¼ full, a new,
// 2x smaller table is allocated and each of subsequent operations
// rehashes 8 entries from old table into 4 in new table.
// This mechanism has been made lazier: getting and removing don't
// trigger growing of ht even if it's 3/4 full. Similarly, getting,
// setting and adding don't trigger shrinking.
// Once resizing is triggered, however, any of the operations will
// contribute to rehashing. Even if, for example, the operation is
// ADD and the table is being shrinked.
// This means, that if we have a hashtable of size n which is ¾ full
// and growing is triggered, then each subsequent call to
// ht_{add,rem,get,set}() rehashes some entries and, depending on
// how frequently and how successfully each of these 4 funcs was
// called, at the end of resizing we get a size 2n hashtable which is
// between ¼ and ½ full. Similarly, if shrinking of a ¼ full
// hashtable of size n is triggered, then after some operations we
// get a size ½n hashtable, that is somewhere between ¼ and ¾ full.
// One can see now, that we always keep the hashtable between ¼ and ¾
// full (with the exception of a minimal size one, that can be empty).

#include "hashtable.h"

#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <stdbool.h>

// We won't shrink hashtable below this size. Newly created one will
// be this big.
#define MIN_SIZE 32

// Special value of ht->rehashing_position.
#define NOT_REHASHING ((ssize_t) -1)

// Those are possible return values of do_resizing_related_stuff()
// and rehash_some_entries() (which only returns the first 2).
#define REHASHING_IN_PROGRESS     0
#define REHASHING_NOT_IN_PROGRESS 1
#define REHASHING_NO_MEM          2

/* Struct used to store a pair. */
struct ht_node
{
  const void *key;
  const void *val;
  struct ht_node *next;
};

enum op
  {
    GET,
    ADD,
    SET,
    REM,
  };

int ht_init(hashtable_t *ht,
	    size_t (*hash)(const void *key),
	    int (*cmp)(const void *key1, const void *key2))
{
  if (!(ht->tab = calloc(MIN_SIZE, sizeof(struct ht_node**))))
    return HT_NO_MEM;

  ht->tab_size = MIN_SIZE;
  ht->rehashing_position = NOT_REHASHING;
  ht->entries = 0;
  ht->hashfunc = hash;
  ht->cmpfunc = cmp;
  
  return HT_OK;
}

// First come some utilities :)

static inline size_t min(size_t n1, size_t n2)
{
  return n1 < n2 ? n1 : n2;
}

static inline size_t hash2(size_t n)
{
  // I found this "hash improver" on the internet.
  n ^= (n >> 20) ^ (n >> 12);
  return n ^ (n >> 7) ^ (n >> 4);
}

// Below are 2 list-handling utility functions.
static inline struct ht_node *join_lists(struct ht_node *l1,
					 struct ht_node *l2)
{
  if (!l1) return l2;
  if (!l2) return l1;

  struct ht_node *l1_last;

  for (l1_last = l1; l1_last->next; l1_last = l1_last->next);

  // Append l2 to the end of l1.
  l1_last->next = l2;

  // For convenience return the first element of the resulting list.
  return l1;
}


static inline void push(struct ht_node *node, struct ht_node **list)
{
  node->next = *list;
  *list = node;
}

// The following 2 rehash_* functions are helpers of
// rehash_some_entries().
// This func rehashes 1 chain of entries in tab[]
// into 2 chains in newtab[].
static inline void rehash_position_growing(hashtable_t *ht)
{
  // There are 2 possible new positions of an entry in a 2x bigger ht.
  struct ht_node *list0 = NULL, *list1 = NULL;
  
  size_t old_position = ht->rehashing_position,
    new_position0 = old_position,
    new_position1 = old_position | ht->tab_size;
  
  struct ht_node *pair = ht->tab[old_position], *next_pair;

  while (pair)
    {
      next_pair = pair->next;

      size_t new_position = hash2(ht->hashfunc(pair->key))
	& (ht->new_size - 1);

      push(pair, new_position == new_position1 ? &list1 : &list0);

      pair = next_pair;
    }

  ht->newtab[new_position0] = list0;
  ht->newtab[new_position1] = list1;

  ht->rehashing_position++;
}

// This func rehashes 2 chains of entries in tab[]
// into 1 chain in newtab[].
static inline void rehash_2positions_shrinking(hashtable_t *ht)
{
  size_t new_position = ht->rehashing_position,
    old_position0 = new_position,
    old_position1 = new_position | ht->new_size;
  
  ht->newtab[new_position] = join_lists(ht->tab[old_position0],
					ht->tab[old_position1]);

  ht->rehashing_position++;
}

// Rehashes 4(8) positions from tab to newtab. If those were the last
// enetries to rehash, the function takes care of everything
// (like deallocating old tab) and returns REHASHING_NOT_IN_PROGRESS.
// Otherwise, returns REHASHING_IN_PROGRESS.
// Caller must make sure rehashing was started b4 calling this func.
static int rehash_some_entries(hashtable_t *ht)
{
  int rehashes_left = 4;
  
  if (ht->new_size > ht->tab_size)                    // growing ht
    {
      while(rehashes_left--) rehash_position_growing(ht);
      if (ht->rehashing_position != ht->tab_size)
	return REHASHING_IN_PROGRESS;
    }
  else                                                // shrinking ht
    {
      while(rehashes_left--) rehash_2positions_shrinking(ht);
      if (ht->rehashing_position != ht->new_size)
	return REHASHING_IN_PROGRESS;
    }

  // rehashing finishes
  ht->rehashing_position = NOT_REHASHING;
  ht->tab_size = ht->new_size;
  free(ht->tab);
  ht->tab = ht->newtab;
  
  return REHASHING_NOT_IN_PROGRESS;
}

static inline bool resizing_taking_place(hashtable_t *ht)
{
  return !(ht->rehashing_position == NOT_REHASHING);
}

void ht_finish_resizing(hashtable_t *ht)
{
  if (resizing_taking_place(ht))
    while (rehash_some_entries(ht) == REHASHING_IN_PROGRESS);
}

static inline bool needs_growing(hashtable_t *ht)
{
  return ht->entries >= 3 * ht->tab_size / 4;
}

static inline bool needs_shrinking(hashtable_t *ht)
{
  return ht->tab_size > MIN_SIZE
    && ht->entries <= ht->tab_size / 4;
}

// Each of hashtable operations (add, set, rem, get) should also
// attempt to do part of resizing. This way resizing operation
// which is O(n) is distributed among many hashtable accesses
// each of them still being O(1). Without this the the amortized
// complexity of ht accesses would still be O(1), but a single access
// would sometimes be O(n).
// Other function that adds, sets, gets or removes sth from ht uses
// this one to do this "part of resizing" mentioned above.
// This func returns REHASHING_NO_MEM on failed malloc (won't happen
// for GET or REM operation) and REHASHING_[NOT_]IN_PROGRESS otherwise.
static inline int
do_resizing_related_stuff(hashtable_t *ht, const void *key, enum op op)
{
  bool resizing = resizing_taking_place(ht);
  
  if (!resizing)
    {
      size_t new_size;

      switch (op)
	{
	case GET:
	  goto dont_start_resizing;
	case ADD:
	case SET:
	  if (needs_growing(ht))
	    new_size = ht->tab_size * 2;
	  else
	    goto dont_start_resizing;
	  break;
	default: // case REM
	  if (needs_shrinking(ht))
	    new_size = ht->tab_size / 2;
	  else
	    goto dont_start_resizing;
	}
  
      struct ht_node **newtab;
      if (!(newtab = malloc(new_size * sizeof(struct ht_node*))))
	return op == REM ? REHASHING_NOT_IN_PROGRESS : REHASHING_NO_MEM;
  
      ht->newtab = newtab;
      ht->new_size = new_size;
      ht->rehashing_position = 0;

      resizing = true;
    }
  
 dont_start_resizing:

  return resizing ?
    rehash_some_entries(ht) : REHASHING_NOT_IN_PROGRESS;
}

// This is a chaining hashtable, so each element in the array (table)
// is actually a list of entries. All operations (adding, removing,
// etc.) need to find the right list of entries (here called "bucket")
// for a given key first, so it makes sense to do it in a separate
// function. The bucket may be in tab or newtab if resizing is taking
// place. Being informed by the caller if resizing is in progress,
// this func does not need to check for it by itself.
static inline struct ht_node **find_bucket(hashtable_t *ht, const void *key,
					   bool resizing_in_progress)
{
  size_t hash = hash2(ht->hashfunc(key)),
    destination_tab_size, position;
  
  struct ht_node **destination_tab;
  
  if (resizing_in_progress)
    // Here we must check whether our key's bucket is still
    // in ht->tab or already rehashed to ht->newtab.
    {
      size_t smaller_tab_size = min(ht->tab_size, ht->new_size),
	smaller_tab_position = hash & (smaller_tab_size - 1);

      if (smaller_tab_position < ht->rehashing_position)
	{
	  destination_tab = ht->newtab;
	  destination_tab_size = ht->new_size;
	}
      else
	{
	  destination_tab = ht->tab;
	  destination_tab_size = ht->tab_size;
	}
    }
  else
    // In this case we know, we're working on ht->tab and not newtab.
    {
      destination_tab = ht->tab;
      destination_tab_size = ht->tab_size;
    }

  position = hash & (destination_tab_size - 1);
  return &destination_tab[position];
}

// Operations of adding, removing, etc. all work on list of entries
// (bucket) to wchich key hashes and they have some common logic, so
// it made sense to make a single function, that does the right
// operation based on an enum passed to it.
static inline int
perform_operation_on_bucket(hashtable_t *ht, struct ht_node **bucket,
			    const void *key, const void *val,
			    void **keyptr, void **valptr,
			    enum op op)
{
  for (struct ht_node **pairptr = bucket, *pair = *pairptr;
       pair;
       pairptr = &pair->next, pair = pair->next)
    
    if (!ht->cmpfunc(key, pair->key))
      {
	if (op == ADD)
	  return HT_KEY_PRESENT;
	
	if (keyptr) *keyptr = (void*) pair->key;
	if (valptr) *valptr = (void*) pair->val;
	
	switch (op)
	  {
	  case GET:
	    {
	      return HT_OK;
	    }
	  case SET:
	    {
	      pair->key = key;
	      pair->val = val;
	      return HT_OK;
	    }
	  default: // case REM
	    {
	      *pairptr = pair->next;
	      free(pair);
	      ht->entries--;
	      return HT_OK;
	    }
	  }
      }

  if (op == GET || op == REM)
    return HT_KEY_ABSENT;

  // op == ADD || op == SET

  struct ht_node *new_pair = malloc(sizeof(struct ht_node));
  if (!new_pair)
    return HT_NO_MEM;

  *new_pair = (struct ht_node) {.key = key, .val = val};
  push(new_pair, bucket);
  ht->entries++;

  return HT_OK;
}

// Generic function for performing of adding, removing, setting and
// getting.
static int perform_operation(hashtable_t *ht, const void *key, const void *val,
			     void **keyptr, void **valptr, enum op op)
{
  bool resizing_in_progress;

  switch (do_resizing_related_stuff(ht, key, op))
    {
    case REHASHING_IN_PROGRESS:
      resizing_in_progress = true;
      break;
    case REHASHING_NOT_IN_PROGRESS:
      resizing_in_progress = false;
      break;
    default: // case REHASHING_NO_MEM
      return HT_NO_MEM;
    }

  struct ht_node **bucket =
    find_bucket(ht, key, resizing_in_progress);

  return perform_operation_on_bucket(ht, bucket, key, val,
				     keyptr, valptr, op);
}

// The 4 functions below are the main part of the API.
int ht_get(hashtable_t *ht, const void *key,
	   void **storedkey, void **val)
{
  return perform_operation(ht, key, NULL, storedkey, val, GET);
}

int ht_add(hashtable_t *ht, const void *key, const void *val)
{
  return perform_operation(ht, key, val, NULL, NULL, ADD);
}

int ht_set(hashtable_t *ht, const void *key, const void *val,
	   void **oldkey, void **oldval)
{
  return perform_operation(ht, key, val, oldkey, oldval, SET);
}

int ht_rem(hashtable_t *ht, const void *key,
	   void **storedkey, void **val)
{
  return perform_operation(ht, key, NULL, storedkey, val, REM);
}

// As mentioned in hashtable.h, this func does not deallocate keys
// nor vals. One could use ht_map_destroy() if that is needed.
void ht_destroy(hashtable_t *ht)
{
  if (!ht->entries)
    return;
  
  ht_finish_resizing(ht);

  struct ht_node **tab = ht->tab;
  
  for (ssize_t position = ht->tab_size - 1;
       position >= 0; position--)
    {
      struct ht_node *pair = tab[position], *nextpair;
      
      while (pair)
	{
	  nextpair = pair->next;
	  free(pair);
	  pair = nextpair;
	}
    }
  
  free(ht->tab);
}

void ht_map(hashtable_t *ht, void *arg,
	    void (*mapfunc)(const void *key, void *val, void *arg))
{
  if (!ht->entries)
    return;
  
  ht_finish_resizing(ht);

  struct ht_node **tab = ht->tab;
  
  for (ssize_t position = ht->tab_size - 1;
       position >= 0; position--)
    
    for (struct ht_node *pair = tab[position];
	 pair; pair = pair->next)

      mapfunc(pair->key, (void*) pair->val, arg);
}

void ht_map_destroy(hashtable_t *ht, void *arg,
		    void (*mapfunc)(void *key, void *val, void *arg))
{
  // If mapfunc() deallocates keys, the following 2 lines make
  // assumption on ht_destroy(), that it doesn't call ht->hashfunc()
  // or ht->cmpfunc() on keys.
  ht_map(ht, arg, (void (*)(const void*, void*, void*)) mapfunc);
  ht_destroy(ht);
}

// These 2 functions are for easy making of hashtable with strings as
// keys. Note that this hash is *not* secure against DoS attacks.
size_t ht_string_hash(const char *key)
{
  size_t i = 0, hash = (size_t) 0xa1bad2dead3beef4;

  do
    {
      char shift = ((unsigned char) key[i]) % sizeof(size_t);
      hash += ((hash >> shift) | (hash << (sizeof(size_t) - shift)))
	^ key[i];
    }
  while (key[i++]);

  return hash;
}

int ht_string_init(hashtable_t *ht)
{
  return ht_init(ht, (size_t (*)(const void*)) &ht_string_hash,
		 (int (*)(const void*, const void*)) &strcmp);
}