Linear Probing Hash Table
https://en.wikipedia.org/wiki/Hash_table
#include <assert.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// using liner proing &
#define DEFALT_BUCKETS 8
#define MAX_KEY_SIZE 128
typedef struct hashmap {
int n_bucket;
int cnt;
void **buckets;
} HashMap;
typedef struct node {
char *key;
void *val;
} Node;
int hash(char *str, int m);
void rehash(HashMap *map);
HashMap *Hashmap_new();
void Hashmap_set(HashMap *map, char *key, void *value);
bool Hashmap_exists(HashMap *map, char *key);
void *Hashmap_get(HashMap *map, char *key);
void Hashmap_delete(HashMap *map, char *key);
void Hashmap_free(HashMap *map);
void node_free(Node *node);
Node *node_new(char *key, void *val);
HashMap *Hashmap_new() {
HashMap *res = (HashMap *)malloc(sizeof(HashMap));
res->n_bucket = DEFALT_BUCKETS;
res->buckets = calloc(DEFALT_BUCKETS, sizeof(void *));
res->cnt = 0;
return res;
}
// https://theartincode.stanis.me/008-djb2/
// djb2 hash
int hash(char *str, int m) {
unsigned long hash = 5381;
while (*str != '\0') {
hash = ((hash << 5) + hash) + (int)(*str);
str++;
}
return hash % m;
}
void rehash(HashMap *map) {
void **old_buckets = map->buckets;
int old_n = map->n_bucket;
map->n_bucket *= 2;
map->buckets = calloc(map->n_bucket, sizeof(void *));
map->cnt = 0;
Node *cur_node;
for (int i = 0; i < old_n; i++) {
cur_node = (Node *)old_buckets[i];
if (cur_node != NULL) {
// FIXME: 优化直接使用原先的node 而不是释放copy
Hashmap_set(map, cur_node->key, cur_node->val);
free(cur_node->key);
free(old_buckets[i]);
}
}
free(old_buckets);
}
// if the key already exists, update value
void Hashmap_set(HashMap *map, char *key, void *value) {
if (map->cnt > map->n_bucket * 0.75) {
rehash(map);
}
int idx = hash(key, map->n_bucket);
int start_idx = idx;
Node *cur_node;
while (map->buckets[idx] != NULL) {
cur_node = (Node *)map->buckets[idx];
if (strcmp(cur_node->key, key) == 0) {
cur_node->val = value;
return;
}
idx = (idx + 1) % map->n_bucket;
if (idx == start_idx) {
// FIXME: fail as we rehash when the load factor exceeds 0.75
assert(1 == 2);
return;
}
}
map->buckets[idx] = node_new(key, value);
map->cnt += 1;
}
bool Hashmap_exists(HashMap *map, char *key) {
int idx = hash(key, map->n_bucket);
int start_idx = idx;
while (map->buckets[idx] != NULL) {
Node *cur_node = (Node *)map->buckets[idx];
if (strcmp(cur_node->key, key) == 0) {
return true;
}
idx = (idx + 1) % map->n_bucket;
if (idx == start_idx) {
return false;
}
}
return false;
}
void *Hashmap_get(HashMap *map, char *key) {
int idx = hash(key, map->n_bucket);
Node *n = (Node *)map->buckets[idx];
if (n == NULL) {
return NULL;
}
while (n != NULL && strcmp(n->key, key) != 0) {
idx = (idx + 1) % map->n_bucket;
n = (Node *)map->buckets[idx];
}
if (n == NULL) {
return NULL;
}
return n->val;
}
void Hashmap_delete(HashMap *map, char *key) {
int idx = hash(key, map->n_bucket);
Node *n = (Node *)map->buckets[idx];
if (n == NULL) {
return;
}
node_free(n);
map->buckets[idx] = NULL;
map->cnt -= 1;
}
void Hashmap_free(HashMap *map) {
for (int i = 0; i < map->n_bucket; i++) {
node_free(map->buckets[i]);
}
free(map->buckets);
free(map);
}
Node *node_new(char *key, void *val) {
Node *res = (Node *)malloc(sizeof(Node));
int n = strlen(key) + 1;
char *k = (char *)malloc(n);
strncpy(k, key, n);
res->key = k;
res->val = val;
}
void node_free(Node *node) {
if (node == NULL) {
return;
}
free(node->key);
free(node);
}
int main() {
HashMap *h = Hashmap_new();
// basic get/set functionality
int a = 5;
float b = 7.2;
Hashmap_set(h, "item a", &a);
Hashmap_set(h, "item b", &b);
assert(Hashmap_exists(h, "item a"));
assert(Hashmap_exists(h, "item b"));
assert(!Hashmap_exists(h, "item c"));
assert(Hashmap_get(h, "item a") == &a);
assert(Hashmap_get(h, "item b") == &b);
// using the same key should override the previous value
int c = 20;
Hashmap_set(h, "item a", &c);
assert(Hashmap_get(h, "item a") == &c);
// basic delete functionality
Hashmap_delete(h, "item a");
assert(Hashmap_get(h, "item a") == NULL);
assert(!Hashmap_exists(h, "item a"));
// handle collisions correctly
// note: this doesn't necessarily test expansion
int i, n = DEFALT_BUCKETS * 10, ns[n];
char key[MAX_KEY_SIZE];
for (i = 0; i < n; i++) {
ns[i] = i;
sprintf(key, "item %d", i);
Hashmap_set(h, key, &ns[i]);
}
for (i = 0; i < n; i++) {
sprintf(key, "item %d", i);
void *val = Hashmap_get(h, key);
assert(val == &ns[i]);
}
Hashmap_free(h);
/*
stretch goals:
- expand the underlying array if we start to get a lot of collisions
- support non-string keys
- try different hash functions
- switch from chaining to open addressing
- use a sophisticated rehashing scheme to avoid clustered collisions
- implement some features from Python dicts, such as reducing space use,
maintaing key ordering etc. see https://www.youtube.com/watch?v=npw4s1QTmPg
for ideas
*/
printf("ok\n");
return 0;
}