From 975465c3f5302117eef779672ada76627371c3bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yaroslav=20de=20la=20Pe=C3=B1a=20Smirnov?= Date: Fri, 5 Nov 2021 19:45:02 +0300 Subject: Hashmap improvements and include guards Hashmap capacity can be set programmatically and it grows/shrinks in size when the load increases/decreases to avoid collisions/save memory. --- src/hashmap.c | 171 ++++++++++++++++++++++++++++++++++++++++++++++----------- src/hashmap.h | 49 ++++++++++++++--- src/template.h | 13 ++++- src/vector.h | 9 ++- 4 files changed, 199 insertions(+), 43 deletions(-) diff --git a/src/hashmap.c b/src/hashmap.c index 16a950f..fa9a295 100644 --- a/src/hashmap.c +++ b/src/hashmap.c @@ -1,41 +1,130 @@ +#include "hashmap.h" + +#include #include #include #include -#include "hashmap.h" + +#if SIZE_MAX == 0xFFFFFFFF +/* If size_t is 32bit */ +static const size_t fnv_prime = 16777619u; +static const size_t fnv_offsetb = 2166136261u; +#elif SIZE_MAX == 0xFFFFFFFFFFFFFFFF +/* If size_t is 64bit */ +static const size_t fnv_prime = 1099511628211u; +static const size_t fnv_offsetb = 14695981039346656037u; +#else +/* If size_t is 128bit. Maybe this is overdoing it? */ +static const size_t fnv_prime = 309485009821345068724781371u; +static const size_t fnv_offsetb = 144066263297769815596495629667062367629u; +#endif struct node { - char *key; + const char *key; void *value; struct node *next; }; -unsigned long -hash(char *str) +/* FNV1a */ +static size_t +hash(const char *str) { - unsigned long hash = 5381; - int c; + size_t hash = fnv_offsetb; + size_t i = 0; + while (str[i] != '\0') { + hash ^= str[i]; + hash *= fnv_prime; + i++; + } - while ((c = *str++)) { - hash = ((hash << 5) + hash) + c; /* hash * 33 + c */ - } + return hash; +} - return hash; +static void +htable_realloc(struct hashmap *hm, struct node **oldbucks, size_t oldcap) +{ + for (size_t i = 0; i < oldcap; i++) { + struct node *node = oldbucks[i]; + while (node) { + size_t pos = hash(node->key) % hm->cur_cap; + if (hm->buckets[pos]) { + struct node *newnext = hm->buckets[pos], + *newprev; + do { + newprev = newnext; + newnext = newprev->next; + } while (newnext); + newprev->next = node; + } else { + hm->buckets[pos] = node; + } + struct node *prev = node; + node = prev->next; + prev->next = NULL; + } + } + free(oldbucks); } -/* allocate a new hashmap */ -struct hashmap *hashmap_new() { - struct hashmap *hm = malloc(sizeof *hm); - if (!hm) err(EXIT_FAILURE, "out of memory"); - for (int i=0; i < HASHMAP_CAP; i++) { - hm->buckets[i] = NULL; - } +static bool +htable_grow(struct hashmap *hm) +{ + float load = (float)hm->size / (float)hm->cur_cap; + if (load <= HASHMAP_MAX_LOAD) return true; - return hm; + size_t oldcap = hm->cur_cap; + hm->cur_cap = hm->cur_cap * HASHMAP_GROW_RATE; + struct node **oldbucks = hm->buckets; + hm->buckets = calloc(hm->cur_cap, sizeof(**hm->buckets)); + if (!hm->buckets) return false; + + htable_realloc(hm, oldbucks, oldcap); + + return true; +} + +static bool +htable_shrink(struct hashmap *hm) +{ + if (hm->cur_cap <= hm->init_cap) return true; + + float load = (float)hm->size / (float)hm->cur_cap; + if (load >= HASHMAP_MIN_LOAD) return true; + + size_t oldcap = hm->cur_cap; + hm->cur_cap = hm->cur_cap / HASHMAP_GROW_RATE; + struct node **oldbucks = hm->buckets; + hm->buckets = calloc(hm->cur_cap, sizeof(**hm->buckets)); + if (!hm->buckets) return false; + htable_realloc(hm, oldbucks, oldcap); + return true; +} + +struct hashmap * +hashmap_new_with_cap(size_t cap) +{ + struct hashmap *hm = malloc(sizeof *hm); + hm->init_cap = cap; + hm->cur_cap = cap; + hm->size = 0; + if (hm == NULL) return NULL; + hm->buckets = calloc(cap, sizeof hm->buckets); + if (hm->buckets == NULL) { + free(hm); + return NULL; + } + + return hm; } -/* Inserts a key-value pair into the map. Returns NULL if map did not have key, old value if it did. */ -void *hashmap_insert(struct hashmap *hm, char *key, void *value) { - int pos = hash(key) % HASHMAP_CAP; +/* + * Inserts a key-value pair into the map. Returns NULL if map did not have key, + * old value if it did. + */ +void * +hashmap_insert(struct hashmap *hm, const char *key, void *value) +{ + int pos = hash(key) % hm->cur_cap; struct node *head = hm->buckets[pos]; struct node *node = head; void *old_value; @@ -54,12 +143,16 @@ void *hashmap_insert(struct hashmap *hm, char *key, void *value) { node->value = value; node->next = head; hm->buckets[pos] = node; + hm->size++; + htable_grow(hm); return NULL; } /* Returns a pointer to the value corresponding to the key. */ -void *hashmap_get(struct hashmap *hm, char *key) { - unsigned int pos = hash(key) % HASHMAP_CAP; +void * +hashmap_get(struct hashmap *hm, const char *key) +{ + size_t pos = hash(key) % hm->cur_cap; struct node *node = hm->buckets[pos]; while (node != NULL) { if (strcmp(node->key, key) == 0) { @@ -73,7 +166,9 @@ void *hashmap_get(struct hashmap *hm, char *key) { } /* Retrieve pointer to value by key, handles dot notation for nested hashmaps */ -void *hashmap_resolve(struct hashmap *hm, char *key) { +void * +hashmap_resolve(struct hashmap *hm, const char *key) +{ char tmp_key[64]; int i = 0; int j = 0; @@ -97,9 +192,14 @@ void *hashmap_resolve(struct hashmap *hm, char *key) { return hm; } -/* Removes a key from the map, returning the value at the key if the key was previously in the map. */ -void *hashmap_remove(struct hashmap *hm, char *key) { - int pos = hash(key) % HASHMAP_CAP; +/* + * Removes a key from the map, returning the value at the key if the key was + * previously in the map. + */ +void * +hashmap_remove(struct hashmap *hm, const char *key) +{ + int pos = hash(key) % hm->cur_cap; struct node *node = hm->buckets[pos]; struct node *prev = NULL; void *old_value; @@ -113,6 +213,8 @@ void *hashmap_remove(struct hashmap *hm, char *key) { } old_value = node->value; free(node); + hm->size--; + htable_shrink(hm); return old_value; } @@ -123,11 +225,13 @@ void *hashmap_remove(struct hashmap *hm, char *key) { return NULL; } -void hashmap_walk(struct hashmap *hm, void (*fn)(void *value)) { +void +hashmap_walk(struct hashmap *hm, void (*fn)(void *value)) +{ struct node *node; struct node *next; - for (int i=0; i < HASHMAP_CAP; i++) { + for (int i=0; i < hm->cur_cap; i++) { node = hm->buckets[i]; while (node) { next = node->next; @@ -138,11 +242,13 @@ void hashmap_walk(struct hashmap *hm, void (*fn)(void *value)) { } /* free hashmap related memory */ -void hashmap_free(struct hashmap *hm) { +void +hashmap_free(struct hashmap *hm) +{ struct node *node; struct node *next; - for (int i=0; i < HASHMAP_CAP; i++) { + for (int i=0; i < hm->cur_cap; i++) { node = hm->buckets[i]; while (node) { next = node->next; @@ -151,5 +257,6 @@ void hashmap_free(struct hashmap *hm) { } } + free(hm->buckets); free(hm); -} \ No newline at end of file +} diff --git a/src/hashmap.h b/src/hashmap.h index dbb45da..5a1e22d 100644 --- a/src/hashmap.h +++ b/src/hashmap.h @@ -1,13 +1,46 @@ -#define HASHMAP_CAP 26 +#ifndef UNJA_HASHMAP_H +#define UNJA_HASHMAP_H +#include +#include + +#ifndef HASHMAP_CAP +#define HASHMAP_CAP 32 +#endif + +#ifndef HASHMAP_MIN_LOAD +#define HASHMAP_MIN_LOAD 0.2 +#endif + +#ifndef HASHMAP_MAX_LOAD +#define HASHMAP_MAX_LOAD 0.75 +#endif + +#ifndef HASHMAP_GROW_RATE +#define HASHMAP_GROW_RATE 2 +#endif struct hashmap { - struct node *buckets[HASHMAP_CAP]; + struct node **buckets; + size_t init_cap; + size_t cur_cap; + size_t size; }; -struct hashmap *hashmap_new(); -void *hashmap_insert(struct hashmap *hm, char *key, void *value); -void *hashmap_get(struct hashmap *hm, char *key); -void *hashmap_resolve(struct hashmap *hm, char *key); -void *hashmap_remove(struct hashmap *hm, char *key); +/* allocate a new hashmap */ +struct hashmap *hashmap_new_with_cap(size_t cap); + +#define hashmap_new() hashmap_new_with_cap(HASHMAP_CAP) + +void *hashmap_insert(struct hashmap *hm, const char *key, void *value); + +void *hashmap_get(struct hashmap *hm, const char *key); + +void *hashmap_resolve(struct hashmap *hm, const char *key); + +void *hashmap_remove(struct hashmap *hm, const char *key); + void hashmap_free(struct hashmap *hm); -void hashmap_walk(struct hashmap *hm, void (*fn)(void *value)); \ No newline at end of file + +void hashmap_walk(struct hashmap *hm, void (*fn)(void *value)); + +#endif diff --git a/src/template.h b/src/template.h index 45a58fc..6defb02 100644 --- a/src/template.h +++ b/src/template.h @@ -1,10 +1,19 @@ +#ifndef UNJA_TEMPLATE_H +#define UNJA_TEMPLATE_H + #include "hashmap.h" #include "vector.h" - struct env; + struct env *env_new(); + void env_free(struct env *env); + char *template(struct env *env, char *template_name, struct hashmap *ctx); + char *template_string(char *tmpl, struct hashmap *ctx); -char *read_file(char *filename); \ No newline at end of file + +char *read_file(char *filename); + +#endif diff --git a/src/vector.h b/src/vector.h index e4a912a..0a7c297 100644 --- a/src/vector.h +++ b/src/vector.h @@ -1,3 +1,6 @@ +#ifndef UNJA_VECTOR_H +#define UNJA_VECTOR_H + #include struct vector { @@ -7,5 +10,9 @@ struct vector { }; struct vector* vector_new(int cap); + int vector_push(struct vector *vec, void *value); -void vector_free(struct vector *vec); \ No newline at end of file + +void vector_free(struct vector *vec); + +#endif -- cgit v1.2.3