hashtbl: revamp the hash table interface, support binary keys

Add binary key support to the hash table interface. Clean up the interface to contain less extraneous crud. Signed-off-by: H. Peter Anvin (Intel) <hpa@zytor.com>
2025-10-10 00:25:06 -04:00 · 2018-12-11 12:30:25 -08:00
parent ddb290681e
commit ebb05a0e5f
11 changed files with 214 additions and 151 deletions
--- a/asm/labels.c
+++ b/asm/labels.c
@@ -545,8 +545,6 @@ void backend_label(const char *label, int32_t segment, int64_t offset)

 int init_labels(void)
 {
-    hash_init(&ltab, HASH_LARGE);
-
    ldata = lfree = nasm_malloc(LBLK_SIZE);
    init_block(lfree);

--- a/asm/preproc.c
+++ b/asm/preproc.c
@@ -623,12 +623,13 @@ static void free_mmacro(MMacro * m)
 */
 static void free_smacro_table(struct hash_table *smt)
 {
-    SMacro *s, *tmp;
-    const char *key;
-    struct hash_tbl_node *it = NULL;
+    struct hash_iterator it;
+    const struct hash_node *np;

-    while ((s = hash_iterate(smt, &it, &key)) != NULL) {
-        nasm_free((void *)key);
+    hash_for_each(smt, it, np) {
+        SMacro *tmp;
+        SMacro *s = np->data;
+        nasm_free((void *)np->key);
        list_for_each_safe(s, tmp, s) {
            nasm_free(s->name);
            free_tlist(s->expansion);
@@ -640,14 +641,14 @@ static void free_smacro_table(struct hash_table *smt)

 static void free_mmacro_table(struct hash_table *mmt)
 {
-    MMacro *m, *tmp;
-    const char *key;
-    struct hash_tbl_node *it = NULL;
+    struct hash_iterator it;
+    const struct hash_node *np;

-    it = NULL;
-    while ((m = hash_iterate(mmt, &it, &key)) != NULL) {
-        nasm_free((void *)key);
-        list_for_each_safe(m ,tmp, m)
+    hash_for_each(mmt, it, np) {
+        MMacro *tmp;
+        MMacro *m = np->data;
+        nasm_free((void *)np->key);
+        list_for_each_safe(m, tmp, m)
            free_mmacro(m);
    }
    hash_free(mmt);
@@ -664,8 +665,6 @@ static void free_macros(void)
 */
 static void init_macros(void)
 {
-    hash_init(&smacros, HASH_LARGE);
-    hash_init(&mmacros, HASH_LARGE);
 }

 /*
@@ -691,12 +690,14 @@ hash_findi_add(struct hash_table *hash, const char *str)
    struct hash_insert hi;
    void **r;
    char *strx;
+    size_t l = strlen(str) + 1;

-    r = hash_findi(hash, str, &hi);
+    r = hash_findib(hash, str, l, &hi);
    if (r)
        return r;

-    strx = nasm_strdup(str);    /* Use a more efficient allocator here? */
+    strx = nasm_malloc(l);  /* Use a more efficient allocator here? */
+    memcpy(strx, str, l);
    return hash_add(&hi, strx, NULL);
 }

@@ -2624,9 +2625,8 @@ static int do_directive(Token *tline, char **output)
        }

        if (i == PP_PUSH) {
-            ctx = nasm_malloc(sizeof(Context));
+            nasm_new(ctx);
            ctx->next = cstk;
-            hash_init(&ctx->localmac, HASH_SMALL);
            ctx->name = p;
            ctx->number = unique++;
            cstk = ctx;
@@ -4932,7 +4932,6 @@ pp_reset(const char *file, int apass, struct strlist *dep_list)

 static void pp_init(void)
 {
-    hash_init(&FileHash, HASH_MEDIUM);
 }

 static char *pp_getline(void)
--- a/asm/srcfile.c
+++ b/asm/srcfile.c
@@ -50,7 +50,6 @@ static struct hash_table filename_hash;

 void src_init(void)
 {
-    hash_init(&filename_hash, HASH_MEDIUM);
 }

 void src_free(void)
--- a/include/hashtbl.h
+++ b/include/hashtbl.h
@@ -1,6 +1,6 @@
 /* ----------------------------------------------------------------------- *
 *   
- *   Copyright 1996-2017 The NASM Authors - All Rights Reserved
+ *   Copyright 1996-2018 The NASM Authors - All Rights Reserved
 *   See the file AUTHORS included with the NASM distribution for
 *   the specific copyright holders.
 *
@@ -43,43 +43,58 @@
 #include <stddef.h>
 #include "nasmlib.h"

-struct hash_tbl_node {
+struct hash_node {
    uint64_t hash;
-    const char *key;
+    const void *key;
+    size_t keylen;
    void *data;
 };

 struct hash_table {
-    struct hash_tbl_node *table;
+    struct hash_node *table;
    size_t load;
    size_t size;
    size_t max_load;
 };

 struct hash_insert {
-    uint64_t hash;
    struct hash_table *head;
-    struct hash_tbl_node *where;
+    struct hash_node *where;
+    struct hash_node node;
+};
+
+struct hash_iterator {
+    const struct hash_table *head;
+    const struct hash_node *next;
 };

 uint64_t crc64(uint64_t crc, const char *string);
 uint64_t crc64i(uint64_t crc, const char *string);
+uint64_t crc64b(uint64_t crc, const void *data, size_t len);
+uint64_t crc64ib(uint64_t crc, const void *data, size_t len);
 #define CRC64_INIT UINT64_C(0xffffffffffffffff)

-/* Some reasonable initial sizes... */
-#define HASH_SMALL	4
-#define HASH_MEDIUM	16
-#define HASH_LARGE	256
-
-void hash_init(struct hash_table *head, size_t size);
 void **hash_find(struct hash_table *head, const char *string,
 		struct hash_insert *insert);
+void **hash_findb(struct hash_table *head, const void *key, size_t keylen,
+		struct hash_insert *insert);
 void **hash_findi(struct hash_table *head, const char *string,
 		struct hash_insert *insert);
-void **hash_add(struct hash_insert *insert, const char *string, void *data);
-void *hash_iterate(const struct hash_table *head,
-		   struct hash_tbl_node **iterator,
-		   const char **key);
+void **hash_findib(struct hash_table *head, const void *key, size_t keylen,
+                   struct hash_insert *insert);
+void **hash_add(struct hash_insert *insert, const void *key, void *data);
+static inline void hash_iterator_init(const struct hash_table *head,
+                                      struct hash_iterator *iterator)
+{
+    iterator->head = head;
+    iterator->next = head->table;
+}
+const struct hash_node *hash_iterate(struct hash_iterator *iterator);
+
+#define hash_for_each(_head,_it,_np) \
+    for (hash_iterator_init((_head), &(_it)), (_np) = hash_iterate(&(_it)) ; \
+         (_np) ; (_np) = hash_iterate(&(_it)))
+
 void hash_free(struct hash_table *head);
 void hash_free_all(struct hash_table *head, bool free_keys);

--- a/include/strlist.h
+++ b/include/strlist.h
@@ -44,7 +44,7 @@

 struct strlist_entry {
 	struct strlist_entry	*next;
-	size_t			len;
+	size_t			size;
 	char			str[1];
 };

--- a/nasmlib/crc64.c
+++ b/nasmlib/crc64.c
@@ -187,3 +187,25 @@ uint64_t crc64i(uint64_t crc, const char *str)

    return crc;
 }
+
+uint64_t crc64b(uint64_t crc, const void *data, size_t len)
+{
+    const uint8_t *str = data;
+
+    while (len--) {
+	crc = crc64_tab[(uint8_t)crc ^ *str++] ^ (crc >> 8);
+    }
+
+    return crc;
+}
+
+uint64_t crc64ib(uint64_t crc, const void *data, size_t len)
+{
+    const uint8_t *str = data;
+
+    while (len--) {
+	crc = crc64_tab[(uint8_t)crc ^ nasm_tolower(*str++)] ^ (crc >> 8);
+    }
+
+    return crc;
+}
--- a/nasmlib/hashtbl.c
+++ b/nasmlib/hashtbl.c
@@ -1,6 +1,6 @@
 /* ----------------------------------------------------------------------- *
 *
- *   Copyright 1996-2009 The NASM Authors - All Rights Reserved
+ *   Copyright 1996-2018 The NASM Authors - All Rights Reserved
 *   See the file AUTHORS included with the NASM distribution for
 *   the specific copyright holders.
 *
@@ -43,10 +43,11 @@
 #include "nasm.h"
 #include "hashtbl.h"

-#define HASH_MAX_LOAD   2 /* Higher = more memory-efficient, slower */
+#define HASH_MAX_LOAD   2	/* Higher = more memory-efficient, slower */
+#define HASH_INIT_SIZE  16      /* Initial size (power of 2, min 4) */

-#define hash_calc(key)          crc64(CRC64_INIT, (key))
-#define hash_calci(key)         crc64i(CRC64_INIT, (key))
+#define hash_calc(key,keylen)   crc64b(CRC64_INIT, (key), (keylen))
+#define hash_calci(key,keylen)  crc64ib(CRC64_INIT, (key), (keylen))
 #define hash_max_load(size)     ((size) * (HASH_MAX_LOAD - 1) / HASH_MAX_LOAD)
 #define hash_expand(size)       ((size) << 1)
 #define hash_mask(size)         ((size) - 1)
@@ -54,129 +55,167 @@
 #define hash_inc(hash, mask)    ((((hash) >> 32) & (mask)) | 1) /* always odd */
 #define hash_pos_next(pos, inc, mask) (((pos) + (inc)) & (mask))

-static struct hash_tbl_node *alloc_table(size_t newsize)
+static void hash_init(struct hash_table *head)
 {
-    size_t bytes = newsize * sizeof(struct hash_tbl_node);
-    return nasm_zalloc(bytes);
-}
-
-void hash_init(struct hash_table *head, size_t size)
-{
-    nasm_assert(is_power2(size));
-    head->table    = alloc_table(size);
+    head->size     = HASH_INIT_SIZE;
    head->load     = 0;
-    head->size     = size;
-    head->max_load = hash_max_load(size);
+    head->max_load = hash_max_load(head->size);
+    nasm_newn(head->table, head->size);
 }

 /*
- * Find an entry in a hash table.
+ * Find an entry in a hash table.  The key can be any binary object.
 *
 * On failure, if "insert" is non-NULL, store data in that structure
 * which can be used to insert that node using hash_add().
- *
- * WARNING: this data is only valid until the very next call of
- * hash_add(); it cannot be "saved" to a later date.
+ * See hash_add() for constraints on the uses of the insert object.
 *
 * On success, return a pointer to the "data" element of the hash
 * structure.
 */
-void **hash_find(struct hash_table *head, const char *key,
-                 struct hash_insert *insert)
+void **hash_findb(struct hash_table *head, const void *key,
+                  size_t keylen, struct hash_insert *insert)
 {
-    struct hash_tbl_node *np;
-    struct hash_tbl_node *tbl = head->table;
-    uint64_t hash = hash_calc(key);
+    struct hash_node *np = NULL;
+    struct hash_node *tbl = head->table;
+    uint64_t hash = hash_calc(key, keylen);
    size_t mask = hash_mask(head->size);
    size_t pos = hash_pos(hash, mask);
    size_t inc = hash_inc(hash, mask);

-    while ((np = &tbl[pos])->key) {
-        if (hash == np->hash && !strcmp(key, np->key))
-            return &np->data;
-        pos = hash_pos_next(pos, inc, mask);
+    if (likely(tbl)) {
+        while ((np = &tbl[pos])->key) {
+            if (hash == np->hash &&
+                keylen == np->keylen &&
+                !memcmp(key, np->key, keylen))
+                return &np->data;
+            pos = hash_pos_next(pos, inc, mask);
+        }
    }

    /* Not found.  Store info for insert if requested. */
    if (insert) {
+        insert->node.hash = hash;
+        insert->node.key = key;
+        insert->node.keylen = keylen;
+        insert->node.data = NULL;
        insert->head  = head;
-        insert->hash  = hash;
        insert->where = np;
    }
    return NULL;
 }

 /*
- * Same as hash_find, but for case-insensitive hashing.
+ * Same as hash_findb(), but for a C string.
 */
-void **hash_findi(struct hash_table *head, const char *key,
-                  struct hash_insert *insert)
+void **hash_find(struct hash_table *head, const char *key,
+                 struct hash_insert *insert)
 {
-    struct hash_tbl_node *np;
-    struct hash_tbl_node *tbl = head->table;
-    uint64_t hash = hash_calci(key);
+    return hash_findb(head, key, strlen(key)+1, insert);
+}
+
+/*
+ * Same as hash_findb(), but for case-insensitive hashing.
+ */
+void **hash_findib(struct hash_table *head, const void *key, size_t keylen,
+                   struct hash_insert *insert)
+{
+    struct hash_node *np = NULL;
+    struct hash_node *tbl = head->table;
+    uint64_t hash = hash_calci(key, keylen);
    size_t mask = hash_mask(head->size);
    size_t pos = hash_pos(hash, mask);
    size_t inc = hash_inc(hash, mask);

-    while ((np = &tbl[pos])->key) {
-        if (hash == np->hash && !nasm_stricmp(key, np->key))
-            return &np->data;
-        pos = hash_pos_next(pos, inc, mask);
+    if (likely(tbl)) {
+        while ((np = &tbl[pos])->key) {
+            if (hash == np->hash &&
+                keylen == np->keylen &&
+                !nasm_memicmp(key, np->key, keylen))
+                return &np->data;
+            pos = hash_pos_next(pos, inc, mask);
+        }
    }

    /* Not found.  Store info for insert if requested. */
    if (insert) {
+        insert->node.hash = hash;
+        insert->node.key = key;
+        insert->node.keylen = keylen;
+        insert->node.data = NULL;
        insert->head  = head;
-        insert->hash  = hash;
        insert->where = np;
    }
    return NULL;
 }

+/*
+ * Same as hash_find(), but for case-insensitive hashing.
+ */
+void **hash_findi(struct hash_table *head, const char *key,
+                  struct hash_insert *insert)
+{
+    return hash_findib(head, key, strlen(key)+1, insert);
+}
+
 /*
 * Insert node.  Return a pointer to the "data" element of the newly
 * created hash node.
+ *
+ * The following constraints apply:
+ * 1. A call to hash_add() invalidates all other outstanding hash_insert
+ *    objects; attempting to use them causes a wild pointer reference.
+ * 2. The key provided must exactly match the key passed to hash_find*(),
+ *    but it does not have to point to the same storage address. The key
+ *    buffer provided to this function must not be freed for the lifespan
+ *    of the hash. NULL will use the same pointer that was passed to
+ *    hash_find*().
 */
-void **hash_add(struct hash_insert *insert, const char *key, void *data)
+void **hash_add(struct hash_insert *insert, const void *key, void *data)
 {
    struct hash_table *head  = insert->head;
-    struct hash_tbl_node *np = insert->where;
+    struct hash_node *np = insert->where;
+
+    if (unlikely(!np)) {
+        hash_init(head);
+        /* The hash table is empty, so we don't need to iterate here */
+        np = &head->table[hash_pos(insert->node.hash, hash_mask(head->size))];
+    }

    /*
     * Insert node.  We can always do this, even if we need to
     * rebalance immediately after.
     */
-    np->hash = insert->hash;
-    np->key  = key;
+    *np = insert->node;
    np->data = data;
+    if (key)
+        np->key = key;

-    if (++head->load > head->max_load) {
+    if (unlikely(++head->load > head->max_load)) {
        /* Need to expand the table */
-        size_t newsize                  = hash_expand(head->size);
-        struct hash_tbl_node *newtbl    = alloc_table(newsize);
-        size_t mask                     = hash_mask(newsize);
+        size_t newsize           = hash_expand(head->size);
+        struct hash_node *newtbl;
+        size_t mask              = hash_mask(newsize);
+        struct hash_node *op, *xp;
+        size_t i;

-        if (head->table) {
-            struct hash_tbl_node *op, *xp;
-            size_t i;
+        nasm_newn(newtbl, newsize);

-            /* Rebalance all the entries */
-            for (i = 0, op = head->table; i < head->size; i++, op++) {
-                if (op->key) {
-                    size_t pos = hash_pos(op->hash, mask);
-                    size_t inc = hash_inc(op->hash, mask);
+        /* Rebalance all the entries */
+        for (i = 0, op = head->table; i < head->size; i++, op++) {
+            if (op->key) {
+                size_t pos = hash_pos(op->hash, mask);
+                size_t inc = hash_inc(op->hash, mask);

-                    while ((xp = &newtbl[pos])->key)
-                        pos = hash_pos_next(pos, inc, mask);
+                while ((xp = &newtbl[pos])->key)
+                    pos = hash_pos_next(pos, inc, mask);

-                    *xp = *op;
-                    if (op == np)
-                        np = xp;
-                }
+                *xp = *op;
+                if (op == np)
+                    np = xp;
            }
-            nasm_free(head->table);
        }
+        nasm_free(head->table);

        head->table    = newtbl;
        head->size     = newsize;
@@ -187,36 +226,30 @@ void **hash_add(struct hash_insert *insert, const char *key, void *data)
 }

 /*
- * Iterate over all members of a hash set.  For the first call,
- * iterator should be initialized to NULL.  Returns the data pointer,
- * or NULL on failure.
+ * Iterate over all members of a hash set. For the first call,
+ * iter->node should be initialized to NULL. Returns a pointer to
+ * a struct hash_node representing the current object, or NULL
+ * if we have reached the end of the hash table; this is the
+ *
+ * Calling hash_add() will invalidate the iterator.
 */
-void *hash_iterate(const struct hash_table *head,
-                   struct hash_tbl_node **iterator,
-                   const char **key)
+const struct hash_node *hash_iterate(struct hash_iterator *iter)
 {
-    struct hash_tbl_node *np = *iterator;
-    struct hash_tbl_node *ep = head->table + head->size;
+    const struct hash_table *head = iter->head;
+    const struct hash_node *cp = iter->next;
+    const struct hash_node *ep = head->table + head->size;

-    if (!np) {
-        np = head->table;
-        if (!np)
-            return NULL;        /* Uninitialized table */
-    }
-
-    while (np < ep) {
+    /* For an empty table, np == ep == NULL */
+    while (cp < ep) {
+        const struct hash_node *np = cp+1;
        if (np->key) {
-            *iterator = np + 1;
-            if (key)
-                *key = np->key;
-            return np->data;
+            iter->next = np;
+            return cp;
        }
-        np++;
+        cp = np;
    }

-    *iterator = NULL;
-    if (key)
-        *key = NULL;
+    iter->next = head->table;
    return NULL;
 }

@@ -229,8 +262,10 @@ void *hash_iterate(const struct hash_table *head,
 void hash_free(struct hash_table *head)
 {
    void *p = head->table;
-    head->table = NULL;
-    nasm_free(p);
+    if (likely(p)) {
+        head->table = NULL;
+        nasm_free(p);
+    }
 }

 /*
@@ -242,14 +277,13 @@ void hash_free(struct hash_table *head)
 */
 void hash_free_all(struct hash_table *head, bool free_keys)
 {
-    struct hash_tbl_node *iter = NULL;
-    const char *keyp;
-    void *d;
+    struct hash_iterator it;
+    const struct hash_node *np;

-    while ((d = hash_iterate(head, &iter, &keyp))) {
-        nasm_free(d);
+    hash_for_each(head, it, np) {
+        nasm_free(np->data);
        if (free_keys)
-            nasm_free((void *)keyp);
+            nasm_free((void *)np->key);
    }

    hash_free(head);
--- a/nasmlib/strlist.c
+++ b/nasmlib/strlist.c
@@ -43,7 +43,6 @@
 struct strlist *strlist_alloc(void)
 {
 	struct strlist *list = nasm_zalloc(sizeof(*list));
-	hash_init(&list->hash, HASH_MEDIUM);
 	list->tailp = &list->head;
 	return list;
 }
@@ -56,20 +55,19 @@ bool strlist_add(struct strlist *list, const char *str)
 {
 	struct strlist_entry *e;
 	struct hash_insert hi;
-	size_t len;
+	size_t size;

 	if (!list)
 		return false;

-	if (hash_find(&list->hash, str, &hi))
+	size = strlen(str) + 1;
+	if (hash_findb(&list->hash, str, size, &hi))
 		return false;

-	len = strlen(str);
-
 	/* Structure already has char[1] as EOS */
-	e = nasm_zalloc(sizeof(*e) + len);
-	e->len = len;
-	memcpy(e->str, str, len + 1);
+	e = nasm_zalloc(sizeof(*e) - 1 + size);
+	e->size = size;
+	memcpy(e->str, str, size);

 	*list->tailp = e;
 	list->tailp = &e->next;
--- a/output/codeview.c
+++ b/output/codeview.c
@@ -177,7 +177,6 @@ static void cv8_init(void)

    cv8_state.source_files = NULL;
    cv8_state.source_files_tail = &cv8_state.source_files;
-    hash_init(&cv8_state.file_hash, HASH_MEDIUM);

    cv8_state.num_files = 0;
    cv8_state.total_filename_len = 0;
--- a/output/outmacho.c
+++ b/output/outmacho.c
@@ -363,7 +363,6 @@ static void macho_init(void)
    strs = saa_init(1L);

    section_by_index = raa_init();
-    hash_init(&section_by_name, HASH_MEDIUM);

    /* string table starts with a zero byte so index 0 is an empty string */
    saa_wbytes(strs, zero_buffer, 1);
--- a/output/strtbl.c
+++ b/output/strtbl.c
@@ -56,7 +56,6 @@ struct strtbl_entry {
 void strtbl_init(struct nasm_strtbl *tbl)
 {
    tbl->size = 0;
-    hash_init(&tbl->hash, HASH_LARGE);
    strtbl_add(tbl, "");       /* Index 0 is always an empty string */
 }

@@ -70,14 +69,13 @@ size_t strtbl_add(struct nasm_strtbl *tbl, const char *str)
    void **sep;
    struct strtbl_entry *se;
    struct hash_insert hi;
+    size_t bytes = strlen(str) + 1;

-    sep = hash_find(&tbl->hash, str, &hi);
+    sep = hash_findb(&tbl->hash, str, bytes, &hi);
    if (sep) {
        se = *sep;
    } else {
-        size_t bytes = strlen(str) + 1;
-
-        se = nasm_malloc(sizeof(struct strtbl_entry)-1+bytes);
+        nasm_new(se);
        se->index = tbl->size;
        tbl->size += bytes;
        se->bytes = bytes;
@@ -107,11 +105,13 @@ size_t strtbl_find(struct nasm_strtbl *tbl, const char *str)
 void *strtbl_generate(const struct nasm_strtbl *tbl)
 {
    char *buf = nasm_malloc(strtbl_size(tbl));
-    struct hash_tbl_node *iter = NULL;
-    struct strtbl_entry *se;
+    struct hash_iterator it;
+    const struct hash_node *np;

-    while ((se = hash_iterate(&tbl->hash, &iter, NULL)))
+    hash_for_each(&tbl->hash, it, np) {
+        struct strtbl_entry *se = np->data;
        memcpy(buf + se->index, se->str, se->bytes);
+    }

    return buf;
 }