0
0
mirror of https://github.com/netwide-assembler/nasm.git synced 2025-10-10 00:25:06 -04:00

hashtbl: revamp the hash table interface, support binary keys

Add binary key support to the hash table interface. Clean up the
interface to contain less extraneous crud.

Signed-off-by: H. Peter Anvin (Intel) <hpa@zytor.com>
This commit is contained in:
H. Peter Anvin (Intel)
2018-12-11 12:30:25 -08:00
parent ddb290681e
commit ebb05a0e5f
11 changed files with 214 additions and 151 deletions

View File

@@ -545,8 +545,6 @@ void backend_label(const char *label, int32_t segment, int64_t offset)
int init_labels(void) int init_labels(void)
{ {
hash_init(&ltab, HASH_LARGE);
ldata = lfree = nasm_malloc(LBLK_SIZE); ldata = lfree = nasm_malloc(LBLK_SIZE);
init_block(lfree); init_block(lfree);

View File

@@ -623,12 +623,13 @@ static void free_mmacro(MMacro * m)
*/ */
static void free_smacro_table(struct hash_table *smt) static void free_smacro_table(struct hash_table *smt)
{ {
SMacro *s, *tmp; struct hash_iterator it;
const char *key; const struct hash_node *np;
struct hash_tbl_node *it = NULL;
while ((s = hash_iterate(smt, &it, &key)) != NULL) { hash_for_each(smt, it, np) {
nasm_free((void *)key); SMacro *tmp;
SMacro *s = np->data;
nasm_free((void *)np->key);
list_for_each_safe(s, tmp, s) { list_for_each_safe(s, tmp, s) {
nasm_free(s->name); nasm_free(s->name);
free_tlist(s->expansion); free_tlist(s->expansion);
@@ -640,13 +641,13 @@ static void free_smacro_table(struct hash_table *smt)
static void free_mmacro_table(struct hash_table *mmt) static void free_mmacro_table(struct hash_table *mmt)
{ {
MMacro *m, *tmp; struct hash_iterator it;
const char *key; const struct hash_node *np;
struct hash_tbl_node *it = NULL;
it = NULL; hash_for_each(mmt, it, np) {
while ((m = hash_iterate(mmt, &it, &key)) != NULL) { MMacro *tmp;
nasm_free((void *)key); MMacro *m = np->data;
nasm_free((void *)np->key);
list_for_each_safe(m, tmp, m) list_for_each_safe(m, tmp, m)
free_mmacro(m); free_mmacro(m);
} }
@@ -664,8 +665,6 @@ static void free_macros(void)
*/ */
static void init_macros(void) static void init_macros(void)
{ {
hash_init(&smacros, HASH_LARGE);
hash_init(&mmacros, HASH_LARGE);
} }
/* /*
@@ -691,12 +690,14 @@ hash_findi_add(struct hash_table *hash, const char *str)
struct hash_insert hi; struct hash_insert hi;
void **r; void **r;
char *strx; char *strx;
size_t l = strlen(str) + 1;
r = hash_findi(hash, str, &hi); r = hash_findib(hash, str, l, &hi);
if (r) if (r)
return r; return r;
strx = nasm_strdup(str); /* Use a more efficient allocator here? */ strx = nasm_malloc(l); /* Use a more efficient allocator here? */
memcpy(strx, str, l);
return hash_add(&hi, strx, NULL); return hash_add(&hi, strx, NULL);
} }
@@ -2624,9 +2625,8 @@ static int do_directive(Token *tline, char **output)
} }
if (i == PP_PUSH) { if (i == PP_PUSH) {
ctx = nasm_malloc(sizeof(Context)); nasm_new(ctx);
ctx->next = cstk; ctx->next = cstk;
hash_init(&ctx->localmac, HASH_SMALL);
ctx->name = p; ctx->name = p;
ctx->number = unique++; ctx->number = unique++;
cstk = ctx; cstk = ctx;
@@ -4932,7 +4932,6 @@ pp_reset(const char *file, int apass, struct strlist *dep_list)
static void pp_init(void) static void pp_init(void)
{ {
hash_init(&FileHash, HASH_MEDIUM);
} }
static char *pp_getline(void) static char *pp_getline(void)

View File

@@ -50,7 +50,6 @@ static struct hash_table filename_hash;
void src_init(void) void src_init(void)
{ {
hash_init(&filename_hash, HASH_MEDIUM);
} }
void src_free(void) void src_free(void)

View File

@@ -1,6 +1,6 @@
/* ----------------------------------------------------------------------- * /* ----------------------------------------------------------------------- *
* *
* Copyright 1996-2017 The NASM Authors - All Rights Reserved * Copyright 1996-2018 The NASM Authors - All Rights Reserved
* See the file AUTHORS included with the NASM distribution for * See the file AUTHORS included with the NASM distribution for
* the specific copyright holders. * the specific copyright holders.
* *
@@ -43,43 +43,58 @@
#include <stddef.h> #include <stddef.h>
#include "nasmlib.h" #include "nasmlib.h"
struct hash_tbl_node { struct hash_node {
uint64_t hash; uint64_t hash;
const char *key; const void *key;
size_t keylen;
void *data; void *data;
}; };
struct hash_table { struct hash_table {
struct hash_tbl_node *table; struct hash_node *table;
size_t load; size_t load;
size_t size; size_t size;
size_t max_load; size_t max_load;
}; };
struct hash_insert { struct hash_insert {
uint64_t hash;
struct hash_table *head; struct hash_table *head;
struct hash_tbl_node *where; struct hash_node *where;
struct hash_node node;
};
struct hash_iterator {
const struct hash_table *head;
const struct hash_node *next;
}; };
uint64_t crc64(uint64_t crc, const char *string); uint64_t crc64(uint64_t crc, const char *string);
uint64_t crc64i(uint64_t crc, const char *string); uint64_t crc64i(uint64_t crc, const char *string);
uint64_t crc64b(uint64_t crc, const void *data, size_t len);
uint64_t crc64ib(uint64_t crc, const void *data, size_t len);
#define CRC64_INIT UINT64_C(0xffffffffffffffff) #define CRC64_INIT UINT64_C(0xffffffffffffffff)
/* Some reasonable initial sizes... */
#define HASH_SMALL 4
#define HASH_MEDIUM 16
#define HASH_LARGE 256
void hash_init(struct hash_table *head, size_t size);
void **hash_find(struct hash_table *head, const char *string, void **hash_find(struct hash_table *head, const char *string,
struct hash_insert *insert); struct hash_insert *insert);
void **hash_findb(struct hash_table *head, const void *key, size_t keylen,
struct hash_insert *insert);
void **hash_findi(struct hash_table *head, const char *string, void **hash_findi(struct hash_table *head, const char *string,
struct hash_insert *insert); struct hash_insert *insert);
void **hash_add(struct hash_insert *insert, const char *string, void *data); void **hash_findib(struct hash_table *head, const void *key, size_t keylen,
void *hash_iterate(const struct hash_table *head, struct hash_insert *insert);
struct hash_tbl_node **iterator, void **hash_add(struct hash_insert *insert, const void *key, void *data);
const char **key); static inline void hash_iterator_init(const struct hash_table *head,
struct hash_iterator *iterator)
{
iterator->head = head;
iterator->next = head->table;
}
const struct hash_node *hash_iterate(struct hash_iterator *iterator);
#define hash_for_each(_head,_it,_np) \
for (hash_iterator_init((_head), &(_it)), (_np) = hash_iterate(&(_it)) ; \
(_np) ; (_np) = hash_iterate(&(_it)))
void hash_free(struct hash_table *head); void hash_free(struct hash_table *head);
void hash_free_all(struct hash_table *head, bool free_keys); void hash_free_all(struct hash_table *head, bool free_keys);

View File

@@ -44,7 +44,7 @@
struct strlist_entry { struct strlist_entry {
struct strlist_entry *next; struct strlist_entry *next;
size_t len; size_t size;
char str[1]; char str[1];
}; };

View File

@@ -187,3 +187,25 @@ uint64_t crc64i(uint64_t crc, const char *str)
return crc; return crc;
} }
uint64_t crc64b(uint64_t crc, const void *data, size_t len)
{
const uint8_t *str = data;
while (len--) {
crc = crc64_tab[(uint8_t)crc ^ *str++] ^ (crc >> 8);
}
return crc;
}
uint64_t crc64ib(uint64_t crc, const void *data, size_t len)
{
const uint8_t *str = data;
while (len--) {
crc = crc64_tab[(uint8_t)crc ^ nasm_tolower(*str++)] ^ (crc >> 8);
}
return crc;
}

View File

@@ -1,6 +1,6 @@
/* ----------------------------------------------------------------------- * /* ----------------------------------------------------------------------- *
* *
* Copyright 1996-2009 The NASM Authors - All Rights Reserved * Copyright 1996-2018 The NASM Authors - All Rights Reserved
* See the file AUTHORS included with the NASM distribution for * See the file AUTHORS included with the NASM distribution for
* the specific copyright holders. * the specific copyright holders.
* *
@@ -44,9 +44,10 @@
#include "hashtbl.h" #include "hashtbl.h"
#define HASH_MAX_LOAD 2 /* Higher = more memory-efficient, slower */ #define HASH_MAX_LOAD 2 /* Higher = more memory-efficient, slower */
#define HASH_INIT_SIZE 16 /* Initial size (power of 2, min 4) */
#define hash_calc(key) crc64(CRC64_INIT, (key)) #define hash_calc(key,keylen) crc64b(CRC64_INIT, (key), (keylen))
#define hash_calci(key) crc64i(CRC64_INIT, (key)) #define hash_calci(key,keylen) crc64ib(CRC64_INIT, (key), (keylen))
#define hash_max_load(size) ((size) * (HASH_MAX_LOAD - 1) / HASH_MAX_LOAD) #define hash_max_load(size) ((size) * (HASH_MAX_LOAD - 1) / HASH_MAX_LOAD)
#define hash_expand(size) ((size) << 1) #define hash_expand(size) ((size) << 1)
#define hash_mask(size) ((size) - 1) #define hash_mask(size) ((size) - 1)
@@ -54,113 +55,152 @@
#define hash_inc(hash, mask) ((((hash) >> 32) & (mask)) | 1) /* always odd */ #define hash_inc(hash, mask) ((((hash) >> 32) & (mask)) | 1) /* always odd */
#define hash_pos_next(pos, inc, mask) (((pos) + (inc)) & (mask)) #define hash_pos_next(pos, inc, mask) (((pos) + (inc)) & (mask))
static struct hash_tbl_node *alloc_table(size_t newsize) static void hash_init(struct hash_table *head)
{ {
size_t bytes = newsize * sizeof(struct hash_tbl_node); head->size = HASH_INIT_SIZE;
return nasm_zalloc(bytes);
}
void hash_init(struct hash_table *head, size_t size)
{
nasm_assert(is_power2(size));
head->table = alloc_table(size);
head->load = 0; head->load = 0;
head->size = size; head->max_load = hash_max_load(head->size);
head->max_load = hash_max_load(size); nasm_newn(head->table, head->size);
} }
/* /*
* Find an entry in a hash table. * Find an entry in a hash table. The key can be any binary object.
* *
* On failure, if "insert" is non-NULL, store data in that structure * On failure, if "insert" is non-NULL, store data in that structure
* which can be used to insert that node using hash_add(). * which can be used to insert that node using hash_add().
* * See hash_add() for constraints on the uses of the insert object.
* WARNING: this data is only valid until the very next call of
* hash_add(); it cannot be "saved" to a later date.
* *
* On success, return a pointer to the "data" element of the hash * On success, return a pointer to the "data" element of the hash
* structure. * structure.
*/ */
void **hash_find(struct hash_table *head, const char *key, void **hash_findb(struct hash_table *head, const void *key,
struct hash_insert *insert) size_t keylen, struct hash_insert *insert)
{ {
struct hash_tbl_node *np; struct hash_node *np = NULL;
struct hash_tbl_node *tbl = head->table; struct hash_node *tbl = head->table;
uint64_t hash = hash_calc(key); uint64_t hash = hash_calc(key, keylen);
size_t mask = hash_mask(head->size); size_t mask = hash_mask(head->size);
size_t pos = hash_pos(hash, mask); size_t pos = hash_pos(hash, mask);
size_t inc = hash_inc(hash, mask); size_t inc = hash_inc(hash, mask);
if (likely(tbl)) {
while ((np = &tbl[pos])->key) { while ((np = &tbl[pos])->key) {
if (hash == np->hash && !strcmp(key, np->key)) if (hash == np->hash &&
keylen == np->keylen &&
!memcmp(key, np->key, keylen))
return &np->data; return &np->data;
pos = hash_pos_next(pos, inc, mask); pos = hash_pos_next(pos, inc, mask);
} }
}
/* Not found. Store info for insert if requested. */ /* Not found. Store info for insert if requested. */
if (insert) { if (insert) {
insert->node.hash = hash;
insert->node.key = key;
insert->node.keylen = keylen;
insert->node.data = NULL;
insert->head = head; insert->head = head;
insert->hash = hash;
insert->where = np; insert->where = np;
} }
return NULL; return NULL;
} }
/* /*
* Same as hash_find, but for case-insensitive hashing. * Same as hash_findb(), but for a C string.
*/ */
void **hash_findi(struct hash_table *head, const char *key, void **hash_find(struct hash_table *head, const char *key,
struct hash_insert *insert) struct hash_insert *insert)
{ {
struct hash_tbl_node *np; return hash_findb(head, key, strlen(key)+1, insert);
struct hash_tbl_node *tbl = head->table; }
uint64_t hash = hash_calci(key);
/*
* Same as hash_findb(), but for case-insensitive hashing.
*/
void **hash_findib(struct hash_table *head, const void *key, size_t keylen,
struct hash_insert *insert)
{
struct hash_node *np = NULL;
struct hash_node *tbl = head->table;
uint64_t hash = hash_calci(key, keylen);
size_t mask = hash_mask(head->size); size_t mask = hash_mask(head->size);
size_t pos = hash_pos(hash, mask); size_t pos = hash_pos(hash, mask);
size_t inc = hash_inc(hash, mask); size_t inc = hash_inc(hash, mask);
if (likely(tbl)) {
while ((np = &tbl[pos])->key) { while ((np = &tbl[pos])->key) {
if (hash == np->hash && !nasm_stricmp(key, np->key)) if (hash == np->hash &&
keylen == np->keylen &&
!nasm_memicmp(key, np->key, keylen))
return &np->data; return &np->data;
pos = hash_pos_next(pos, inc, mask); pos = hash_pos_next(pos, inc, mask);
} }
}
/* Not found. Store info for insert if requested. */ /* Not found. Store info for insert if requested. */
if (insert) { if (insert) {
insert->node.hash = hash;
insert->node.key = key;
insert->node.keylen = keylen;
insert->node.data = NULL;
insert->head = head; insert->head = head;
insert->hash = hash;
insert->where = np; insert->where = np;
} }
return NULL; return NULL;
} }
/*
* Same as hash_find(), but for case-insensitive hashing.
*/
void **hash_findi(struct hash_table *head, const char *key,
struct hash_insert *insert)
{
return hash_findib(head, key, strlen(key)+1, insert);
}
/* /*
* Insert node. Return a pointer to the "data" element of the newly * Insert node. Return a pointer to the "data" element of the newly
* created hash node. * created hash node.
*
* The following constraints apply:
* 1. A call to hash_add() invalidates all other outstanding hash_insert
* objects; attempting to use them causes a wild pointer reference.
* 2. The key provided must exactly match the key passed to hash_find*(),
* but it does not have to point to the same storage address. The key
* buffer provided to this function must not be freed for the lifespan
* of the hash. NULL will use the same pointer that was passed to
* hash_find*().
*/ */
void **hash_add(struct hash_insert *insert, const char *key, void *data) void **hash_add(struct hash_insert *insert, const void *key, void *data)
{ {
struct hash_table *head = insert->head; struct hash_table *head = insert->head;
struct hash_tbl_node *np = insert->where; struct hash_node *np = insert->where;
if (unlikely(!np)) {
hash_init(head);
/* The hash table is empty, so we don't need to iterate here */
np = &head->table[hash_pos(insert->node.hash, hash_mask(head->size))];
}
/* /*
* Insert node. We can always do this, even if we need to * Insert node. We can always do this, even if we need to
* rebalance immediately after. * rebalance immediately after.
*/ */
np->hash = insert->hash; *np = insert->node;
np->key = key;
np->data = data; np->data = data;
if (key)
np->key = key;
if (++head->load > head->max_load) { if (unlikely(++head->load > head->max_load)) {
/* Need to expand the table */ /* Need to expand the table */
size_t newsize = hash_expand(head->size); size_t newsize = hash_expand(head->size);
struct hash_tbl_node *newtbl = alloc_table(newsize); struct hash_node *newtbl;
size_t mask = hash_mask(newsize); size_t mask = hash_mask(newsize);
struct hash_node *op, *xp;
if (head->table) {
struct hash_tbl_node *op, *xp;
size_t i; size_t i;
nasm_newn(newtbl, newsize);
/* Rebalance all the entries */ /* Rebalance all the entries */
for (i = 0, op = head->table; i < head->size; i++, op++) { for (i = 0, op = head->table; i < head->size; i++, op++) {
if (op->key) { if (op->key) {
@@ -176,7 +216,6 @@ void **hash_add(struct hash_insert *insert, const char *key, void *data)
} }
} }
nasm_free(head->table); nasm_free(head->table);
}
head->table = newtbl; head->table = newtbl;
head->size = newsize; head->size = newsize;
@@ -188,35 +227,29 @@ void **hash_add(struct hash_insert *insert, const char *key, void *data)
/* /*
* Iterate over all members of a hash set. For the first call, * Iterate over all members of a hash set. For the first call,
* iterator should be initialized to NULL. Returns the data pointer, * the iterator must be set up by hash_iterator_init(). Returns a pointer to
* or NULL on failure. * a struct hash_node representing the current object, or NULL
* if we have reached the end of the hash table.
*
* Calling hash_add() will invalidate the iterator.
*/ */
void *hash_iterate(const struct hash_table *head, const struct hash_node *hash_iterate(struct hash_iterator *iter)
struct hash_tbl_node **iterator,
const char **key)
{ {
struct hash_tbl_node *np = *iterator; const struct hash_table *head = iter->head;
struct hash_tbl_node *ep = head->table + head->size; const struct hash_node *cp = iter->next;
const struct hash_node *ep = head->table + head->size;
if (!np) { /* For an empty table, cp == ep == NULL */
np = head->table; while (cp < ep) {
if (!np) const struct hash_node *np = cp+1;
return NULL; /* Uninitialized table */
}
while (np < ep) {
if (np->key) { if (np->key) {
*iterator = np + 1; iter->next = np;
if (key) return cp;
*key = np->key;
return np->data;
} }
np++; cp = np;
} }
*iterator = NULL; iter->next = head->table;
if (key)
*key = NULL;
return NULL; return NULL;
} }
@@ -229,9 +262,11 @@ void *hash_iterate(const struct hash_table *head,
void hash_free(struct hash_table *head) void hash_free(struct hash_table *head)
{ {
void *p = head->table; void *p = head->table;
if (likely(p)) {
head->table = NULL; head->table = NULL;
nasm_free(p); nasm_free(p);
} }
}
/* /*
* Frees the hash *and* all data elements. This is applicable only in * Frees the hash *and* all data elements. This is applicable only in
@@ -242,14 +277,13 @@ void hash_free(struct hash_table *head)
*/ */
void hash_free_all(struct hash_table *head, bool free_keys) void hash_free_all(struct hash_table *head, bool free_keys)
{ {
struct hash_tbl_node *iter = NULL; struct hash_iterator it;
const char *keyp; const struct hash_node *np;
void *d;
while ((d = hash_iterate(head, &iter, &keyp))) { hash_for_each(head, it, np) {
nasm_free(d); nasm_free(np->data);
if (free_keys) if (free_keys)
nasm_free((void *)keyp); nasm_free((void *)np->key);
} }
hash_free(head); hash_free(head);

View File

@@ -43,7 +43,6 @@
struct strlist *strlist_alloc(void) struct strlist *strlist_alloc(void)
{ {
struct strlist *list = nasm_zalloc(sizeof(*list)); struct strlist *list = nasm_zalloc(sizeof(*list));
hash_init(&list->hash, HASH_MEDIUM);
list->tailp = &list->head; list->tailp = &list->head;
return list; return list;
} }
@@ -56,20 +55,19 @@ bool strlist_add(struct strlist *list, const char *str)
{ {
struct strlist_entry *e; struct strlist_entry *e;
struct hash_insert hi; struct hash_insert hi;
size_t len; size_t size;
if (!list) if (!list)
return false; return false;
if (hash_find(&list->hash, str, &hi)) size = strlen(str) + 1;
if (hash_findb(&list->hash, str, size, &hi))
return false; return false;
len = strlen(str);
/* Structure already has char[1] as EOS */ /* Structure already has char[1] as EOS */
e = nasm_zalloc(sizeof(*e) + len); e = nasm_zalloc(sizeof(*e) - 1 + size);
e->len = len; e->size = size;
memcpy(e->str, str, len + 1); memcpy(e->str, str, size);
*list->tailp = e; *list->tailp = e;
list->tailp = &e->next; list->tailp = &e->next;

View File

@@ -177,7 +177,6 @@ static void cv8_init(void)
cv8_state.source_files = NULL; cv8_state.source_files = NULL;
cv8_state.source_files_tail = &cv8_state.source_files; cv8_state.source_files_tail = &cv8_state.source_files;
hash_init(&cv8_state.file_hash, HASH_MEDIUM);
cv8_state.num_files = 0; cv8_state.num_files = 0;
cv8_state.total_filename_len = 0; cv8_state.total_filename_len = 0;

View File

@@ -363,7 +363,6 @@ static void macho_init(void)
strs = saa_init(1L); strs = saa_init(1L);
section_by_index = raa_init(); section_by_index = raa_init();
hash_init(&section_by_name, HASH_MEDIUM);
/* string table starts with a zero byte so index 0 is an empty string */ /* string table starts with a zero byte so index 0 is an empty string */
saa_wbytes(strs, zero_buffer, 1); saa_wbytes(strs, zero_buffer, 1);

View File

@@ -56,7 +56,6 @@ struct strtbl_entry {
void strtbl_init(struct nasm_strtbl *tbl) void strtbl_init(struct nasm_strtbl *tbl)
{ {
tbl->size = 0; tbl->size = 0;
hash_init(&tbl->hash, HASH_LARGE);
strtbl_add(tbl, ""); /* Index 0 is always an empty string */ strtbl_add(tbl, ""); /* Index 0 is always an empty string */
} }
@@ -70,14 +69,13 @@ size_t strtbl_add(struct nasm_strtbl *tbl, const char *str)
void **sep; void **sep;
struct strtbl_entry *se; struct strtbl_entry *se;
struct hash_insert hi; struct hash_insert hi;
size_t bytes = strlen(str) + 1;
sep = hash_find(&tbl->hash, str, &hi); sep = hash_findb(&tbl->hash, str, bytes, &hi);
if (sep) { if (sep) {
se = *sep; se = *sep;
} else { } else {
size_t bytes = strlen(str) + 1; nasm_new(se);
se = nasm_malloc(sizeof(struct strtbl_entry)-1+bytes);
se->index = tbl->size; se->index = tbl->size;
tbl->size += bytes; tbl->size += bytes;
se->bytes = bytes; se->bytes = bytes;
@@ -107,11 +105,13 @@ size_t strtbl_find(struct nasm_strtbl *tbl, const char *str)
void *strtbl_generate(const struct nasm_strtbl *tbl) void *strtbl_generate(const struct nasm_strtbl *tbl)
{ {
char *buf = nasm_malloc(strtbl_size(tbl)); char *buf = nasm_malloc(strtbl_size(tbl));
struct hash_tbl_node *iter = NULL; struct hash_iterator it;
struct strtbl_entry *se; const struct hash_node *np;
while ((se = hash_iterate(&tbl->hash, &iter, NULL))) hash_for_each(&tbl->hash, it, np) {
struct strtbl_entry *se = np->data;
memcpy(buf + se->index, se->str, se->bytes); memcpy(buf + se->index, se->str, se->bytes);
}
return buf; return buf;
} }