878 lines
32 KiB
C
878 lines
32 KiB
C
/** @license 2022 Neil Edelman, distributed under the terms of the
|
|
[MIT License](https://opensource.org/licenses/MIT).
|
|
|
|
@abstract Stand-alone header <src/tree.h>; examples <test/test_tree.c>. On a
|
|
compatible workstation, `make` creates the test suite of the examples.
|
|
|
|
@subtitle Ordered tree
|
|
|
|
A <tag:<B>tree> is an ordered set or map.
|
|
|
|
@param[TREE_NAME, TREE_KEY]
|
|
`<B>` that satisfies `C` naming conventions when mangled, required, and
|
|
`TREE_KEY`, a comparable type, <typedef:<PB>key>, whose default is
|
|
`unsigned int`. `<PB>` is private, whose names are prefixed in a manner to
|
|
avoid collisions.
|
|
|
|
@param[TREE_VALUE]
|
|
`TREE_VALUE` is an optional payload to go with the type, <typedef:<PB>value>.
|
|
This makes it a map of <tag:<B>tree_entry> instead of a set.
|
|
|
|
@param[TREE_COMPARE]
|
|
A function satisfying <typedef:<PB>compare_fn>. Defaults to ascending order.
|
|
Required if `TREE_KEY` is changed to an incomparable type.
|
|
|
|
@param[TREE_EXPECT_TRAIT]
|
|
Do not un-define certain variables for subsequent inclusion in a parameterized
|
|
trait.
|
|
|
|
@param[TREE_TO_STRING_NAME, TREE_TO_STRING]
|
|
To string trait contained in <to_string.h>; an optional unique `<SZ>`
|
|
that satisfies `C` naming conventions when mangled and function implementing
|
|
<typedef:<PSZ>to_string_fn>.
|
|
|
|
@fixme multi-key; implementation of order statistic tree
|
|
@fixme merge, difference
|
|
|
|
@std C89 */
|
|
|
|
#if !defined(TREE_NAME)
|
|
#error Name TREE_NAME undefined.
|
|
#endif
|
|
#if defined(TREE_TO_STRING_NAME) || defined(TREE_TO_STRING)
|
|
#define TREE_TO_STRING_TRAIT 1
|
|
#else
|
|
#define TREE_TO_STRING_TRAIT 0
|
|
#endif
|
|
#define TREE_TRAITS TREE_TO_STRING_TRAIT
|
|
#if TREE_TRAITS > 1
|
|
#error Only one trait per include is allowed; use TREE_EXPECT_TRAIT.
|
|
#endif
|
|
#if defined(TREE_TO_STRING_NAME) && !defined(TREE_TO_STRING)
|
|
#error TREE_TO_STRING_NAME requires TREE_TO_STRING.
|
|
#endif
|
|
|
|
#ifndef TREE_H /* <!-- idempotent */
|
|
#define TREE_H
|
|
#include <stddef.h> /* fixme: stdlib, string should do it; what is going on? */
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <errno.h>
#include <assert.h>
#include <limits.h>
|
|
/* <Kernighan and Ritchie, 1988, p. 231>. */
|
|
#if defined(TREE_CAT_) || defined(TREE_CAT) || defined(B_) || defined(PB_) \
|
|
|| defined(TREE_IDLE)
|
|
#error Unexpected defines.
|
|
#endif
|
|
#define TREE_CAT_(n, m) n ## _ ## m
|
|
#define TREE_CAT(n, m) TREE_CAT_(n, m)
|
|
#define B_(n) TREE_CAT(TREE_NAME, n)
|
|
#define PB_(n) TREE_CAT(tree, B_(n))
|
|
/* Leaf: `TREE_MAX type`; branch: `TREE_MAX type + TREE_ORDER pointer`. The
 lower limit is two keys, (three children;) the upper limit comes from the
 node's `size` being stored in an `unsigned char`. */
#define TREE_MAX 2
#if TREE_MAX < 2 || TREE_MAX > UCHAR_MAX
#error TREE_MAX parameter range `[2, UCHAR_MAX]`.
#endif
/* This is the worst-case branching factor; the performance will be
 \O(log_{`TREE_MIN`+1} `size`). Usually this is `⌈(TREE_MAX+1)/2⌉-1`. However,
 smaller values are less-eager; this has been chosen to provide hysteresis. In
 the extreme, <Johnson, Shasha, 1993, Free-at-Empty> show good results. (Except
 `TREE_MAX 2`, one can be the only value.) */
#define TREE_MIN (TREE_MAX / 3 ? TREE_MAX / 3 : 1)
#if TREE_MIN == 0 || TREE_MIN > TREE_MAX / 2
#error TREE_MIN parameter range `[1, \floor(TREE_MAX / 2)]`.
#endif
#define TREE_ORDER (TREE_MAX + 1) /* Maximum degree, (branching factor.) */
#define TREE_SPLIT (TREE_ORDER / 2) /* Split index: even order left-leaning. */
|
|
/* X-macro list: the one place where the result names are spelled out; it is
 expanded once into the enumeration and once into the matching strings. */
#define TREE_RESULT \
	X(ERROR), \
	X(UNIQUE), \
	X(YIELD)
#define X(name) TREE_##name
/** A result of modifying the tree, of which `TREE_ERROR` is false.

 ![A diagram of the result states.](../doc/put.png) */
enum tree_result {
	TREE_RESULT
};
#undef X
#define X(name) #name
/** A static array of strings describing the <tag:tree_result>, indexed by the
 enumeration's value. */
static const char *const tree_result_str[] = {
	TREE_RESULT
};
#undef X
#undef TREE_RESULT
|
|
#endif /* idempotent --> */
|
|
|
|
|
|
#if TREE_TRAITS == 0 /* <!-- base code */
|
|
|
|
|
|
#ifndef TREE_KEY
|
|
#define TREE_KEY unsigned
|
|
#endif
|
|
|
|
/** A comparable type, defaults to `unsigned`. */
|
|
typedef TREE_KEY PB_(key);
|
|
typedef const TREE_KEY PB_(key_c);
|
|
|
|
#ifdef TREE_VALUE
|
|
/** On `TREE_VALUE`, otherwise just a set of <typedef:<PB>key>. */
|
|
typedef TREE_VALUE PB_(value);
|
|
typedef const TREE_VALUE PB_(value_c);
|
|
#endif
|
|
|
|
/** Returns a positive result if `a` is out-of-order with respect to `b`,
|
|
inducing a strict weak order. This is compatible, but less strict than the
|
|
comparators from `bsearch` and `qsort`; it only needs to divide entries
|
|
into two instead of three categories. */
|
|
typedef int (*PB_(compare_fn))(PB_(key_c) a, PB_(key_c) b);
|
|
|
|
#ifndef TREE_COMPARE /* <!-- !cmp */
|
|
/** The default `TREE_COMPARE` on `a` and `b` is integer comparison that
|
|
results in ascending order. @implements <typedef:<PH>compare_fn> */
|
|
static int PB_(default_compare)(PB_(key_c) a, PB_(key_c) b)
|
|
{ return a > b; }
|
|
#define TREE_COMPARE &PB_(default_compare)
|
|
#endif /* !cmp --> */
|
|
|
|
/* Check that `TREE_COMPARE` is a function implementing
|
|
<typedef:<PB>compare_fn>, if defined. */
|
|
static const PB_(compare_fn) PB_(compare) = (TREE_COMPARE);
|
|
|
|
/* B-tree node, as <Bayer, McCreight, 1972, Large>. These rules are more lazy
|
|
than the original so as to not exhibit worst-case behaviour in small trees, as
|
|
<Johnson, Shasha, 1993, Free-at-Empty>, but lookup is potentially slower after
|
|
deleting; this is a design decision that nodes are not cached. In the
|
|
terminology of <Knuth, 1998 Art 3>,
|
|
* Every branch has at most `TREE_ORDER == TREE_MAX + 1` children, which is at
|
|
minimum three.
|
|
* Every non-root and non-bulk-loaded node has at least `TREE_MIN` keys,
|
|
(`⎣TREE_MAX/3⎦`.)
|
|
* Every branch has at least one child, `k`, and contains `k - 1` keys, (this
|
|
is a consequence of the fact that they are implicitly storing a complete
|
|
binary sub-tree.)
|
|
* All leaves are at the maximum depth and height zero; they don't carry links
|
|
to other nodes. (The height is one less than the original paper, as
|
|
<Knuth, 1998 Art 3>, for computational simplicity.)
|
|
* There are two empty B-trees to facilitate allocation hysteresis between
|
|
0 -- 1: idle `{ 0, 0 }`, and `{ garbage leaf, UINT_MAX }`, one could test,
|
|
`!root || height == UINT_MAX`.
|
|
* Bulk-loading always is on the right side.
|
|
* A branch node is a specialization of a (leaf) node with children. One can
|
|
tell if it's a branch by the non-zero height. */
|
|
struct PB_(node) {
|
|
unsigned char size; /* `[0, TREE_MAX]`. */
|
|
PB_(key) key[TREE_MAX]; /* Cache-friendly lookup. */
|
|
#ifdef TREE_VALUE
|
|
PB_(value) value[TREE_MAX];
|
|
#endif
|
|
};
|
|
/* B-tree branch is a <tag:<PB>node> and links to `size + 1` nodes. */
|
|
struct PB_(branch) { struct PB_(node) base, *child[TREE_ORDER]; };
|
|
/** @return Upcasts `as_node` to a branch. */
|
|
static struct PB_(branch) *PB_(branch)(struct PB_(node) *const as_leaf)
|
|
{ return (struct PB_(branch) *)(void *)
|
|
((char *)as_leaf - offsetof(struct PB_(branch), base)); }
|
|
/** @return Upcasts `as_node` to a branch. */
|
|
static const struct PB_(branch) *PB_(branch_c)(const struct PB_(node) *
|
|
const as_node) { return (const struct PB_(branch) *)(const void *)
|
|
((const char *)as_node - offsetof(struct PB_(branch), base)); }
|
|
|
|
/* Subtree is a node with a height. */
|
|
struct PB_(sub) { struct PB_(node) *node; unsigned height; };
|
|
/* Address specific entry. */
|
|
struct PB_(ref) { struct PB_(node) *node; unsigned height, idx; };
|
|
struct PB_(ref_c) { const struct PB_(node) *node; unsigned height, idx; };
|
|
|
|
#ifdef TREE_VALUE /* <!-- value */
|
|
|
|
/** On `TREE_VALUE`, creates a map from pointer-to-<typedef:<PB>key> to
|
|
pointer-to-<typedef:<PB>value>. The reason these are pointers is because it
|
|
is not connected in memory. */
|
|
struct B_(tree_entry) { PB_(key) *key; PB_(value) *value; };
|
|
struct B_(tree_entry_c) { PB_(key_c) *key; PB_(value_c) *value; };
|
|
/** On `TREE_VALUE`, otherwise it's just an alias for
|
|
pointer-to-<typedef:<PB>key>. */
|
|
typedef struct B_(tree_entry) PB_(entry);
|
|
typedef struct B_(tree_entry_c) PB_(entry_c);
|
|
static PB_(entry) PB_(null_entry)(void)
|
|
{ const PB_(entry) e = { 0, 0 }; return e; }
|
|
static PB_(entry_c) PB_(null_entry_c)(void)
|
|
{ const PB_(entry_c) e = { 0, 0 }; return e; }
|
|
static PB_(entry) PB_(leaf_to_entry)(struct PB_(node) *const leaf,
|
|
const unsigned i) { PB_(entry) e;
|
|
e.key = leaf->key + i, e.value = leaf->value + i; return e; }
|
|
static PB_(entry_c) PB_(leaf_to_entry_c)(const struct PB_(node) *const leaf,
|
|
const unsigned i) { PB_(entry_c) e;
|
|
e.key = leaf->key + i, e.value = leaf->value + i; return e; }
|
|
static PB_(value) *PB_(ref_to_value)(const struct PB_(ref) ref)
|
|
{ return ref.node ? ref.node->value + ref.idx : 0; }
|
|
|
|
#else /* value --><!-- !value */
|
|
|
|
typedef PB_(key) PB_(value);
|
|
typedef PB_(key) *PB_(entry);
|
|
typedef PB_(key_c) *PB_(entry_c);
|
|
static PB_(entry_c) PB_(null_entry_c)(void) { return 0; }
|
|
static PB_(entry) PB_(null_entry)(void) { return 0; }
|
|
static PB_(entry) PB_(leaf_to_entry)(struct PB_(node) *const leaf,
|
|
const unsigned i) { return leaf->key + i; }
|
|
static PB_(entry_c) PB_(leaf_to_entry_c)(const struct PB_(node) *const leaf,
|
|
const unsigned i) { return leaf->key + i; }
|
|
static PB_(value) *PB_(ref_to_value)(const struct PB_(ref) ref)
|
|
{ return ref.node ? ref.node->key + ref.idx : 0; }
|
|
|
|
#endif /* !value --> */
|
|
|
|
/** To initialize it to an idle state, see <fn:<B>tree>, `TREE_IDLE`, `{0}`
|
|
(`C99`), or being `static`. This is a B-tree, as
|
|
<Bayer, McCreight, 1972 Large>.
|
|
|
|
![States.](../doc/states.png) */
|
|
struct B_(tree);
|
|
struct B_(tree) { struct PB_(sub) root; };
|
|
|
|
#define BOX_CONTENT PB_(entry_c)
|
|
/** Is `e` not null? @implements `is_element_c` */
|
|
static int PB_(is_element_c)(PB_(entry_c) e) {
|
|
#ifdef TREE_VALUE
|
|
return !!e.key;
|
|
#else
|
|
return !!e;
|
|
#endif
|
|
}
|
|
/* Two copies of the same code, with and without `const`.
|
|
@param[sub] A copy of the tree's root.
|
|
@param[ref] If it has a null node, starts at the first key; if it's past the
|
|
node's limits, uses `sub` to go to the next node.
|
|
@return True unless there are no more `ref`. */
|
|
#define TREE_PIN(pin_c, ref_c) \
|
|
static int PB_(pin_c)(struct PB_(sub) sub, struct PB_(ref_c) *const ref) { \
|
|
struct PB_(ref_c) next; \
|
|
unsigned a0; \
|
|
PB_(key) x; \
|
|
assert(ref); \
|
|
if(!sub.node || sub.height == UINT_MAX) return 0; \
|
|
/* Start. */ \
|
|
if(!ref->node) \
|
|
ref->node = sub.node, ref->height = sub.height, ref->idx = 0; \
|
|
/* Descend. */ \
|
|
while(ref->height) ref->height--, \
|
|
ref->node = PB_(branch_c)(ref->node)->child[ref->idx], ref->idx = 0; \
|
|
if(ref->idx < ref->node->size) return 1; /* Likely. */ \
|
|
/* Empty nodes are always at the end, (when bulk loading.) */ \
|
|
if(!ref->node->size) return 0; \
|
|
/* Re-descend tree and note the minimum height node that has a next key. */\
|
|
for(next.node = 0, x = ref->node->key[ref->node->size - 1]; sub.height; \
|
|
sub.node = PB_(branch_c)(sub.node)->child[a0], sub.height--) { \
|
|
unsigned a1 = sub.node->size; a0 = 0; \
|
|
while(a0 < a1) { \
|
|
const unsigned m = (a0 + a1) / 2; \
|
|
if(PB_(compare)(x, sub.node->key[m]) > 0) a0 = m + 1; else a1 = m; \
|
|
} \
|
|
if(a0 < sub.node->size) \
|
|
next.node = sub.node, next.height = sub.height, next.idx = a0; \
|
|
} \
|
|
if(!next.node) return 0; /* Off the right. */ \
|
|
*ref = next; \
|
|
return 1; /* Jumped nodes. */ \
|
|
}
|
|
TREE_PIN(pin_c, ref_c)
|
|
TREE_PIN(pin, ref)
|
|
#undef TREE_PIN
|
|
/* This could be expanded! */
|
|
|
|
/* A constant iterator. @implements `forward` */
|
|
struct PB_(forward) { const struct PB_(sub) *root; struct PB_(ref_c) ref; };
|
|
/** @return Before `tree`. @implements `forward_begin` */
|
|
static struct PB_(forward) PB_(forward_begin)(const struct B_(tree) *const
|
|
tree) {
|
|
struct PB_(forward) it;
|
|
it.root = tree ? &tree->root : 0, it.ref.node = 0,
|
|
it.ref.height = 0, it.ref.idx = 0;
|
|
return it;
|
|
}
|
|
/** Advances `it` to the next element. @return A pointer to the current
|
|
element or null. @implements `forward_next` */
|
|
static PB_(entry_c) PB_(forward_next)(struct PB_(forward) *const it) {
|
|
return assert(it), PB_(pin_c)(*it->root, &it->ref) ?
|
|
PB_(leaf_to_entry_c)(it->ref.node, it->ref.idx++) : PB_(null_entry_c)();
|
|
}
|
|
|
|
#define BOX_ITERATOR PB_(entry)
|
|
/** Is `x` not null? @implements `is_element` */
|
|
static int PB_(is_element)(const PB_(entry) e) {
|
|
#ifdef TREE_VALUE
|
|
return !!e.key;
|
|
#else
|
|
return !!e;
|
|
#endif
|
|
}
|
|
/* A certain position and the top level tree for backtracking.
|
|
@implements `iterator` */
|
|
struct PB_(iterator) { struct PB_(sub) *root; struct PB_(ref) ref; };
|
|
/** @return Before `tree`. @implements `forward_begin` */
|
|
static struct PB_(iterator) PB_(begin)(struct B_(tree) *const tree) {
|
|
struct PB_(iterator) it;
|
|
it.root = tree ? &tree->root : 0, it.ref.node = 0,
|
|
it.ref.height = 0, it.ref.idx = 0;
|
|
return it;
|
|
}
|
|
/** Advances `it` to the next element. @return A pointer to the current
|
|
element or null. @implements `next` */
|
|
static PB_(entry) PB_(next)(struct PB_(iterator) *const it) {
|
|
return assert(it), PB_(pin)(*it->root, &it->ref) ?
|
|
PB_(leaf_to_entry)(it->ref.node, it->ref.idx++) : PB_(null_entry)();
|
|
}
|
|
|
|
//#include "../test/orcish.h"
|
|
|
|
/** Binary search inside the single node `lo->node`. On return, `lo->idx` is
 the index of the first key for which comparing `key` against it is not
 positive, or the node's size if there is no such key. Only `node` is read
 from `lo` and only `idx` is written. */
static void PB_(find_idx)(struct PB_(ref) *const lo, const PB_(key) key) {
	unsigned low = 0, high = lo->node->size;
	while(low < high) {
		const unsigned mid = (low + high) / 2;
		if(PB_(compare)(key, lo->node->key[mid]) > 0) low = mid + 1;
		else high = mid;
	}
	lo->idx = low;
}
|
|
|
|
/** Assume `tree` and `x` are checked for non-empty validity. */
|
|
static struct PB_(ref) PB_(lower_r)(struct PB_(sub) *const tree,
|
|
const PB_(key) key, struct PB_(ref) *const unfull, int *const is_equal) {
|
|
struct PB_(ref) lo;
|
|
for(lo.node = tree->node, lo.height = tree->height; ;
|
|
lo.node = PB_(branch_c)(lo.node)->child[lo.idx], lo.height--) {
|
|
unsigned hi = lo.node->size;
|
|
lo.idx = 0;
|
|
if(unfull && hi < TREE_MAX) *unfull = lo;
|
|
if(!hi) continue; /* No nodes; bulk-add? */
|
|
do {
|
|
const unsigned m = (lo.idx + hi) / 2;
|
|
if(PB_(compare)(key, lo.node->key[m]) > 0) lo.idx = m + 1;
|
|
else hi = m;
|
|
} while(lo.idx < hi);
|
|
if(unfull && hi < TREE_MAX) unfull->idx = lo.idx; /* Update. */
|
|
if(!lo.height) break; /* Leaf node. */
|
|
if(lo.idx == lo.node->size) continue; /* Off the end. */
|
|
/* Total order and monotonic, otherwise have to check right. */
|
|
if(PB_(compare)(lo.node->key[lo.idx], key) > 0) continue;
|
|
if(is_equal) *is_equal = 1;
|
|
break;
|
|
}
|
|
return lo;
|
|
}
|
|
|
|
/** @param[tree] Can be null. @return Lower bound of `x` in `tree`.
|
|
@order \O(\log |`tree`|) */
|
|
static struct PB_(ref) PB_(lower)(struct PB_(sub) sub,
|
|
const PB_(key) x, struct PB_(ref) *const unfull, int *const is_equal) {
|
|
if(!sub.node || sub.height == UINT_MAX) {
|
|
struct PB_(ref) ref; ref.node = 0; return ref;
|
|
} else {
|
|
return PB_(lower_r)(&sub, x, unfull, is_equal);
|
|
}
|
|
}
|
|
|
|
/** Clears non-empty `tree` and it's children recursively, but doesn't put it
|
|
to idle or clear pointers. If `one` is valid, tries to keep one leaf. */
|
|
static void PB_(clear_r)(struct PB_(sub) sub, struct PB_(node) **const one) {
|
|
assert(sub.node);
|
|
if(!sub.height) {
|
|
if(one && !*one) *one = sub.node;
|
|
else free(sub.node);
|
|
} else {
|
|
struct PB_(sub) child;
|
|
unsigned i;
|
|
child.height = sub.height - 1;
|
|
for(i = 0; i <= sub.node->size; i++)
|
|
child.node = PB_(branch)(sub.node)->child[i],
|
|
PB_(clear_r)(child, one);
|
|
free(PB_(branch)(sub.node));
|
|
}
|
|
}
|
|
|
|
/* Box override information. */
|
|
#define BOX_ PB_
|
|
#define BOX struct B_(tree)
|
|
|
|
/** Initializes `tree` to idle. @order \Theta(1) @allow */
|
|
static struct B_(tree) B_(tree)(void)
|
|
{ struct B_(tree) tree; tree.root.node = 0; tree.root.height = 0;
|
|
return tree; }
|
|
|
|
/** Returns an initialized `tree` to idle, `tree` can be null. @allow */
|
|
static void B_(tree_)(struct B_(tree) *const tree) {
|
|
if(!tree) return; /* Null. */
|
|
if(!tree->root.node) { /* Idle. */
|
|
assert(!tree->root.height);
|
|
} else if(tree->root.height == UINT_MAX) { /* Empty. */
|
|
assert(tree->root.node); free(tree->root.node);
|
|
} else {
|
|
PB_(clear_r)(tree->root, 0);
|
|
}
|
|
*tree = B_(tree)();
|
|
}
|
|
|
|
/** Stores an iteration in a tree. Generally, changes in the topology of the
|
|
tree invalidate it. */
|
|
struct B_(tree_iterator) { struct PB_(iterator) _; };
|
|
/** @return An iterator before the first element of `tree`. Can be null.
|
|
@allow */
|
|
static struct B_(tree_iterator) B_(tree_begin)(struct B_(tree) *const tree)
|
|
{ struct B_(tree_iterator) it; it._ = PB_(begin)(tree); return it; }
|
|
/** Advances `it` to the next element. @return A pointer to the current
|
|
element or null. @allow */
|
|
static PB_(entry) B_(tree_next)(struct B_(tree_iterator) *const it)
|
|
{ return PB_(next)(&it->_); }
|
|
|
|
/** @param[tree] Can be null. @return Finds the smallest entry in `tree` that
|
|
is at the lower bound of `x`. If `x` is higher than any of `tree`, it will be
|
|
placed just passed the end. @order \O(\log |`tree`|) @allow */
|
|
static struct B_(tree_iterator) B_(tree_lower)(struct B_(tree) *const tree,
|
|
const PB_(key) x) {
|
|
struct B_(tree_iterator) it;
|
|
if(!tree) return it._.root = 0, it;
|
|
it._.ref = PB_(lower)(tree->root, x, 0, 0);
|
|
it._.root = &tree->root;
|
|
return it;
|
|
}
|
|
|
|
/** For example, `tree = { 10 }`, `x = 5 -> 10`, `x = 10 -> 10`,
|
|
`x = 11 -> null`.
|
|
@return Lower-bound value match for `x` in `tree` or null if `x` is greater
|
|
than all in `tree`. @order \O(\log |`tree`|) @allow */
|
|
static PB_(value) *B_(tree_get_next)(struct B_(tree) *const tree,
|
|
const PB_(key) x) {
|
|
struct PB_(ref) ref;
|
|
return tree && (ref = PB_(lower)(tree->root, x, 0, 0),
|
|
PB_(pin)(tree->root, &ref)) ? PB_(ref_to_value)(ref) : 0;
|
|
}
|
|
|
|
//#include "../test/orcish.h"
|
|
static void PB_(print)(const struct B_(tree) *const tree);
|
|
#ifndef TREE_TEST
|
|
static void PB_(print)(const struct B_(tree) *const tree)
|
|
{ (void)tree, printf("not printable\n"); }
|
|
#endif
|
|
|
|
#ifdef TREE_VALUE /* <!-- map */
|
|
/** Packs `key` on the right side of `tree` without doing the usual
|
|
restructuring. This is best followed by <fn:<B>tree_bulk_finish>.
|
|
@param[value] A pointer to the key's value which is set by the function on
|
|
returning true. A null pointer in this parameter causes the value to go
|
|
uninitialized. This parameter is not there if one didn't specify `TREE_VALUE`.
|
|
@return One of <tag:tree_result>: `TREE_ERROR` and `errno` will be set,
|
|
`TREE_YIELD` if the key is already (the highest) in the tree, and
|
|
`TREE_UNIQUE`, added, the `value` (if specified) is uninitialized.
|
|
@throws[EDOM] `x` is smaller than the largest key in `tree`. @throws[malloc] */
|
|
static enum tree_result B_(tree_bulk_add)(struct B_(tree) *const tree,
|
|
PB_(key) key, PB_(value) **const value)
|
|
#else /* map --><!-- set */
|
|
static enum tree_result B_(tree_bulk_add)(struct B_(tree) *const tree,
|
|
PB_(key) key)
|
|
#endif
|
|
{
|
|
struct PB_(node) *node = 0, *head = 0; /* The original and new. */
|
|
assert(tree);
|
|
if(!tree->root.node) { /* Idle tree. */
|
|
assert(!tree->root.height);
|
|
if(!(node = malloc(sizeof *node))) goto catch;
|
|
node->size = 0;
|
|
tree->root.node = node;
|
|
printf("bulk: idle\n");
|
|
} else if(tree->root.height == UINT_MAX) { /* Empty tree. */
|
|
tree->root.height = 0;
|
|
tree->root.node->size = 0;
|
|
printf("bulk: empty\n");
|
|
} else {
|
|
struct PB_(sub) unfull = { 0, 0 };
|
|
unsigned new_nodes, n; /* Count new nodes. */
|
|
struct PB_(node) *tail = 0, *last = 0;
|
|
struct PB_(branch) *pretail = 0;
|
|
struct PB_(sub) scout;
|
|
PB_(key) i;
|
|
printf("bulk: tree...\n"), PB_(print)(tree);
|
|
for(scout = tree->root; ; scout.node = PB_(branch)(scout.node)
|
|
->child[scout.node->size], scout.height--) {
|
|
if(scout.node->size < TREE_MAX) unfull = scout;
|
|
if(scout.node->size) last = scout.node;
|
|
if(!scout.height) break;
|
|
}
|
|
assert(last), i = last->key[last->size - 1];
|
|
/* Verify that the argument is not smaller than the largest. */
|
|
if(PB_(compare)(i, key) > 0) return errno = EDOM, TREE_ERROR;
|
|
if(PB_(compare)(key, i) <= 0) {
|
|
#ifdef TREE_VALUE
|
|
if(value) { /* Last value in the last node. */
|
|
struct PB_(ref) ref;
|
|
ref.node = last, ref.idx = last->size - 1;
|
|
*value = PB_(ref_to_value)(ref);
|
|
}
|
|
#endif
|
|
return TREE_YIELD;
|
|
}
|
|
|
|
/* One leaf, and the rest branches. */
|
|
new_nodes = n = unfull.node ? unfull.height : tree->root.height + 2;
|
|
/*printf("new_nodes: %u, tree height %u\n", new_nodes, tree->height);*/
|
|
if(!n) {
|
|
node = unfull.node;
|
|
} else {
|
|
if(!(node = tail = malloc(sizeof *tail))) goto catch;
|
|
tail->size = 0;
|
|
/*printf("new tail: %s.\n", orcify(tail));*/
|
|
while(--n) {
|
|
struct PB_(branch) *b;
|
|
if(!(b = malloc(sizeof *b))) goto catch;
|
|
b->base.size = 0;
|
|
/*printf("new branch: %s.\n", orcify(b));*/
|
|
if(!head) b->child[0] = 0, pretail = b; /* First loop. */
|
|
else b->child[0] = head; /* Not first loop. */
|
|
head = &b->base;
|
|
}
|
|
}
|
|
|
|
/* Post-error; modify the original as needed. */
|
|
if(pretail) pretail->child[0] = tail;
|
|
else head = node;
|
|
if(!unfull.node) { /* Add tree to head. */
|
|
struct PB_(branch) *const branch = PB_(branch)(head);
|
|
/*printf("adding the existing root, %s to %s\n",
|
|
orcify(tree->root), orcify(head));*/
|
|
assert(new_nodes > 1);
|
|
branch->child[1] = branch->child[0];
|
|
branch->child[0] = tree->root.node;
|
|
node = tree->root.node = head, tree->root.height++;
|
|
} else if(unfull.height) { /* Add head to tree. */
|
|
struct PB_(branch) *const branch = PB_(branch)(node = unfull.node);
|
|
/*printf("adding the linked list, %s to %s at %u\n",
|
|
orcify(head), orcify(inner), inner->base.size + 1);*/
|
|
assert(new_nodes);
|
|
branch->child[branch->base.size + 1] = head;
|
|
}
|
|
}
|
|
assert(node && node->size < TREE_MAX);
|
|
node->key[node->size] = key;
|
|
#ifdef TREE_VALUE
|
|
if(value) {
|
|
struct PB_(ref) ref;
|
|
ref.node = node, ref.idx = node->size;
|
|
*value = PB_(ref_to_value)(ref);
|
|
}
|
|
#endif
|
|
node->size++;
|
|
return TREE_UNIQUE;
|
|
catch:
|
|
free(node); /* Didn't work out. */
|
|
if(head) for( ; ; ) {
|
|
struct PB_(node) *const next = PB_(branch)(head)->child[0];
|
|
free(head); /* Didn't work out. */
|
|
if(!next) break;
|
|
head = next;
|
|
}
|
|
if(!errno) errno = ERANGE;
|
|
return TREE_ERROR;
|
|
}
|
|
|
|
/** Distributes `tree` on the right side so that, after a series of
|
|
<fn:<B>tree_bulk_add>, it will be consistent with the minimum number of keys
|
|
in a node. @return The distribution was a success and all nodes are within
|
|
rules. The only time that it would be false is if, maybe, a regular insertion
|
|
instead of a bulk insertion was performed interspersed with a bulk insertion
|
|
without calling this function. */
|
|
static int B_(tree_bulk_finish)(struct B_(tree) *const tree) {
|
|
struct PB_(sub) s;
|
|
struct PB_(node) *right;
|
|
if(!tree || !tree->root.node || tree->root.height == UINT_MAX) return 1;
|
|
for(s = tree->root; s.height; s.node = right, s.height--) {
|
|
unsigned distribute, right_want, right_move, take_sibling;
|
|
struct PB_(branch) *parent = PB_(branch)(s.node);
|
|
struct PB_(node) *sibling = (assert(parent->base.size),
|
|
parent->child[parent->base.size - 1]);
|
|
right = parent->child[parent->base.size];
|
|
if(TREE_MIN <= right->size) continue; /* Has enough. */
|
|
distribute = sibling->size + right->size;
|
|
if(distribute < 2 * TREE_MIN) return 0;
|
|
right_want = distribute / 2;
|
|
right_move = right_want - right->size;
|
|
take_sibling = right_move - 1;
|
|
/* Either the right has met the properties of a B-tree node, (covered
|
|
above,) or the left sibling is full from bulk-loading (relaxed.) */
|
|
assert(right->size < right_want && right_want >= TREE_MIN
|
|
&& sibling->size - take_sibling >= TREE_MIN + 1);
|
|
/* Move the right node to accept more keys. */
|
|
memmove(right->key + right_move, right->key,
|
|
sizeof *right->key * right->size);
|
|
#ifdef TREE_VALUE
|
|
memmove(right->value + right_move, right->value,
|
|
sizeof *right->value * right->size);
|
|
#endif
|
|
if(s.height > 1) { /* (Parent height.) */
|
|
struct PB_(branch) *rbranch = PB_(branch)(right),
|
|
*sbranch = PB_(branch)(sibling);
|
|
memmove(rbranch->child + right_move, rbranch->child,
|
|
sizeof *rbranch->child * (right->size + 1));
|
|
memcpy(rbranch->child, sbranch->child + sibling->size + 1
|
|
- right_move, sizeof *sbranch->child * right_move);
|
|
}
|
|
right->size += right_move;
|
|
/* Move one node from the parent. */
|
|
memcpy(right->key + take_sibling,
|
|
parent->base.key + parent->base.size - 1, sizeof *right->key);
|
|
#ifdef TREE_VALUE
|
|
memcpy(right->value + take_sibling,
|
|
parent->base.value + parent->base.size - 1, sizeof *right->value);
|
|
#endif
|
|
/* Move the others from the sibling. */
|
|
memcpy(right->key, sibling->key + sibling->size - take_sibling,
|
|
sizeof *right->key * take_sibling);
|
|
#ifdef TREE_VALUE
|
|
memcpy(right->value, sibling->value + sibling->size - take_sibling,
|
|
sizeof *right->value * take_sibling);
|
|
#endif
|
|
sibling->size -= take_sibling;
|
|
/* Sibling's key is now the parent's. */
|
|
memcpy(parent->base.key + parent->base.size - 1,
|
|
sibling->key + sibling->size - 1, sizeof *right->key);
|
|
#ifdef TREE_VALUE
|
|
memcpy(parent->base.value + parent->base.size - 1,
|
|
sibling->value + sibling->size - 1, sizeof *right->value);
|
|
#endif
|
|
sibling->size--;
|
|
}
|
|
return 1;
|
|
}
|
|
|
|
#ifdef TREE_VALUE /* <!-- map */
|
|
static enum tree_result B_(tree_add)(struct B_(tree) *const tree,
|
|
PB_(key) key, PB_(value) **const value)
|
|
#else /* map --><!-- set */
|
|
static enum tree_result B_(tree_add)(struct B_(tree) *const tree,
|
|
PB_(key) key)
|
|
#endif
|
|
{
|
|
struct PB_(node) *leaf = 0, *head = 0, **next = 0,
|
|
*sibling;
|
|
unsigned new_nodes, i;
|
|
struct PB_(ref) add, parent, hole, cursor;
|
|
int is_equal;
|
|
assert(tree);
|
|
if(!(add.node = tree->root.node)) goto idle;
|
|
else if(tree->root.height == UINT_MAX) goto empty;
|
|
goto content;
|
|
idle: /* No reserved memory. */
|
|
assert(!add.node && !tree->root.height);
|
|
if(!(add.node = malloc(sizeof *add.node))) goto catch;
|
|
tree->root.node = add.node;
|
|
tree->root.height = UINT_MAX;
|
|
goto empty;
|
|
empty: /* Reserved dynamic memory, but tree is empty. */
|
|
assert(add.node && tree->root.height == UINT_MAX);
|
|
tree->root.height = 0;
|
|
add.node->size = 0;
|
|
add.idx = 0;
|
|
goto insert;
|
|
content: /* Descend the tree; record last node that has space. */
|
|
parent.node = 0, is_equal = 0;
|
|
add = PB_(lower_r)(&tree->root, key, &parent, &is_equal);
|
|
if(is_equal) goto yield; /* Assumes key is unique. */
|
|
if(parent.node == add.node) goto insert; else goto allocate;
|
|
allocate: /* Pre-allocate new nodes in order. */
|
|
new_nodes = parent.node ? parent.height + 1 : tree->root.height + 2;
|
|
for(i = 0; i < new_nodes - 1; i++) {
|
|
struct PB_(branch) *branch;
|
|
if(!(branch = malloc(sizeof *branch))) goto catch;
|
|
branch->base.size = 0;
|
|
if(!head) head = &branch->base; else *next = &branch->base;
|
|
next = branch->child;
|
|
}
|
|
if(!(leaf = malloc(sizeof *leaf))) goto catch;
|
|
leaf->size = 0;
|
|
*next = leaf;
|
|
hole.node = 0;
|
|
if(parent.node) goto split; else goto grow;
|
|
grow: /* Raise tree height with zero-size one-child branch. */
|
|
assert(!parent.node && new_nodes);
|
|
parent.node = head, head = PB_(branch)(head)->child[0], new_nodes--;
|
|
parent.height = ++tree->root.height, parent.idx = 0;
|
|
PB_(branch)(parent.node)->child[0] = tree->root.node;
|
|
tree->root.node = parent.node;
|
|
goto split;
|
|
split: /* Simulates bottom-up, overfull node, split; really this is problematic
|
|
because we don't have parent pointers and we don't have space for overfull
|
|
nodes. So split top-down and leave some nodes blank for filling in next. */
|
|
assert(parent.node && parent.height && parent.node->size < TREE_MAX
|
|
&& new_nodes);
|
|
sibling = head, head = --new_nodes ? PB_(branch)(head)->child[0] : 0;
|
|
cursor.node = PB_(branch)(parent.node)->child[parent.idx];
|
|
cursor.height = parent.height - 1;
|
|
PB_(find_idx)(&cursor, key);
|
|
/* Add one space to unfull parent. This is double-copying when we loop
|
|
around a second time in `⎣(TREE_MAX-1)/2⎦/TREE_MAX` cases; not going to
|
|
optimize this because it would require a lookahead. */
|
|
memmove(parent.node->key + parent.idx + 1, parent.node->key + parent.idx,
|
|
sizeof *parent.node->key * (parent.node->size - parent.idx));
|
|
#ifdef TREE_VALUE
|
|
memmove(parent.node->value + parent.idx + 1,
|
|
parent.node->value + parent.idx,
|
|
sizeof *parent.node->value * (parent.node->size - parent.idx));
|
|
#endif
|
|
assert(0);
|
|
/*{
|
|
struct PB_(branch) *const pbranch = PB_(branch)(parent.node);
|
|
memmove(<#void *__dst#>, <#const void *__src#>, <#size_t __len#>)...
|
|
}*/
|
|
parent.node->size++;
|
|
if(cursor.idx == TREE_SPLIT) { /* Down the middle. */
|
|
printf("down middle\n");
|
|
if(!hole.node) hole = parent; /* Maybe? */
|
|
sibling->size = TREE_MAX - TREE_SPLIT - 1;
|
|
memcpy(sibling->key, cursor.node->key + TREE_SPLIT,
|
|
sizeof *sibling->key * (TREE_MAX - TREE_SPLIT));
|
|
#ifdef TREE_VALUE
|
|
memcpy(sibling->value, cursor.node->value + TREE_SPLIT,
|
|
sizeof *sibling->value * (TREE_MAX - TREE_SPLIT));
|
|
#endif
|
|
if(cursor.height) {
|
|
assert(0);
|
|
}
|
|
} else if(cursor.idx < TREE_SPLIT) {
|
|
assert(0);
|
|
} else /* Greater than. */ {
|
|
assert(0);
|
|
}
|
|
//found.node = tree->root.node, PB_(find_idx)(&found, key);
|
|
assert(0);
|
|
insert: /* `add` is referencing an unfull node that we want to insert. */
|
|
assert(add.node && add.idx <= add.node->size && add.node->size < TREE_MAX);
|
|
memmove(add.node->key + add.idx + 1, add.node->key + add.idx,
|
|
sizeof *add.node->key * (add.node->size - add.idx));
|
|
#ifdef TREE_VALUE
|
|
memmove(add.node->value + add.idx + 1, add.node->value + add.idx,
|
|
sizeof *add.node->value * (add.node->size - add.idx));
|
|
#endif
|
|
add.node->size++;
|
|
add.node->key[add.idx] = key;
|
|
goto unique;
|
|
yield: /* `add` is an existing value. */
|
|
#ifdef TREE_VALUE
|
|
if(value) *value = PB_(ref_to_value)(add);
|
|
#endif
|
|
return TREE_YIELD;
|
|
unique: /* `add` is a new value. */
|
|
#ifdef TREE_VALUE
|
|
if(value) *value = PB_(ref_to_value)(add);
|
|
#endif
|
|
return TREE_UNIQUE;
|
|
catch:
|
|
while(head) {
|
|
struct PB_(branch) *const branch = PB_(branch)(head);
|
|
head = branch->child[0];
|
|
}
|
|
if(!errno) errno = ERANGE;
|
|
return TREE_ERROR;
|
|
}
|
|
|
|
#if 0
|
|
/** Updates or adds a pointer to `x` into `trie`.
|
|
@param[eject] If not null, on success it will hold the overwritten value or
|
|
a pointer-to-null if it did not overwrite any value.
|
|
@return Success. @throws[realloc, ERANGE] @order \O(|`key`|) @allow */
|
|
static int B_(trie_put)(struct B_(trie) *const trie, const PB_(entry) x,
|
|
PB_(entry) */*const fixme*/eject)
|
|
{ return assert(trie && x), PB_(put)(trie, x, &eject, 0); }
|
|
|
|
/** Adds a pointer to `x` to `trie` only if the entry is absent or if calling
|
|
`replace` returns true or is null.
|
|
@param[eject] If not null, on success it will hold the overwritten value or
|
|
a pointer-to-null if it did not overwrite any value. If a collision occurs and
|
|
`replace` does not return true, this will be a pointer to `x`.
|
|
@param[replace] Called on collision and only replaces it if the function
|
|
returns true. If null, it is semantically equivalent to <fn:<T>trie_put>.
|
|
@return Success. @throws[realloc, ERANGE] @order \O(|`key`|) @allow */
|
|
static int B_(trie_policy)(struct B_(trie) *const trie, const PB_(entry) x,
|
|
PB_(entry) */*const*/ eject, const PB_(replace_fn) replace)
|
|
{ return assert(trie && x), PB_(put)(trie, x, &eject, replace); }
|
|
|
|
/** Tries to remove `key` from `trie`. @return Success. */
|
|
static int B_(trie_remove)(struct B_(trie) *const trie,
|
|
const char *const key) { return PB_(remove)(trie, key); }
|
|
#endif
|
|
|
|
#ifdef TREE_TEST /* <!-- test */
|
|
/* Forward-declare. */
|
|
static void (*PB_(to_string))(PB_(entry_c), char (*)[12]);
|
|
static const char *(*PB_(tree_to_string))(const struct B_(tree) *);
|
|
#include "../test/test_tree.h"
|
|
#endif /* test --> */
|
|
|
|
static void PB_(unused_base_coda)(void);
|
|
static void PB_(unused_base)(void) {
|
|
PB_(key) k;
|
|
memset(&k, 0, sizeof k);
|
|
PB_(is_element_c); PB_(forward_begin); PB_(forward_next);
|
|
PB_(is_element);
|
|
B_(tree)(); B_(tree_)(0); B_(tree_begin)(0); B_(tree_next)(0);
|
|
B_(tree_lower)(0, k);
|
|
B_(tree_get_next)(0, k);
|
|
#ifdef TREE_VALUE
|
|
B_(tree_bulk_add)(0, k, 0);
|
|
B_(tree_add)(0, k, 0);
|
|
#else
|
|
B_(tree_bulk_add)(0, k);
|
|
B_(tree_add)(0, k);
|
|
#endif
|
|
B_(tree_bulk_finish)(0);
|
|
PB_(unused_base_coda)();
|
|
}
|
|
static void PB_(unused_base_coda)(void) { PB_(unused_base)(); }
|
|
|
|
|
|
#elif defined(TREE_TO_STRING) /* base code --><!-- to string trait */
|
|
|
|
|
|
#ifdef TREE_TO_STRING_NAME
|
|
#define STR_(n) TREE_CAT(B_(tree), TREE_CAT(TREE_TO_STRING_NAME, n))
|
|
#else
|
|
#define STR_(n) TREE_CAT(B_(tree), n)
|
|
#endif
|
|
#define TO_STRING TREE_TO_STRING
|
|
#define TO_STRING_LEFT '{'
|
|
#define TO_STRING_RIGHT '}'
|
|
#include "to_string.h" /** \include */
|
|
#ifdef TREE_TEST /* <!-- expect: greedy satisfy forward-declared. */
|
|
#undef TREE_TEST
|
|
static PSTR_(to_string_fn) PB_(to_string) = PSTR_(to_string);
|
|
static const char *(*PB_(tree_to_string))(const struct B_(tree) *)
|
|
= &STR_(to_string);
|
|
#endif /* expect --> */
|
|
#undef STR_
|
|
#undef TREE_TO_STRING
|
|
#ifdef TREE_TO_STRING_NAME
|
|
#undef TREE_TO_STRING_NAME
|
|
#endif
|
|
|
|
|
|
#endif /* traits --> */
|
|
|
|
|
|
#ifdef TREE_EXPECT_TRAIT /* <!-- trait */
|
|
#undef TREE_EXPECT_TRAIT
|
|
#else /* trait --><!-- !trait */
|
|
#ifdef TREE_TEST
|
|
#error No TREE_TO_STRING traits defined for TREE_TEST.
|
|
#endif
|
|
#undef TREE_NAME
|
|
#undef TREE_KEY
|
|
#undef TREE_COMPARE
|
|
#ifdef TREE_VALUE
|
|
#undef TREE_VALUE
|
|
#endif
|
|
#ifdef TREE_TEST
|
|
#undef TREE_TEST
|
|
#endif
|
|
#undef BOX_
|
|
#undef BOX
|
|
#undef BOX_CONTENT
|
|
#undef BOX_ITERATOR
|
|
#endif /* !trait --> */
|
|
#undef TREE_TO_STRING_TRAIT
|
|
#undef TREE_TRAITS
|