1360 lines
49 KiB
C
1360 lines
49 KiB
C
/** @license 2022 Neil Edelman, distributed under the terms of the
|
|
[MIT License](https://opensource.org/licenses/MIT).
|
|
|
|
@abstract Stand-alone header <src/tree.h>; examples <test/test_tree.c>. On a
|
|
compatible workstation, `make` creates the test suite of the examples.
|
|
|
|
@subtitle Ordered key-tree
|
|
|
|
A <tag:<B>tree> is an ordered set or map contained in a tree. For memory
|
|
locality, this is implemented B-tree, described in
|
|
<Bayer, McCreight, 1972, Large>.
|
|
|
|
@param[TREE_NAME, TREE_KEY]
|
|
`<B>` that satisfies `C` naming conventions when mangled, required, and
|
|
`TREE_KEY`, a comparable type, <typedef:<PB>key>, whose default is
|
|
`unsigned int`. `<PB>` is private, whose names are prefixed in a manner to
|
|
avoid collisions.
|
|
|
|
@param[TREE_VALUE]
|
|
`TREE_VALUE` is an optional payload to go with the type, <typedef:<PB>value>.
|
|
This makes it a map of <tag:<B>tree_entry> instead of a set.
|
|
|
|
@param[TREE_COMPARE]
|
|
A function satisfying <typedef:<PB>compare_fn>. Defaults to ascending order.
|
|
Required if `TREE_KEY` is changed to an incomparable type.
|
|
|
|
@param[TREE_EXPECT_TRAIT]
|
|
Do not un-define certain variables for subsequent inclusion in a parameterized
|
|
trait.
|
|
|
|
@param[TREE_TO_STRING_NAME, TREE_TO_STRING]
|
|
To string trait contained in <to_string.h>; an optional unique `<SZ>`
|
|
that satisfies `C` naming conventions when mangled and function implementing
|
|
<typedef:<PSZ>to_string_fn>.
|
|
|
|
@fixme multi-key; implementation of order statistic tree?
|
|
@fixme merge, difference
|
|
|
|
@std C89 */
|
|
|
|
#if !defined(TREE_NAME)
|
|
#error Name TREE_NAME undefined.
|
|
#endif
|
|
#if defined(TREE_TO_STRING_NAME) || defined(TREE_TO_STRING)
|
|
#define TREE_TO_STRING_TRAIT 1
|
|
#else
|
|
#define TREE_TO_STRING_TRAIT 0
|
|
#endif
|
|
#define TREE_TRAITS TREE_TO_STRING_TRAIT
|
|
#if TREE_TRAITS > 1
|
|
#error Only one trait per include is allowed; use TREE_EXPECT_TRAIT.
|
|
#endif
|
|
#if defined(TREE_TO_STRING_NAME) && !defined(TREE_TO_STRING)
|
|
#error TREE_TO_STRING_NAME requires TREE_TO_STRING.
|
|
#endif
|
|
|
|
#ifndef TREE_H /* <!-- idempotent */
|
|
#define TREE_H
|
|
#include <stddef.h> /* fixme: stdlib, string should do it; what is going on? */
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <errno.h>
|
|
#include <assert.h>
|
|
#include <limits.h>
|
|
/* <Kernighan and Ritchie, 1988, p. 231>. */
|
|
#if defined(TREE_CAT_) || defined(TREE_CAT) || defined(B_) || defined(PB_) \
|
|
|| defined(TREE_IDLE)
|
|
#error Unexpected defines.
|
|
#endif
|
|
#define TREE_CAT_(n, m) n ## _ ## m
|
|
#define TREE_CAT(n, m) TREE_CAT_(n, m)
|
|
#define B_(n) TREE_CAT(TREE_NAME, n)
|
|
#define PB_(n) TREE_CAT(tree, B_(n))
|
|
/* Leaf: `TREE_MAX type`; branch: `TREE_MAX type + TREE_ORDER pointer`. In
 <Goodrich, Tamassia, Mount, 2011, Data>, these are (a,b)-trees as
 (TREE_MIN+1,TREE_MAX+1)-trees. The upper bound is `UCHAR_MAX` because the
 node's key count is stored in an `unsigned char`; the lower bound is two,
 which still gives every branch up to three children. */
#define TREE_MAX 5
#if TREE_MAX < 2 || TREE_MAX > UCHAR_MAX
#error TREE_MAX parameter range `[2, UCHAR_MAX]`.
#endif
|
|
/* This is the worst-case branching factor; the performance will be
|
|
\O(log_{`TREE_MIN`+1} `size`). Usually this is `⌈(TREE_MAX+1)/2⌉-1`. However,
|
|
smaller values are less-eager; in the extreme,
|
|
<Johnson, Shasha, 1993, Free-at-Empty>, show good results; this has been
|
|
chosen to provide hysteresis. (Except `TREE_MAX 2`, it's fixed.) */
|
|
#define TREE_MIN (TREE_MAX / 3 ? TREE_MAX / 3 : 1)
|
|
#if TREE_MIN == 0 || TREE_MIN > TREE_MAX / 2
|
|
#error TREE_MIN parameter range `[1, \floor(TREE_MAX / 2)]`.
|
|
#endif
|
|
#define TREE_ORDER (TREE_MAX + 1) /* Maximum degree, (branching factor.) */
|
|
#define TREE_SPLIT (TREE_ORDER / 2) /* Split index: even order left-leaning. */
|
|
/* The list of results is written once and expanded twice: once into the
 enumeration constants and once into their printable names. */
#define TREE_RESULT(X) X(ERROR), X(UNIQUE), X(YIELD)
#define TREE_RESULT_ENUM_(n) TREE_##n
#define TREE_RESULT_STR_(n) #n
/** A result of modifying the tree, of which `TREE_ERROR` is false.
 ![A diagram of the result states.](../doc/put.png) */
enum tree_result { TREE_RESULT(TREE_RESULT_ENUM_) };
/** A static array of strings describing the <tag:tree_result>, indexed by the
 enumeration value. */
static const char *const tree_result_str[] = { TREE_RESULT(TREE_RESULT_STR_) };
#undef TREE_RESULT_STR_
#undef TREE_RESULT_ENUM_
#undef TREE_RESULT
/* A pair of node counts: one for branches, one for leaves. */
struct tree_count {
	size_t branches;
	size_t leaves;
};
|
|
#endif /* idempotent --> */
|
|
|
|
|
|
#if TREE_TRAITS == 0 /* <!-- base code */
|
|
|
|
|
|
#ifndef TREE_KEY
|
|
#define TREE_KEY unsigned
|
|
#endif
|
|
|
|
/** A comparable type, defaults to `unsigned`. */
|
|
typedef TREE_KEY PB_(key);
|
|
typedef const TREE_KEY PB_(key_c);
|
|
|
|
#ifdef TREE_VALUE
|
|
/** On `TREE_VALUE`, otherwise just a set of <typedef:<PB>key>. */
|
|
typedef TREE_VALUE PB_(value);
|
|
typedef const TREE_VALUE PB_(value_c);
|
|
#endif
|
|
|
|
/** Returns a positive result if `a` is out-of-order with respect to `b`,
|
|
inducing a strict weak order. This is compatible, but less strict than the
|
|
comparators from `bsearch` and `qsort`; it only needs to divide entries
|
|
into two instead of three categories. */
|
|
typedef int (*PB_(compare_fn))(PB_(key_c) a, PB_(key_c) b);
|
|
|
|
#ifndef TREE_COMPARE /* <!-- !cmp */
|
|
/** The default `TREE_COMPARE` on `a` and `b` is integer comparison that
|
|
results in ascending order. @implements <typedef:<PH>compare_fn> */
|
|
static int PB_(default_compare)(PB_(key_c) a, PB_(key_c) b)
|
|
{ return a > b; }
|
|
#define TREE_COMPARE &PB_(default_compare)
|
|
#endif /* !cmp --> */
|
|
|
|
/* Check that `TREE_COMPARE` is a function implementing
|
|
<typedef:<PB>compare_fn>, if defined. */
|
|
static const PB_(compare_fn) PB_(compare) = (TREE_COMPARE);
|
|
|
|
/* These rules are more lazy than the original so as to not exhibit worst-case
|
|
behaviour in small trees, as <Johnson, Shasha, 1993, Free-at-Empty>, (lookup
|
|
is potentially slower after deleting.) In the terminology of
|
|
<Knuth, 1998 Art 3>,
|
|
* Every branch has at most `TREE_ORDER == TREE_MAX + 1` children, which is at
|
|
minimum three.
|
|
* Every non-root and non-bulk-loaded node has at least `TREE_MIN` keys,
|
|
(`⎣TREE_MAX/3⎦`.)
|
|
* Every branch has at least one child, `k`, and contains `k - 1` keys, (this
|
|
is a consequence of the fact that they are implicitly storing a complete
|
|
binary sub-tree.)
|
|
* All leaves are at the maximum depth and height zero; they don't carry links
|
|
to other nodes, (hence, leaf.) In this code, a branch node is a
|
|
specialization of a (leaf) node with children. One can tell if it's a branch
|
|
by keeping track of the height.
|
|
* There are two empty B-trees to facilitate allocation hysteresis between
|
|
0 -- 1: idle `{ 0, 0 }`, and `{ garbage leaf, UINT_MAX }`, one could test,
|
|
`!root || height == UINT_MAX`.
|
|
* Bulk-loading always is on the right side. */
|
|
/* A node as seen by a leaf: a key count followed by the key array, and, for
 maps only, a parallel value array. */
struct PB_(node) {
	unsigned char size; /* `[0, TREE_MAX]`. */
	PB_(key) key[TREE_MAX]; /* Cache-friendly lookup. */
#ifdef TREE_VALUE
	PB_(value) value[TREE_MAX];
#endif
};
/* B-tree branch is a <tag:<PB>node> and links to `size + 1` nodes. The node
 is embedded as the member `base`, which is how <fn:<PB>branch> recovers the
 branch from a node pointer. */
struct PB_(branch) {
	struct PB_(node) base;
	struct PB_(node) *child[TREE_ORDER];
};
|
|
/** @return Downcasts `as_node` to a branch. */
|
|
static struct PB_(branch) *PB_(branch)(struct PB_(node) *const as_leaf)
|
|
{ return (struct PB_(branch) *)(void *)
|
|
((char *)as_leaf - offsetof(struct PB_(branch), base)); }
|
|
/** @return Downcasts `as_node` to a branch. */
|
|
static const struct PB_(branch) *PB_(branch_c)(const struct PB_(node) *
|
|
const as_node) { return (const struct PB_(branch) *)(const void *)
|
|
((const char *)as_node - offsetof(struct PB_(branch), base)); }
|
|
/* Address specific entry. */
|
|
struct PB_(ref) { struct PB_(node) *node; unsigned height, idx; };
|
|
struct PB_(ref_c) { const struct PB_(node) *node; unsigned height, idx; };
|
|
struct PB_(tree) { struct PB_(node) *node; unsigned height; };
|
|
/** To initialize it to an idle state, see <fn:<B>tree>, `TRIE_IDLE`, `{0}`
|
|
(`C99`), or being `static`.
|
|
|
|
![States.](../doc/states.png) */
|
|
struct B_(tree);
|
|
struct B_(tree) { struct PB_(tree) root; };
|
|
|
|
#ifdef TREE_VALUE /* <!-- value */
|
|
|
|
/** On `TREE_VALUE`, creates a map from pointer-to-<typedef:<PB>key> to
|
|
pointer-to-<typedef:<PB>value>. The reason these are pointers is because it
|
|
is not connected in memory. (Does `key` still have to be?) */
|
|
struct B_(tree_entry) { PB_(key) *key; PB_(value) *value; };
|
|
struct B_(tree_entry_c) { PB_(key_c) *key; PB_(value_c) *value; };
|
|
/** On `TREE_VALUE`, otherwise it's just an alias for
|
|
pointer-to-<typedef:<PB>key>. */
|
|
typedef struct B_(tree_entry) PB_(entry);
|
|
typedef struct B_(tree_entry_c) PB_(entry_c);
|
|
static PB_(entry) PB_(null_entry)(void)
|
|
{ const PB_(entry) e = { 0, 0 }; return e; }
|
|
static PB_(entry_c) PB_(null_entry_c)(void)
|
|
{ const PB_(entry_c) e = { 0, 0 }; return e; }
|
|
static PB_(entry) PB_(leaf_to_entry)(struct PB_(node) *const leaf,
|
|
const unsigned i) { PB_(entry) e;
|
|
e.key = leaf->key + i, e.value = leaf->value + i; return e; }
|
|
static PB_(entry_c) PB_(leaf_to_entry_c)(const struct PB_(node) *const leaf,
|
|
const unsigned i) { PB_(entry_c) e;
|
|
e.key = leaf->key + i, e.value = leaf->value + i; return e; }
|
|
static PB_(value) *PB_(ref_to_value)(const struct PB_(ref) ref)
|
|
{ return ref.node ? ref.node->value + ref.idx : 0; }
|
|
|
|
#else /* value --><!-- !value */
|
|
|
|
typedef PB_(key) PB_(value);
|
|
typedef PB_(key) *PB_(entry);
|
|
typedef PB_(key_c) *PB_(entry_c);
|
|
static PB_(entry_c) PB_(null_entry_c)(void) { return 0; }
|
|
static PB_(entry) PB_(null_entry)(void) { return 0; }
|
|
static PB_(entry) PB_(leaf_to_entry)(struct PB_(node) *const leaf,
|
|
const unsigned i) { return leaf->key + i; }
|
|
static PB_(entry_c) PB_(leaf_to_entry_c)(const struct PB_(node) *const leaf,
|
|
const unsigned i) { return leaf->key + i; }
|
|
static PB_(value) *PB_(ref_to_value)(const struct PB_(ref) ref)
|
|
{ return ref.node ? ref.node->key + ref.idx : 0; }
|
|
|
|
#endif /* !value --> */
|
|
|
|
/** @return If `ref` in `tree` has a predecessor, then it decrements. */
|
|
static int PB_(to_predecessor)(struct PB_(tree) tree,
|
|
struct PB_(ref) *const ref) {
|
|
assert(ref);
|
|
if(!tree.node || tree.height == UINT_MAX) return 0; /* Empty. */
|
|
if(!ref->node) { /* Null: `ret` is the last key. */
|
|
struct PB_(tree) descend = tree;
|
|
while(descend.height) descend.height--,
|
|
descend.node = PB_(branch)(descend.node)->child[descend.node->size];
|
|
/* While bulk-loading, could have empty right. */
|
|
if(descend.node->size) ref->node = descend.node,
|
|
ref->height = 0, ref->idx = descend.node->size - 1;
|
|
else assert(tree.node->size), ref->node = tree.node,
|
|
ref->height = tree.height, ref->idx = tree.node->size - 1;
|
|
return 1;
|
|
}
|
|
while(ref->height) ref->height--,
|
|
ref->node = PB_(branch_c)(ref->node)->child[ref->idx],
|
|
ref->idx = ref->node->size;
|
|
if(ref->idx) return ref->idx--, 1; /* Likely. */
|
|
{ /* Re-descend; pick the minimum height node that has a previous key. */
|
|
struct PB_(ref) prev;
|
|
unsigned a0;
|
|
PB_(key) x;
|
|
for(prev.node = 0, x = ref->node->key[0]; tree.height;
|
|
tree.node = PB_(branch_c)(tree.node)->child[a0], tree.height--) {
|
|
/* fixme: This is repeated. */
|
|
unsigned a1 = tree.node->size;
|
|
a0 = 0;
|
|
while(a0 < a1) {
|
|
const unsigned m = (a0 + a1) / 2;
|
|
if(PB_(compare)(x, tree.node->key[m]) > 0) a0 = m + 1; else a1 = m;
|
|
}
|
|
if(a0)
|
|
prev.node = tree.node, prev.height = tree.height, prev.idx = a0 - 1;
|
|
}
|
|
if(!prev.node) return 0; /* Off the left. */
|
|
*ref = prev;
|
|
} return 1; /* Jumped nodes. */
|
|
}
|
|
/* @return If `ref_c` in `tree` has a successor, then it increments. */
|
|
#define TREE_TO_SUCCESSOR(to_successor_c, ref_c) \
|
|
static int PB_(to_successor_c)(struct PB_(tree) tree, \
|
|
struct PB_(ref_c) *const ref) { \
|
|
assert(ref); \
|
|
if(!tree.node || tree.height == UINT_MAX) return 0; /* Empty. */ \
|
|
if(!ref->node) \
|
|
ref->node = tree.node, ref->height = tree.height, ref->idx = 0; \
|
|
else \
|
|
ref->idx++; \
|
|
while(ref->height) ref->height--, \
|
|
ref->node = PB_(branch_c)(ref->node)->child[ref->idx], ref->idx = 0; \
|
|
if(ref->idx < ref->node->size) return 1; /* Likely. */ \
|
|
if(!ref->node->size) return 0; /* When bulk-loading. */ \
|
|
{ /* Re-descend; pick the minimum height node that has a next key. */ \
|
|
struct PB_(ref_c) next; \
|
|
unsigned a0; \
|
|
PB_(key) x; \
|
|
for(next.node = 0, x = ref->node->key[ref->node->size - 1]; tree.height; \
|
|
tree.node = PB_(branch_c)(tree.node)->child[a0], tree.height--) { \
|
|
unsigned a1 = tree.node->size; \
|
|
a0 = 0; \
|
|
while(a0 < a1) { \
|
|
const unsigned m = (a0 + a1) / 2; \
|
|
if(PB_(compare)(x, tree.node->key[m]) > 0) a0 = m + 1; else a1 = m;\
|
|
} \
|
|
if(a0 < tree.node->size) \
|
|
next.node = tree.node, next.height = tree.height, next.idx = a0; \
|
|
} \
|
|
if(!next.node) return 0; /* Off the right. */ \
|
|
*ref = next; \
|
|
} return 1; /* Jumped nodes. */ \
|
|
}
|
|
TREE_TO_SUCCESSOR(to_successor, ref)
|
|
TREE_TO_SUCCESSOR(to_successor_c, ref_c) /* For forward iteration. */
|
|
#undef TREE_TO_SUCCESSOR
|
|
|
|
#define BOX_CONTENT PB_(entry_c)
|
|
/** Is `e` not null? @implements `is_element_c` */
|
|
static int PB_(is_element_c)(PB_(entry_c) e) {
|
|
#ifdef TREE_VALUE
|
|
return !!e.key;
|
|
#else
|
|
return !!e;
|
|
#endif
|
|
}
|
|
/* @implements `forward` */
|
|
struct PB_(forward) { const struct PB_(tree) *root; struct PB_(ref_c) next; };
|
|
/** @return Before `tree`. @implements `forward` */
|
|
static struct PB_(forward) PB_(forward)(const struct B_(tree) *const
|
|
tree) { struct PB_(forward) it;
|
|
it.root = tree ? &tree->root : 0, it.next.node = 0;
|
|
return it;
|
|
}
|
|
/** Move to next `it`. @return Element or null. @implements `next_c` */
|
|
static PB_(entry_c) PB_(next_c)(struct PB_(forward) *const it) {
|
|
return assert(it), PB_(to_successor_c)(*it->root, &it->next) ?
|
|
PB_(leaf_to_entry_c)(it->next.node, it->next.idx) : PB_(null_entry_c)();
|
|
}
|
|
|
|
#define BOX_ITERATOR PB_(entry)
|
|
/** Is `x` not null? @implements `is_element` */
|
|
static int PB_(is_element)(const PB_(entry) e) {
|
|
#ifdef TREE_VALUE
|
|
return !!e.key;
|
|
#else
|
|
return !!e;
|
|
#endif
|
|
}
|
|
/* @implements `iterator` */
|
|
struct PB_(iterator) { struct PB_(tree) *root; struct PB_(ref) i; int seen; };
|
|
/** @return Iterator to null in `tree`. @implements `iterator` */
|
|
static struct PB_(iterator) PB_(iterator)(struct B_(tree) *const tree) {
|
|
struct PB_(iterator) it;
|
|
it.root = tree ? &tree->root : 0, it.i.node = 0, it.seen = 0;
|
|
return it;
|
|
}
|
|
/** Advances `it`. @return Element or null. @implements `next` */
|
|
static PB_(entry) PB_(next)(struct PB_(iterator) *const it) {
|
|
assert(it);
|
|
if(!it->root
|
|
|| (it->seen || !it->i.node) && !PB_(to_successor)(*it->root, &it->i)) {
|
|
it->i.node = 0, it->seen = 0;
|
|
return PB_(null_entry)();
|
|
}
|
|
it->seen = 1;
|
|
return PB_(leaf_to_entry)(it->i.node, it->i.idx);
|
|
}
|
|
/** Move to previous `it`. @return Element or null. @implements `previous` */
|
|
static PB_(entry) PB_(previous)(struct PB_(iterator) *const it) {
|
|
assert(it && 0);
|
|
if(!it->root || !it->i.node && it->seen != -1
|
|
|| it->seen && !PB_(to_predecessor)(*it->root, &it->i))
|
|
return PB_(null_entry)();
|
|
it->seen = -1;
|
|
return PB_(leaf_to_entry)(it->i.node, it->i.idx);
|
|
}
|
|
|
|
/* Want to find slightly different things; code re-use is bad. Confusing.
 These fragments rely on the names `tree` and `key` being in scope where they
 are expanded, and `TREE_START` declares the local `hi`, which `TREE_FORNODE`
 then uses. */
/* The three clauses of a `for`: start `r` at the root of `tree`, no exit
 test, and step down to child `r.idx`. */
#define TREE_FORTREE(r) r.node = tree->node, r.height = tree->height; ; \
	r.node = PB_(branch_c)(r.node)->child[r.idx], r.height--
/* Opens the search in the node of `r`: `[r.idx, hi)` is `[0, size)`. */
#define TREE_START(r) unsigned hi = r.node->size; r.idx = 0;
/* Binary search for `key` in the node of `r`, leaving the lower bound in
 `r.idx`; executes the statement `bail` instead if the node is empty. */
#define TREE_FORNODE(r, bail) if(!hi) bail; \
	do { \
		const unsigned mid = (r.idx + hi) / 2; \
		if(PB_(compare)(key, r.node->key[mid]) > 0) r.idx = mid + 1; \
		else hi = mid; \
	} while(r.idx < hi);
/* True if the key at `r` is not out-of-order with respect to `key`; after
 the lower-bound search this is used as the equality test. */
#define TREE_FLIPPED(r) PB_(compare)(r.node->key[r.idx], key) <= 0
|
|
/** One height at a time. */
|
|
static void PB_(find_idx)(struct PB_(ref) *const lo, const PB_(key) key) {
|
|
TREE_START((*lo))
|
|
TREE_FORNODE((*lo), return)
|
|
}
|
|
/** Finds lower-bound of `key` in `tree`. */
|
|
static struct PB_(ref) PB_(lower_r)(struct PB_(tree) *const tree,
|
|
const PB_(key) key) {
|
|
struct PB_(ref) i, lo = { 0, 0, 0 };
|
|
for(TREE_FORTREE(i)) {
|
|
TREE_START(i)
|
|
TREE_FORNODE(i, continue)
|
|
if(i.idx < i.node->size) {
|
|
lo = i;
|
|
/* Might be useful expanding this to multi-keys. */
|
|
if(TREE_FLIPPED(i)) break;
|
|
}
|
|
if(!i.height) break;
|
|
}
|
|
return lo;
|
|
}
|
|
/** Finds lower-bound of `key` in `tree` while counting the non-filled `hole`
|
|
and `is_equal`. (fixme: is_equal useless) */
|
|
static struct PB_(ref) PB_(lookup_insert)(struct PB_(tree) *const tree,
|
|
const PB_(key) key, struct PB_(ref) *const hole, int *const is_equal) {
|
|
struct PB_(ref) lo;
|
|
hole->node = 0;
|
|
for(TREE_FORTREE(lo)) {
|
|
TREE_START(lo)
|
|
if(hi < TREE_MAX) *hole = lo;
|
|
TREE_FORNODE(lo, continue)
|
|
if(lo.node->size < TREE_MAX) hole->idx = lo.idx;
|
|
if(lo.idx < lo.node->size && TREE_FLIPPED(lo)) { *is_equal = 1; break; }
|
|
if(!lo.height) break;
|
|
}
|
|
return lo;
|
|
}
|
|
/** Finds lower-bound of `key` in `tree` while counting the non-minimum `hole`
|
|
and `is_equal`. (fixme: is_equal useless) */
|
|
static struct PB_(ref) PB_(lookup_remove)(struct PB_(tree) *const tree,
|
|
const PB_(key) key, struct PB_(ref) *const lump) {
|
|
struct PB_(ref) lo;
|
|
lump->node = 0;
|
|
for(TREE_FORTREE(lo)) {
|
|
TREE_START(lo)
|
|
TREE_FORNODE(lo, continue)
|
|
if(lo.node->size > TREE_MIN || lo.height && (
|
|
lo.idx
|
|
&& PB_(branch)(lo.node)->child[lo.idx - 1]->size > TREE_MIN
|
|
|| lo.idx < lo.node->size
|
|
&& PB_(branch)(lo.node)->child[lo.idx + 1]->size > TREE_MIN
|
|
)) *lump = lo;
|
|
if(lo.idx < lo.node->size && TREE_FLIPPED(lo)) break;
|
|
if(!lo.height) { lo.node = 0; break; } /* Was not in. */
|
|
}
|
|
if(!lump->node) {
|
|
/* Check for root. */
|
|
assert(0);
|
|
}
|
|
return lo;
|
|
}
|
|
#undef TREE_FORTREE
|
|
#undef TREE_START
|
|
#undef TREE_FORNODE
|
|
#undef TREE_FLIPPED
|
|
|
|
/** @param[tree] Can be null. @return Lower bound of `x` in `tree`.
|
|
@order \O(\log |`tree`|) */
|
|
static struct PB_(ref) PB_(lower)(struct PB_(tree) tree, const PB_(key) x) {
|
|
if(!tree.node || tree.height == UINT_MAX) {
|
|
struct PB_(ref) ref; ref.node = 0; return ref;
|
|
} else {
|
|
return PB_(lower_r)(&tree, x);
|
|
}
|
|
}
|
|
|
|
/** Frees non-empty `tree` and it's children recursively, but doesn't put it
|
|
to idle or clear pointers.
|
|
@param[one] If `one` is valid, tries to keep one leaf. Set to null before. */
|
|
static void PB_(clear_r)(struct PB_(tree) tree, struct PB_(node) **const keep) {
|
|
assert(tree.node);
|
|
if(!tree.height) {
|
|
if(keep && !*keep) *keep = tree.node;
|
|
else free(tree.node);
|
|
} else {
|
|
struct PB_(tree) child;
|
|
unsigned i;
|
|
child.height = tree.height - 1;
|
|
for(i = 0; i <= tree.node->size; i++)
|
|
child.node = PB_(branch)(tree.node)->child[i],
|
|
PB_(clear_r)(child, keep);
|
|
free(PB_(branch)(tree.node));
|
|
}
|
|
}
|
|
/** `tree` can be null. */
|
|
static void PB_(clear)(struct B_(tree) *tree) {
|
|
struct PB_(node) *one = 0;
|
|
/* Already not there/idle/empty. */
|
|
if(!tree || !tree->root.node || tree->root.height == UINT_MAX) return;
|
|
PB_(clear_r)(tree->root, &one), assert(one);
|
|
/* This is a special state where the tree has one leaf, but it is empty.
|
|
This state exists because it gives hysteresis to 0 -- 1 transition. */
|
|
tree->root.node = one;
|
|
tree->root.height = UINT_MAX;
|
|
}
|
|
|
|
/* Box override information. */
|
|
#define BOX_ PB_
|
|
#define BOX struct B_(tree)
|
|
|
|
/** @return Initializes `tree` to idle. @order \Theta(1) @allow */
|
|
static struct B_(tree) B_(tree)(void) {
|
|
struct B_(tree) tree;
|
|
tree.root.node = 0; tree.root.height = 0;
|
|
return tree;
|
|
}
|
|
|
|
/** Returns an initialized `tree` to idle, `tree` can be null. @allow */
|
|
static void B_(tree_)(struct B_(tree) *const tree) {
|
|
if(!tree) return; /* Null. */
|
|
if(!tree->root.node) { /* Idle. */
|
|
assert(!tree->root.height);
|
|
} else if(tree->root.height == UINT_MAX) { /* Empty. */
|
|
assert(tree->root.node), free(tree->root.node);
|
|
} else {
|
|
PB_(clear_r)(tree->root, 0);
|
|
}
|
|
*tree = B_(tree)();
|
|
}
|
|
|
|
/** Stores an iteration in a tree. Generally, changes in the topology of the
|
|
tree invalidate it. (Future: have insert and delete with iterators.) */
|
|
struct B_(tree_iterator) { struct PB_(iterator) _; };
|
|
/** @return An iterator before the first element of `tree`. Can be null.
|
|
@allow */
|
|
static struct B_(tree_iterator) B_(tree_iterator)(struct B_(tree) *const tree)
|
|
{ struct B_(tree_iterator) it; it._ = PB_(iterator)(tree); return it; }
|
|
/** Advances `it` to the next element. @return A pointer to the current
|
|
element or null. @allow */
|
|
static PB_(entry) B_(tree_next)(struct B_(tree_iterator) *const it)
|
|
{ return PB_(next)(&it->_); }
|
|
|
|
/** @param[tree] Can be null. @return Finds the smallest entry in `tree` that
|
|
is at the lower bound of `x`. If `x` is higher than any of `tree`, it will be
|
|
placed just passed the end. @order \O(\log |`tree`|) @allow */
|
|
static struct B_(tree_iterator) B_(tree_lower_iterator)
|
|
(struct B_(tree) *const tree, const PB_(key) x) {
|
|
struct B_(tree_iterator) it;
|
|
if(!tree) return it._.root = 0, it;
|
|
it._.i = PB_(lower)(tree->root, x);
|
|
it._.root = &tree->root;
|
|
it._.seen = 0;
|
|
return it;
|
|
}
|
|
|
|
/** For example, `tree = { 10 }`, `x = 5 -> 10`, `x = 10 -> 10`,
|
|
`x = 11 -> null`.
|
|
@return Lower-bound value match for `x` in `tree` or null if `x` is greater
|
|
than all in `tree`. @order \O(\log |`tree`|) @allow */
|
|
static PB_(value) *B_(tree_lower_value)(struct B_(tree) *const tree,
|
|
const PB_(key) x)
|
|
{ return tree ? PB_(ref_to_value)(PB_(lower)(tree->root, x)) : 0; }
|
|
|
|
/** Clears `tree`, which can be null, idle, empty, or full. If it is empty or
|
|
full, it remains active. */
|
|
static void B_(tree_clear)(struct B_(tree) *const tree) { PB_(clear)(tree); }
|
|
|
|
#ifdef TREE_VALUE /* <!-- map */
|
|
/** Packs `key` on the right side of `tree` without doing the usual
|
|
restructuring. All other topology modification functions should be avoided
|
|
until followed by <fn:<B>tree_bulk_finish>.
|
|
@param[value] A pointer to the key's value which is set by the function on
|
|
returning true. A null pointer in this parameter causes the value to go
|
|
uninitialized. This parameter is not there if one didn't specify `TREE_VALUE`.
|
|
@return One of <tag:tree_result>: `TREE_ERROR` and `errno` will be set,
|
|
`TREE_YIELD` if the key is already (the highest) in the tree, and
|
|
`TREE_UNIQUE`, added, the `value` (if specified) is uninitialized.
|
|
@throws[EDOM] `x` is smaller than the largest key in `tree`. @throws[malloc] */
|
|
static enum tree_result B_(tree_bulk_add)(struct B_(tree) *const tree,
|
|
PB_(key) key, PB_(value) **const value)
|
|
#else /* map --><!-- set */
|
|
static enum tree_result B_(tree_bulk_add)(struct B_(tree) *const tree,
|
|
PB_(key) key)
|
|
#endif
|
|
{
|
|
struct PB_(node) *node = 0, *head = 0; /* The original and new. */
|
|
assert(tree);
|
|
if(!tree->root.node) { /* Idle tree. */
|
|
assert(!tree->root.height);
|
|
if(!(node = malloc(sizeof *node))) goto catch;
|
|
node->size = 0;
|
|
tree->root.node = node;
|
|
} else if(tree->root.height == UINT_MAX) { /* Empty tree. */
|
|
tree->root.height = 0;
|
|
tree->root.node->size = 0;
|
|
} else {
|
|
struct PB_(tree) unfull = { 0, 0 };
|
|
unsigned new_nodes, n; /* Count new nodes. */
|
|
struct PB_(node) *tail = 0, *last = 0;
|
|
struct PB_(branch) *pretail = 0;
|
|
struct PB_(tree) scout;
|
|
PB_(key) i;
|
|
for(scout = tree->root; ; scout.node = PB_(branch)(scout.node)
|
|
->child[scout.node->size], scout.height--) {
|
|
if(scout.node->size < TREE_MAX) unfull = scout;
|
|
if(scout.node->size) last = scout.node;
|
|
if(!scout.height) break;
|
|
}
|
|
assert(last), i = last->key[last->size - 1];
|
|
/* Verify that the argument is not smaller than the largest. */
|
|
if(PB_(compare)(i, key) > 0) return errno = EDOM, TREE_ERROR;
|
|
if(PB_(compare)(key, i) <= 0) {
|
|
#ifdef TREE_VALUE
|
|
if(value) { /* Last value in the last node. */
|
|
struct PB_(ref) ref;
|
|
ref.node = last, ref.idx = last->size - 1;
|
|
*value = PB_(ref_to_value)(ref);
|
|
}
|
|
#endif
|
|
return TREE_YIELD;
|
|
}
|
|
|
|
/* One leaf, and the rest branches. */
|
|
new_nodes = n = unfull.node ? unfull.height : tree->root.height + 2;
|
|
/*printf("new_nodes: %u, tree height %u\n", new_nodes, tree->height);*/
|
|
if(!n) {
|
|
node = unfull.node;
|
|
} else {
|
|
if(!(node = tail = malloc(sizeof *tail))) goto catch;
|
|
tail->size = 0;
|
|
/*printf("new tail: %s.\n", orcify(tail));*/
|
|
while(--n) {
|
|
struct PB_(branch) *b;
|
|
if(!(b = malloc(sizeof *b))) goto catch;
|
|
b->base.size = 0;
|
|
/*printf("new branch: %s.\n", orcify(b));*/
|
|
if(!head) b->child[0] = 0, pretail = b; /* First loop. */
|
|
else b->child[0] = head; /* Not first loop. */
|
|
head = &b->base;
|
|
}
|
|
}
|
|
|
|
/* Post-error; modify the original as needed. */
|
|
if(pretail) pretail->child[0] = tail;
|
|
else head = node;
|
|
if(!unfull.node) { /* Add tree to head. */
|
|
struct PB_(branch) *const branch = PB_(branch)(head);
|
|
/*printf("adding the existing root, %s to %s\n",
|
|
orcify(tree->root), orcify(head));*/
|
|
assert(new_nodes > 1);
|
|
branch->child[1] = branch->child[0];
|
|
branch->child[0] = tree->root.node;
|
|
node = tree->root.node = head, tree->root.height++;
|
|
} else if(unfull.height) { /* Add head to tree. */
|
|
struct PB_(branch) *const branch = PB_(branch)(node = unfull.node);
|
|
/*printf("adding the linked list, %s to %s at %u\n",
|
|
orcify(head), orcify(inner), inner->base.size + 1);*/
|
|
assert(new_nodes);
|
|
branch->child[branch->base.size + 1] = head;
|
|
}
|
|
}
|
|
assert(node && node->size < TREE_MAX);
|
|
node->key[node->size] = key;
|
|
#ifdef TREE_VALUE
|
|
if(value) {
|
|
struct PB_(ref) ref;
|
|
ref.node = node, ref.idx = node->size;
|
|
*value = PB_(ref_to_value)(ref);
|
|
}
|
|
#endif
|
|
node->size++;
|
|
return TREE_UNIQUE;
|
|
catch:
|
|
free(node); /* Didn't work out. */
|
|
while(head) {
|
|
struct PB_(node) *const next = PB_(branch)(head)->child[0];
|
|
free(head);
|
|
head = next;
|
|
}
|
|
if(!errno) errno = ERANGE;
|
|
return TREE_ERROR;
|
|
}
|
|
|
|
/** Distributes `tree` on the right side so that, after a series of
|
|
<fn:<B>tree_bulk_add>, it will be consistent with the minimum number of keys
|
|
in a node. @return The re-distribution was a success and all nodes are within
|
|
rules. The only time that it would be false is if a regular operation was
|
|
performed interspersed with a bulk insertion without calling this function.
|
|
(Maybe we should up the minimum to 1/2 for this function?)
|
|
@order \O(\log `size`) */
|
|
static int B_(tree_bulk_finish)(struct B_(tree) *const tree) {
|
|
struct PB_(tree) s;
|
|
struct PB_(node) *right;
|
|
if(!tree || !tree->root.node || tree->root.height == UINT_MAX) return 1;
|
|
for(s = tree->root; s.height; s.node = right, s.height--) {
|
|
unsigned distribute, right_want, right_move, take_sibling;
|
|
struct PB_(branch) *parent = PB_(branch)(s.node);
|
|
struct PB_(node) *sibling = (assert(parent->base.size),
|
|
parent->child[parent->base.size - 1]);
|
|
right = parent->child[parent->base.size];
|
|
if(TREE_MIN <= right->size) continue; /* Has enough. */
|
|
distribute = sibling->size + right->size;
|
|
/* Should have at least `TREE_MAX` on left. */
|
|
if(distribute < 2 * TREE_MIN) return 0;
|
|
right_want = distribute / 2;
|
|
right_move = right_want - right->size;
|
|
take_sibling = right_move - 1;
|
|
/* Either the right has met the properties of a B-tree node, (covered
|
|
above,) or the left sibling is full from bulk-loading (relaxed.) */
|
|
assert(right->size < right_want && right_want >= TREE_MIN
|
|
&& sibling->size - take_sibling >= TREE_MIN + 1);
|
|
/* Move the right node to accept more keys. */
|
|
memmove(right->key + right_move, right->key,
|
|
sizeof *right->key * right->size);
|
|
#ifdef TREE_VALUE
|
|
memmove(right->value + right_move, right->value,
|
|
sizeof *right->value * right->size);
|
|
#endif
|
|
printf("height %u\n", s.height);
|
|
if(s.height > 1) { /* (Parent height.) */
|
|
struct PB_(branch) *rbranch = PB_(branch)(right),
|
|
*sbranch = PB_(branch)(sibling);
|
|
memmove(rbranch->child + right_move, rbranch->child,
|
|
sizeof *rbranch->child * (right->size + 1));
|
|
memcpy(rbranch->child, sbranch->child + sibling->size + 1
|
|
- right_move, sizeof *sbranch->child * right_move);
|
|
}
|
|
right->size += right_move;
|
|
/* Move one node from the parent. */
|
|
printf("right:%u <- parent:%u (1)\n",
|
|
take_sibling, parent->base.size - 1);
|
|
memcpy(right->key + take_sibling,
|
|
parent->base.key + parent->base.size - 1, sizeof *right->key);
|
|
#ifdef TREE_VALUE
|
|
memcpy(right->value + take_sibling,
|
|
parent->base.value + parent->base.size - 1, sizeof *right->value);
|
|
#endif
|
|
/* Move the others from the sibling. */
|
|
printf("right <- sibling(%u) down to %u\n",
|
|
sibling->size, take_sibling);
|
|
memcpy(right->key, sibling->key + sibling->size - take_sibling,
|
|
sizeof *right->key * take_sibling);
|
|
#ifdef TREE_VALUE
|
|
memcpy(right->value, sibling->value + sibling->size - take_sibling,
|
|
sizeof *right->value * take_sibling);
|
|
#endif
|
|
sibling->size -= take_sibling;
|
|
/* Sibling's key is now the parent's. */
|
|
printf("parent:%u <- sibling:%u (1)\n",
|
|
parent->base.size - 1, sibling->size - 1);
|
|
memcpy(parent->base.key + parent->base.size - 1,
|
|
sibling->key + sibling->size - 1, sizeof *right->key);
|
|
#ifdef TREE_VALUE
|
|
memcpy(parent->base.value + parent->base.size - 1,
|
|
sibling->value + sibling->size - 1, sizeof *right->value);
|
|
#endif
|
|
sibling->size--;
|
|
/* fixme: Also take the children. This is backwards in right. */
|
|
}
|
|
return 1;
|
|
}
|
|
|
|
#ifdef TREE_VALUE /* <!-- map */
|
|
/** @param[value] If non-null and successful, a pointer that receives the
|
|
address of the value associated with the key. Only present if `TREE_VALUE`
|
|
(map) was specified.
|
|
@return Either `TREE_ERROR` (false) and doesn't touch `tree`, `TREE_UNIQUE`
|
|
and adds a new key, or `TREE_YIELD` and updates an existing key.
|
|
@throws[malloc] */
|
|
static enum tree_result B_(tree_add)(struct B_(tree) *const tree,
|
|
PB_(key) key, PB_(value) **const value)
|
|
#else /* map --><!-- set */
|
|
static enum tree_result B_(tree_add)(struct B_(tree) *const tree,
|
|
PB_(key) key)
|
|
#endif /* set --> */
|
|
{
|
|
struct PB_(node) *new_head = 0;
|
|
struct PB_(ref) add, hole, cursor;
|
|
int is_growing = 0;
|
|
assert(tree);
|
|
if(!(add.node = tree->root.node)) goto idle;
|
|
else if(tree->root.height == UINT_MAX) goto empty;
|
|
goto descend;
|
|
idle: /* No reserved memory. */
|
|
assert(!add.node && !tree->root.height);
|
|
if(!(add.node = malloc(sizeof *add.node))) goto catch;
|
|
tree->root.node = add.node;
|
|
tree->root.height = UINT_MAX;
|
|
goto empty;
|
|
empty: /* Reserved dynamic memory, but tree is empty. */
|
|
assert(add.node && tree->root.height == UINT_MAX);
|
|
add.height = tree->root.height = 0;
|
|
add.node->size = 0;
|
|
add.idx = 0;
|
|
goto insert;
|
|
descend: /* Record last node that has space. */
|
|
{
|
|
int is_equal = 0;
|
|
add = PB_(lookup_insert)(&tree->root, key, &hole, &is_equal);
|
|
if(is_equal) {
|
|
/* Assumes key is unique; we might not want this for multi-maps,
|
|
but that is not implemented yet. */
|
|
#ifdef TREE_VALUE
|
|
if(value) *value = PB_(ref_to_value)(add);
|
|
#endif
|
|
return TREE_YIELD;
|
|
}
|
|
}
|
|
if(hole.node == add.node) goto insert; else goto grow;
|
|
insert: /* Leaf has space to spare; usually end up here. */
|
|
assert(add.node && add.idx <= add.node->size && add.node->size < TREE_MAX
|
|
&& (!add.height || is_growing));
|
|
memmove(add.node->key + add.idx + 1, add.node->key + add.idx,
|
|
sizeof *add.node->key * (add.node->size - add.idx));
|
|
#ifdef TREE_VALUE
|
|
memmove(add.node->value + add.idx + 1, add.node->value + add.idx,
|
|
sizeof *add.node->value * (add.node->size - add.idx));
|
|
#endif
|
|
add.node->size++;
|
|
add.node->key[add.idx] = key;
|
|
#ifdef TREE_VALUE
|
|
if(value) *value = PB_(ref_to_value)(add);
|
|
#endif
|
|
return TREE_UNIQUE;
|
|
grow: /* Leaf is full. */ {
|
|
unsigned new_no = hole.node ? hole.height : tree->root.height + 2;
|
|
struct PB_(node) **new_next = &new_head, *new_leaf;
|
|
struct PB_(branch) *new_branch;
|
|
assert(new_no);
|
|
/* Allocate new nodes in succession. */
|
|
while(new_no != 1) { /* All branches except one. */
|
|
if(!(new_branch = malloc(sizeof *new_branch))) goto catch;
|
|
new_branch->base.size = 0;
|
|
new_branch->child[0] = 0;
|
|
*new_next = &new_branch->base, new_next = new_branch->child;
|
|
new_no--;
|
|
}
|
|
/* Last point of potential failure; (don't need to have entry in catch.) */
|
|
if(!(new_leaf = malloc(sizeof *new_leaf))) goto catch;
|
|
new_leaf->size = 0;
|
|
*new_next = new_leaf;
|
|
/* Attach new nodes to the tree. The hole is now an actual hole. */
|
|
if(hole.node) { /* New nodes are a sub-structure of the tree. */
|
|
struct PB_(branch) *holeb = PB_(branch)(hole.node);
|
|
memmove(hole.node->key + hole.idx + 1, hole.node->key + hole.idx,
|
|
sizeof *hole.node->key * (hole.node->size - hole.idx));
|
|
#ifdef TREE_VALUE
|
|
memmove(hole.node->value + hole.idx + 1, hole.node->value + hole.idx,
|
|
sizeof *hole.node->value * (hole.node->size - hole.idx));
|
|
#endif
|
|
memmove(holeb->child + hole.idx + 2, holeb->child + hole.idx + 1,
|
|
sizeof *holeb->child * (hole.node->size - hole.idx));
|
|
holeb->child[hole.idx + 1] = new_head;
|
|
hole.node->size++;
|
|
} else { /* New nodes raise tree height. */
|
|
struct PB_(branch) *const new_root = PB_(branch)(new_head);
|
|
hole.node = new_head, hole.height = ++tree->root.height, hole.idx = 0;
|
|
new_head = new_root->child[1] = new_root->child[0];
|
|
new_root->child[0] = tree->root.node, tree->root.node = hole.node;
|
|
hole.node->size = 1;
|
|
}
|
|
cursor = hole; /* Go down; (as opposed to doing it on paper.) */
|
|
goto split;
|
|
} split: { /* Split between the new and existing nodes. */
|
|
struct PB_(node) *sibling;
|
|
assert(cursor.node && cursor.node->size && cursor.height);
|
|
sibling = new_head;
|
|
/* Descend now while split hasn't happened -- easier. */
|
|
new_head = --cursor.height ? PB_(branch)(new_head)->child[0] : 0;
|
|
cursor.node = PB_(branch)(cursor.node)->child[cursor.idx];
|
|
PB_(find_idx)(&cursor, key);
|
|
assert(!sibling->size && cursor.node->size == TREE_MAX); /* Atomic. */
|
|
/* Expand `cursor`, which is full, to multiple nodes. */
|
|
if(cursor.idx < TREE_SPLIT) { /* Descend hole to `cursor`. */
|
|
memcpy(sibling->key, cursor.node->key + TREE_SPLIT,
|
|
sizeof *sibling->key * (TREE_MAX - TREE_SPLIT));
|
|
#ifdef TREE_VALUE
|
|
memcpy(sibling->value, cursor.node->value + TREE_SPLIT,
|
|
sizeof *sibling->value * (TREE_MAX - TREE_SPLIT));
|
|
#endif
|
|
hole.node->key[hole.idx] = cursor.node->key[TREE_SPLIT - 1];
|
|
#ifdef TREE_VALUE
|
|
hole.node->value[hole.idx] = cursor.node->value[TREE_SPLIT - 1];
|
|
#endif
|
|
memmove(cursor.node->key + cursor.idx + 1,
|
|
cursor.node->key + cursor.idx,
|
|
sizeof *cursor.node->key * (TREE_SPLIT - 1 - cursor.idx));
|
|
#ifdef TREE_VALUE
|
|
memmove(cursor.node->value + cursor.idx + 1,
|
|
cursor.node->value + cursor.idx,
|
|
sizeof *cursor.node->value * (TREE_SPLIT - 1 - cursor.idx));
|
|
#endif
|
|
if(cursor.height) {
|
|
struct PB_(branch) *const cb = PB_(branch)(cursor.node),
|
|
*const sb = PB_(branch)(sibling);
|
|
struct PB_(node) *temp = sb->child[0];
|
|
memcpy(sb->child, cb->child + TREE_SPLIT,
|
|
sizeof *cb->child * (TREE_MAX - TREE_SPLIT + 1));
|
|
memmove(cb->child + cursor.idx + 2, cb->child + cursor.idx + 1,
|
|
sizeof *cb->child * (TREE_SPLIT - 1 - cursor.idx));
|
|
cb->child[cursor.idx + 1] = temp;
|
|
}
|
|
hole = cursor;
|
|
} else if(cursor.idx > TREE_SPLIT) { /* Descend hole to `sibling`. */
|
|
hole.node->key[hole.idx] = cursor.node->key[TREE_SPLIT];
|
|
#ifdef TREE_VALUE
|
|
hole.node->value[hole.idx] = cursor.node->value[TREE_SPLIT];
|
|
#endif
|
|
hole.node = sibling, hole.height = cursor.height,
|
|
hole.idx = cursor.idx - TREE_SPLIT - 1;
|
|
memcpy(sibling->key, cursor.node->key + TREE_SPLIT + 1,
|
|
sizeof *sibling->key * hole.idx);
|
|
memcpy(sibling->key + hole.idx + 1, cursor.node->key + cursor.idx,
|
|
sizeof *sibling->key * (TREE_MAX - cursor.idx));
|
|
#ifdef TREE_VALUE
|
|
memcpy(sibling->value, cursor.node->value + TREE_SPLIT + 1,
|
|
sizeof *sibling->value * hole.idx);
|
|
memcpy(sibling->value + hole.idx + 1, cursor.node->value + cursor.idx,
|
|
sizeof *sibling->value * (TREE_MAX - cursor.idx));
|
|
#endif
|
|
if(cursor.height) {
|
|
struct PB_(branch) *const cb = PB_(branch)(cursor.node),
|
|
*const sb = PB_(branch)(sibling);
|
|
struct PB_(node) *temp = sb->child[0];
|
|
memcpy(sb->child, cb->child + TREE_SPLIT + 1,
|
|
sizeof *cb->child * (hole.idx + 1));
|
|
memcpy(sb->child + hole.idx + 2, cb->child + cursor.idx + 1,
|
|
sizeof *cb->child * (TREE_MAX - cursor.idx));
|
|
sb->child[hole.idx + 1] = temp;
|
|
}
|
|
} else { /* Equal split: leave the hole where it is. */
|
|
memcpy(sibling->key, cursor.node->key + TREE_SPLIT,
|
|
sizeof *sibling->key * (TREE_MAX - TREE_SPLIT));
|
|
#ifdef TREE_VALUE
|
|
memcpy(sibling->value, cursor.node->value + TREE_SPLIT,
|
|
sizeof *sibling->value * (TREE_MAX - TREE_SPLIT));
|
|
#endif
|
|
if(cursor.height) {
|
|
struct PB_(branch) *const cb = PB_(branch)(cursor.node),
|
|
*const sb = PB_(branch)(sibling);
|
|
memcpy(sb->child + 1, cb->child + TREE_SPLIT + 1,
|
|
sizeof *cb->child * (TREE_MAX - TREE_SPLIT));
|
|
}
|
|
}
|
|
/* Divide `TREE_MAX + 1` into two trees. */
|
|
cursor.node->size = TREE_SPLIT, sibling->size = TREE_MAX - TREE_SPLIT;
|
|
if(cursor.height) goto split; /* Loop max `\log_{TREE_MIN} size`. */
|
|
hole.node->key[hole.idx] = key;
|
|
#ifdef TREE_VALUE
|
|
if(value) *value = PB_(ref_to_value)(hole);
|
|
#endif
|
|
assert(!new_head);
|
|
return TREE_UNIQUE;
|
|
} catch:
|
|
while(new_head) {
|
|
struct PB_(branch) *const top = PB_(branch)(new_head);
|
|
new_head = top->child[0];
|
|
free(top);
|
|
}
|
|
if(!errno) errno = ERANGE; /* Non-POSIX OSs not mandated to set errno. */
|
|
return TREE_ERROR;
|
|
}
|
|
|
|
/** Tries to remove `key` from `tree`. @return Success. */
|
|
static int B_(tree_remove)(struct B_(tree) *const tree,
|
|
const PB_(key) key) {
|
|
struct PB_(ref) rm, lump;
|
|
struct {
|
|
struct {
|
|
PB_(key) key;
|
|
#ifdef TREE_VALUE
|
|
PB_(value) value;
|
|
#endif
|
|
struct PB_(node) *link;
|
|
int link_lt;
|
|
} store[2];
|
|
unsigned next;
|
|
} temp;
|
|
temp.next = 0;
|
|
assert(tree);
|
|
/* Traverse down the tree until the `key`. */
|
|
if(!(rm.node = tree->root.node) || tree->root.height == UINT_MAX
|
|
|| !(rm = PB_(lookup_remove)(&tree->root, key, &lump)).node) return 0;
|
|
if(rm.height) goto branch; else goto leaf;
|
|
branch: {
|
|
struct PB_(ref) succ;
|
|
struct PB_(ref) pred;
|
|
pred = rm;
|
|
succ = rm;
|
|
/* This will be more efficient duplicating code? it actually doesn't need
|
|
all the code.
|
|
while(ref->height) ref->height--,
|
|
ref->node = PB_(branch_c)(ref->node)->child[ref->idx],
|
|
ref->idx = ref->node->size;
|
|
if(ref->idx) return ref->idx--, 1; <-- predecessor
|
|
ref->idx++;
|
|
while(ref->height) ref->height--,
|
|
ref->node = PB_(branch_c)(ref->node)->child[ref->idx], ref->idx = 0;
|
|
if(ref->idx < ref->node->size) return 1; <-- successor */
|
|
assert(0);
|
|
} leaf:
|
|
if(rm.node == lump.node) goto excess;
|
|
else if(lump.node) /* and size <= MIN */ goto balance;
|
|
else {
|
|
assert(0); /* Root is a special case, it can have down to one. */
|
|
goto shrink;
|
|
}
|
|
balance: {
|
|
struct PB_(branch) *const lumpb = PB_(branch)(lump.node);
|
|
struct PB_(ref) child;
|
|
struct { struct PB_(node) *less, *more; } sibling;
|
|
assert(lump.height && lump.idx <= lump.node->size && lump.node->size > 0);
|
|
/* Find the child and siblings. */
|
|
child.node = lumpb->child[lump.idx];
|
|
if(child.height = lump.height - 1) PB_(find_idx)(&child, key);
|
|
else assert(child.node == rm.node), child.idx = rm.idx;
|
|
assert(child.node->size == TREE_MIN);
|
|
sibling.less = lump.idx ? lumpb->child[lump.idx - 1] : 0;
|
|
sibling.more = lump.idx < lump.node->size ? lumpb->child[lump.idx + 1] : 0;
|
|
assert(sibling.less || sibling.more);
|
|
/* Pick the sibling with the most nodes to balance. */
|
|
if((sibling.less ? sibling.less->size : 0)
|
|
> (sibling.more ? sibling.more->size : 0)) { /* Split left. */
|
|
const unsigned combined = child.node->size + sibling.less->size,
|
|
to_promote = combined / 2, to_more = to_promote + 1,
|
|
transfer = sibling.less->size - to_more;
|
|
assert(sibling.less && lump.idx
|
|
&& to_promote >= TREE_MIN && to_more <= sibling.less->size);
|
|
printf("combined %u; to_promote %u; to_more %u -> transfer %u\n",
|
|
combined, to_promote, to_more, transfer);
|
|
/* Make way for the keys from the less. */
|
|
printf("move child1 (%u)\n", child.node->size - child.idx - 1);
|
|
memmove(child.node->key + child.idx + 1 + transfer,
|
|
child.node->key + child.idx + 1,
|
|
sizeof *child.node->key * (child.node->size - child.idx - 1));
|
|
printf("move child2 (%u)\n", child.idx);
|
|
memmove(child.node->key + 1 + transfer, child.node->key,
|
|
sizeof *child.node->key * child.idx);
|
|
child.node->key[transfer] = lump.node->key[lump.idx - 1];
|
|
printf("less %u(%u) -> 0\n", to_more, transfer);
|
|
memcpy(child.node->key, sibling.less->key + to_more,
|
|
sizeof *sibling.less->key * transfer);
|
|
lump.node->key[lump.idx - 1] = sibling.less->key[to_promote];
|
|
assert(child.node->size <= TREE_MAX - transfer);
|
|
child.node->size += transfer;
|
|
sibling.less->size = (unsigned char)to_promote;
|
|
if(lump.height > 1) {
|
|
struct PB_(branch) *const lessb = PB_(branch)(sibling.less);
|
|
assert(0);
|
|
}
|
|
} else { /* Split right. ***incorrect***? */
|
|
const unsigned combined = child.node->size + sibling.more->size,
|
|
to_promote = (combined - 1) / 2, to_more = to_promote - 1;
|
|
assert(sibling.more && to_promote && to_promote < sibling.more->size);
|
|
printf("to_promote %u, to_less %u\n", to_promote, to_more);
|
|
/* Make way for the keys from the less. */
|
|
printf("move child (%u)\n", child.node->size - child.idx - 1);
|
|
memmove(child.node->key + child.idx, child.node->key + child.idx + 1,
|
|
sizeof *child.node->key * (child.node->size - child.idx - 1));
|
|
child.node->key[child.node->size - 1] = lump.node->key[lump.idx];
|
|
memcpy(child.node->key + child.node->size, sibling.more->key,
|
|
sizeof *sibling.more->key * to_more);
|
|
assert(0);
|
|
}
|
|
goto end;
|
|
merge:
|
|
if(lump.idx < lump.node->size && lumpb->child[lump.idx + 1] && (lump.node->size > TREE_MIN)) {
|
|
lean_left: /* Prefer left-leaning: less work for copying. */
|
|
/*left = child.node, right = lumpb->child[lump.idx + 1];*/
|
|
temp.store[temp.next].key = lump.node->key[lump.idx];
|
|
#ifdef TREE_VALUE
|
|
temp.store[temp.next].value = lump.node->value[lump.idx];
|
|
#endif
|
|
temp.store[temp.next].link = lumpb->child[lump.idx + 1];
|
|
/* Not necessarily! */
|
|
memmove(lump.node->key + lump.idx, lump.node->key + lump.idx + 1,
|
|
sizeof *lump.node->key * (lump.node->size - lump.idx - 1));
|
|
#ifdef TREE_VALUE
|
|
memmove(lump.node->value + lump.idx, lump.node->value + lump.idx + 1,
|
|
sizeof *lump.node->value * (lump.node->size - lump.idx - 1));
|
|
#endif
|
|
memmove(lumpb->child + lump.idx + 1, lumpb->child + lump.idx + 2,
|
|
sizeof *lumpb->child * (lump.node->size - lump.idx - 1));
|
|
lump.node->size--;
|
|
} else {
|
|
/*left = lumpb->child[lump.idx - 1], right = child.node;*/
|
|
assert(0);
|
|
}
|
|
temp.next = !temp.next;
|
|
/*printf("remove: merging %s and %s.\n", orcify(left), orcify(right));
|
|
assert(left->size == TREE_MIN && right->size == TREE_MIN);*/
|
|
assert(0);
|
|
PB_(find_idx)(&lump, key);
|
|
return 0;
|
|
} shrink: /* Every node along the path is minimal, the height decreases. */
|
|
assert(0);
|
|
return 0;
|
|
excess:
|
|
assert(rm.node && rm.idx < rm.node->size && rm.node->size > TREE_MIN
|
|
&& !rm.height);
|
|
memmove(rm.node->key + rm.idx, rm.node->key + rm.idx + 1,
|
|
sizeof *rm.node->key * (rm.node->size - rm.idx - 1));
|
|
#ifdef TREE_VALUE
|
|
memmove(rm.node->value + rm.idx, rm.node->value + rm.idx + 1,
|
|
sizeof *rm.node->value * (rm.node->size - rm.idx - 1));
|
|
#endif
|
|
rm.node->size--;
|
|
goto end;
|
|
end:
|
|
return 1;
|
|
}
|
|
|
|
|
|
|
|
/****************************/
|
|
|
|
/* All these are used in clone; it's convenient to use `\O(\log size)` stack
|
|
space. [existing branches][new branches][existing leaves][new leaves] no */
|
|
struct PB_(scaffold) {
|
|
struct tree_count victim, source;
|
|
size_t no;
|
|
struct PB_(node) **data;
|
|
struct { struct PB_(node) **head, **fresh, **cursor; } branch, leaf;
|
|
};
|
|
/** Adds the nodes of the sub-tree `tree`, which must be a branch, to the
 running totals in `no`. Leaves are counted from their parent and never
 visited. @return False if a counter would overflow. */
static int PB_(count_r)(struct PB_(tree) tree, struct tree_count *const no) {
	struct PB_(tree) child;
	unsigned char i;
	assert(tree.node && tree.height);
	/* This branch itself; wrapping to zero means overflow. */
	no->branches++;
	if(!no->branches) return 0;
	if(tree.height == 1) {
		/* Every child is a leaf, and there are `size + 1` of them.
		 Overflow; aren't guaranteed against this. */
		if(no->leaves + tree.node->size + 1 < no->leaves) return 0;
		no->leaves += tree.node->size + 1;
		return 1;
	}
	/* Children are branches one level down; count each recursively. */
	child.height = tree.height - 1;
	for(i = 0; i <= tree.node->size; i++) {
		child.node = PB_(branch)(tree.node)->child[i];
		if(!PB_(count_r)(child, no)) return 0;
	}
	return 1;
}
|
|
/** Counts the branches and leaves of `tree` into `no`. A tree with no root
 node counts as nothing; a root that is a leaf, including one that is reserved
 but empty (height `UINT_MAX`), counts as one leaf.
 @return False if a counter would overflow. */
static int PB_(count)(const struct B_(tree) *const tree,
	struct tree_count *const no) {
	assert(tree && no);
	no->leaves = 0;
	no->branches = 0;
	if(!tree->root.node) return 1; /* Idle. */
	if(tree->root.height == UINT_MAX || !tree->root.height) {
		no->leaves = 1; /* The root is the only node. */
		return 1;
	}
	/* Complex: at least one branch. */
	if(!PB_(count_r)(tree->root, no)) return 0;
	return 1;
}
|
|
/** Walks the branch `ref` and everything under it, handing nodes to the
 scaffold `sc` for reuse while there are reusable slots left (`cursor` before
 `fresh`) and freeing the rest. */
static void PB_(cannibalize_r)(struct PB_(ref) ref,
	struct PB_(scaffold) *const sc) {
	struct PB_(branch) *const self = PB_(branch)(ref.node);
	int keep_branch;
	assert(ref.node && ref.height && sc);
	/* Decide now, free last: the children are still read from this node. */
	keep_branch = sc->branch.cursor < sc->branch.fresh;
	if(keep_branch) *sc->branch.cursor++ = ref.node;
	if(ref.height != 1) {
		/* Children are branches; visit each, starting at `ref.idx`. */
		for( ; ref.idx <= ref.node->size; ref.idx++) {
			struct PB_(ref) child;
			child.node = PB_(branch)(ref.node)->child[ref.idx];
			child.height = ref.height - 1;
			child.idx = 0;
			PB_(cannibalize_r)(child, sc);
		}
	} else {
		/* Children are leaves; keep or free each one directly. */
		unsigned n;
		for(n = 0; n <= ref.node->size; n++) {
			struct PB_(node) *const leaf = self->child[n];
			if(sc->leaf.cursor < sc->leaf.fresh) *sc->leaf.cursor++ = leaf;
			else free(leaf);
		}
	}
	if(!keep_branch) free(self);
}
|
|
/** Disassembles `tree` into the scaffold `sc`: its nodes fill the reusable
 slots at the start of the branch and leaf sections and any surplus is freed.
 Does nothing when the victim counts in `sc` are both zero. A root that is
 reserved but empty (height `UINT_MAX`) is a single leaf, which is how
 `count` reports it, so it is handed over like any other lone leaf. */
static void PB_(cannibalize)(const struct B_(tree) *const tree,
	struct PB_(scaffold) *const sc) {
	struct PB_(ref) ref;
	assert(tree && sc);
	/* Nothing to cannibalize. */
	if(!sc->victim.branches && !sc->victim.leaves) return;
	assert(tree->root.node);
	ref.node = tree->root.node, ref.height = tree->root.height, ref.idx = 0;
	sc->branch.cursor = sc->branch.head;
	sc->leaf.cursor = sc->leaf.head;
	if(ref.height && ref.height != UINT_MAX) {
		PB_(cannibalize_r)(ref, sc);
	} else { /* Just one leaf, possibly holding no keys. */
		*sc->leaf.cursor = ref.node;
	}
}
|
|
/** Copies the sub-tree `src` into nodes taken, in order, from the scaffold
 `sc`: branches from `sc->branch.cursor`, leaves from `sc->leaf.cursor`.
 @return The copy of the top node of `src`. */
static struct PB_(node) *PB_(clone_r)(struct PB_(tree) src,
	struct PB_(scaffold) *const sc) {
	struct PB_(node) *node;
	if(src.height) {
		struct PB_(branch) *const srcb = PB_(branch)(src.node), *dstb;
		struct PB_(tree) child;
		unsigned i;
		node = *sc->branch.cursor++;
		dstb = PB_(branch)(node);
		*node = *src.node; /* Copy node. */
		/* Different links. The loop is bounded by the size of the branch
		 being copied, so `src.node` has to stay put; the child being
		 descended into goes in its own variable. */
		child.height = src.height - 1;
		for(i = 0; i <= src.node->size; i++) {
			child.node = srcb->child[i];
			dstb->child[i] = PB_(clone_r)(child, sc);
		}
	} else { /* Leaves. */
		node = *sc->leaf.cursor++;
		*node = *src.node;
	}
	return node;
}
|
|
/** Builds a copy of `src` out of the nodes already collected in `sc`, which
 must hold exactly as many branches and leaves as `src` has.
 @return The root of the copy, with the same height as `src`. */
static struct PB_(tree) PB_(clone)(const struct PB_(tree) *const src,
	struct PB_(scaffold) *const sc) {
	struct PB_(tree) copy;
	assert(src && src->node && sc);
	copy.height = src->height;
	/* Rewind both halves of the scaffold, then take nodes one at a time. */
	sc->leaf.cursor = sc->leaf.head;
	sc->branch.cursor = sc->branch.head;
	copy.node = PB_(clone_r)(*src, sc);
	/* Every node must have been used. No concurrent modifications, please. */
	assert(sc->branch.cursor == sc->leaf.head);
	assert(sc->leaf.cursor == sc->data + sc->no);
	return copy;
}
|
|
/** `source` is copied to, and overwrites, `tree`.
|
|
@param[source] In the case where it's null or idle, if `tree` is empty, then
|
|
it continues to be.
|
|
@return Success, otherwise `tree` is not modified.
|
|
@throws[malloc] @throws[EDOM] `tree` is null. @throws[ERANGE] The size of
|
|
`source` doesn't fit into `size_t`. @allow */
|
|
static int B_(tree_clone)(struct B_(tree) *const tree,
|
|
const struct B_(tree) *const source) {
|
|
struct PB_(scaffold) sc;
|
|
int success = 1;
|
|
sc.data = 0; /* Need to keep this updated to catch. */
|
|
if(!tree) { errno = EDOM; goto catch; }
|
|
/* Count the number of nodes and set up to copy. */
|
|
if(!PB_(count)(tree, &sc.victim) || !PB_(count)(source, &sc.source)
|
|
|| (sc.no = sc.source.branches + sc.source.leaves) < sc.source.branches)
|
|
{ errno = ERANGE; goto catch; } /* Overflow. */
|
|
printf("<B>tree_clone: victim.branches %zu; victim.leaves %zu; "
|
|
"source.branches %zu; source.leaves %zu.\n", sc.victim.branches,
|
|
sc.victim.leaves, sc.source.branches, sc.source.leaves);
|
|
if(!sc.no) { PB_(clear)(tree); goto finally; } /* No need to allocate. */
|
|
if(!(sc.data = malloc(sizeof *sc.data * sc.no)))
|
|
{ if(!errno) errno = ERANGE; goto catch; }
|
|
/* debug */
|
|
{
|
|
size_t i;
|
|
for(i = 0; i < sc.no; i++) sc.data[i] = 0;
|
|
}
|
|
{ /* Ready scaffold. */
|
|
struct tree_count need;
|
|
need.leaves = sc.source.leaves > sc.victim.leaves
|
|
? sc.source.leaves - sc.victim.leaves : 0;
|
|
need.branches = sc.source.branches > sc.victim.branches
|
|
? sc.source.branches - sc.victim.branches : 0;
|
|
sc.branch.head = sc.data;
|
|
sc.branch.fresh = sc.branch.cursor
|
|
= sc.branch.head + sc.source.branches - need.branches;
|
|
sc.leaf.head = sc.branch.fresh + need.branches;
|
|
sc.leaf.fresh = sc.leaf.cursor
|
|
= sc.leaf.head + sc.source.leaves - need.leaves;
|
|
assert(sc.leaf.fresh + need.leaves == sc.data + sc.no);
|
|
}
|
|
/* Add new nodes. */
|
|
while(sc.branch.cursor != sc.leaf.head) {
|
|
struct PB_(branch) *branch;
|
|
if(!(branch = malloc(sizeof *branch))) goto catch;
|
|
branch->base.size = 0;
|
|
branch->child[0] = 0;
|
|
*sc.branch.cursor++ = &branch->base;
|
|
}
|
|
while(sc.leaf.cursor != sc.data + sc.no) {
|
|
struct PB_(node) *leaf;
|
|
if(!(leaf = malloc(sizeof *leaf))) goto catch;
|
|
leaf->size = 0;
|
|
*sc.leaf.cursor++ = leaf;
|
|
}
|
|
/* Resources acquired; now we don't care about tree. */
|
|
PB_(cannibalize)(tree, &sc);
|
|
/* The scaffold has the exact number of nodes we need. Overwrite. */
|
|
tree->root = PB_(clone)(&source->root, &sc);
|
|
goto finally;
|
|
catch:
|
|
success = 0;
|
|
if(!sc.data) goto finally;
|
|
while(sc.leaf.cursor != sc.leaf.fresh) {
|
|
struct PB_(node) *leaf = *(--sc.leaf.cursor);
|
|
assert(leaf);
|
|
free(leaf);
|
|
}
|
|
while(sc.branch.cursor != sc.branch.fresh) {
|
|
struct PB_(branch) *branch = PB_(branch)(*(--sc.branch.cursor));
|
|
assert(branch);
|
|
free(branch);
|
|
}
|
|
finally:
|
|
free(sc.data); /* Temporary memory. */
|
|
return success;
|
|
}
|
|
|
|
#if defined(TREE_TEST) /* <!-- test */
/* The test header is included before any to-string trait exists, so the two
 function pointers it goes through are declared here and given their values
 later, in the to-string trait section. */
static void (*PB_(to_string))(PB_(entry_c), char (*)[12]);
static const char *(*PB_(tree_to_string))(const struct B_(tree) *);
#include "../test/test_tree.h"
#endif /* test --> */
|
|
|
|
static void PB_(unused_base_coda)(void);
|
|
static void PB_(unused_base)(void) {
|
|
PB_(key) k;
|
|
memset(&k, 0, sizeof k);
|
|
PB_(is_element_c); PB_(forward); PB_(next_c); PB_(is_element);
|
|
B_(tree)(); B_(tree_)(0); B_(tree_iterator)(0); B_(tree_next)(0);
|
|
B_(tree_clear)(0);
|
|
B_(tree_lower_iterator)(0, k); B_(tree_lower_value)(0, k);
|
|
#ifdef TREE_VALUE
|
|
B_(tree_bulk_add)(0, k, 0); B_(tree_add)(0, k, 0);
|
|
#else
|
|
B_(tree_bulk_add)(0, k); B_(tree_add)(0, k);
|
|
#endif
|
|
B_(tree_bulk_finish)(0); B_(tree_remove)(0, k); B_(tree_clone)(0, 0);
|
|
PB_(unused_base_coda)();
|
|
}
|
|
static void PB_(unused_base_coda)(void) { PB_(unused_base)(); }
|
|
|
|
|
|
#elif defined(TREE_TO_STRING) /* base code --><!-- to string trait */
|
|
|
|
|
|
/* Names for this trait: `<B>tree<SZ><n>` when the trait is named, otherwise
 `<B>tree<n>`. */
#if defined(TREE_TO_STRING_NAME)
#define STR_(n) TREE_CAT(B_(tree), TREE_CAT(TREE_TO_STRING_NAME, n))
#else
#define STR_(n) TREE_CAT(B_(tree), n)
#endif
/* Parameters that are set up for the included header. */
#define TO_STRING TREE_TO_STRING
#define TO_STRING_LEFT '{'
#define TO_STRING_RIGHT '}'
#include "to_string.h" /** \include */
#if defined(TREE_TEST) /* <!-- expect: greedy satisfy forward-declared. */
/* The first to-string trait supplies the pointers the test code was
 promised; `TREE_TEST` is then dropped so that this happens only once. */
#undef TREE_TEST
static PSTR_(to_string_fn) PB_(to_string) = PSTR_(to_string);
static const char *(*PB_(tree_to_string))(const struct B_(tree) *)
	= STR_(to_string);
#endif /* expect --> */
#undef STR_
#undef TREE_TO_STRING
/* `#undef` of a name that is not a macro is ignored, so no guard is needed. */
#undef TREE_TO_STRING_NAME
|
|
|
|
|
|
#endif /* traits --> */
|
|
|
|
|
|
#if defined(TREE_EXPECT_TRAIT) /* <!-- trait */
/* Another inclusion will follow to add a trait: keep the parameters and only
 consume the request. */
#undef TREE_EXPECT_TRAIT
#else /* trait --><!-- !trait */
/* Last inclusion for this tree. A test that is still pending here never got
 the to-string trait it needs. */
#if defined(TREE_TEST)
#error No TREE_TO_STRING traits defined for TREE_TEST.
#endif
/* Remove the parameters so the header can be included again for a different
 tree; `#undef` of a name that is not a macro is ignored. */
#undef TREE_NAME
#undef TREE_KEY
#undef TREE_COMPARE
#undef TREE_VALUE
#undef TREE_TEST
#undef BOX_
#undef BOX
#undef BOX_CONTENT
#undef BOX_ITERATOR
#endif /* !trait --> */
#undef TREE_TO_STRING_TRAIT
#undef TREE_TRAITS
|