interpret/src/tree.h

1742 lines
65 KiB
C
Raw Normal View History

2022-05-28 02:09:38 -04:00
/** @license 2022 Neil Edelman, distributed under the terms of the
[MIT License](https://opensource.org/licenses/MIT).
2022-12-27 02:31:08 -05:00
@abstract Stand-alone header <../src/tree.h>; examples <../test/test_tree.c>;
article <../doc/tree/tree.pdf>.
2022-05-28 02:09:38 -04:00
2022-09-08 01:13:56 -04:00
@subtitle Ordered tree
2022-12-27 02:31:08 -05:00
![Example of an order-3 tree.](../doc/tree/tree.png)
2022-05-28 02:09:38 -04:00
2022-07-06 13:02:28 -04:00
A <tag:<B>tree> is an ordered set or map contained in a tree. For memory
locality, this is implemented as a B-tree, described in
<Bayer, McCreight, 1972, Large>.
2022-05-28 02:09:38 -04:00
@param[TREE_NAME, TREE_KEY]
`<B>` that satisfies `C` naming conventions when mangled, required, and
2022-09-08 01:13:56 -04:00
`TREE_KEY`, a type, <typedef:<PB>key>, whose default is `unsigned int`.
`<PB>` is private, whose names are prefixed in a manner to avoid collisions.
2022-05-28 02:09:38 -04:00
@param[TREE_VALUE]
2022-12-27 02:31:08 -05:00
Optional payload to go with the type, <typedef:<PB>value>, thus making it a
map instead of a set.
2022-05-28 02:09:38 -04:00
@param[TREE_COMPARE]
2022-12-27 02:31:08 -05:00
This will define <fn:<B>compare>, a <typedef:<PB>compare_fn> that compares
keys as integer-types that results in ascending order, `a > b`. If
`TREE_COMPARE` is specified, the user must specify their own <fn:<B>compare>.
2022-05-28 02:09:38 -04:00
2022-09-08 01:13:56 -04:00
@param[TREE_ORDER]
Sets the branching factor, or order as <Knuth, 1998 Art 3>, to the range
`[3, UINT_MAX+1]`. Default 65 is tuned to an integer to pointer map, and
should be okay for most variations. 4 is isomorphic to left-leaning red-black
tree, <Sedgewick, 2008, LLRB>. The above illustration is 5.
2022-11-02 00:06:43 -04:00
@param[TREE_DEFAULT]
2022-09-08 01:13:56 -04:00
Default trait; a name that satisfies `C` naming conventions when mangled and a
2022-11-02 00:06:43 -04:00
<typedef:<PB>value> used in <fn:<B>tree<D>get>.
@param[TREE_TO_STRING]
To string trait `<STR>` contained in <src/to_string.h>. Require
`<name>[<trait>]to_string` be declared as <typedef:<PSTR>to_string_fn>.
2022-09-08 01:13:56 -04:00
2022-11-02 00:06:43 -04:00
@param[TREE_EXPECT_TRAIT, TREE_TRAIT]
Named traits are obtained by including `tree.h` multiple times with
`TREE_EXPECT_TRAIT` and then subsequently including the name in `TREE_TRAIT`.
2022-05-28 02:09:38 -04:00
@fixme merge, difference
@std C89 */
/* Sanity checks on the parameters supplied by the includer. */
#if !defined(TREE_NAME)
#error Name undefined.
#endif
/* A named trait is only valid on a re-include, when `BOX_TYPE` is set. */
#if defined(TREE_TRAIT) ^ defined(BOX_TYPE)
#error TREE_TRAIT name must come after TREE_EXPECT_TRAIT.
#endif
/* Testing needs a to-string trait, either in this include or a prior one. */
#if defined(TREE_TEST) && (!defined(TREE_TRAIT) && !defined(TREE_TO_STRING) \
	|| defined(TREE_TRAIT) && !defined(TREE_HAS_TO_STRING))
#error Test requires to string.
#endif
#ifndef TREE_H /* <!-- idempotent */
#define TREE_H
#include <stddef.h> /* That's weird. */
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <assert.h>
#include <limits.h>
/* <Kernighan and Ritchie, 1988, p. 231>. */
#if defined(TREE_CAT_) || defined(TREE_CAT) || defined(B_) || defined(PB_)
#error Unexpected defines.
#endif
/* Two-level concatenation so that the arguments are macro-expanded first. */
#define TREE_CAT_(n, m) n ## _ ## m
#define TREE_CAT(n, m) TREE_CAT_(n, m)
/* Public names are `<TREE_NAME>_<n>`; private names are
 `tree_<TREE_NAME>_<n>`. */
#define B_(n) TREE_CAT(TREE_NAME, n)
#define PB_(n) TREE_CAT(tree, B_(n))
/* Leaf: `TREE_MAX type`; branch: `TREE_MAX type + TREE_ORDER pointer`. In
 <Goodrich, Tamassia, Mount, 2011, Data>, these are (a,b)-trees as
 (TREE_MIN+1,TREE_MAX+1)-trees. */
#define TREE_MAX (TREE_ORDER - 1)
/* This is the worst-case branching factor; the performance will be
 \O(log_{`TREE_MIN`+1} `size`). Usually this is `(TREE_MAX+1)/2-1`. However,
 smaller values are less-eager; in the extreme,
 <Johnson, Shasha, 1993, Free-at-Empty>, show good results; this has been
 chosen to provide hysteresis. (Except `TREE_MAX 2`, it's fixed.) */
#define TREE_MIN (TREE_MAX / 3 ? TREE_MAX / 3 : 1)
#define TREE_SPLIT (TREE_ORDER / 2) /* Split index: even order left-leaning. */
/* X-macro: one list generates both the enum and its string table. */
#define TREE_RESULT X(ERROR), X(ABSENT), X(PRESENT)
#define X(n) TREE_##n
/** A result of modifying the tree, of which `TREE_ERROR` is false.

 ![A diagram of the result states.](../doc/tree/result.png) */
enum tree_result { TREE_RESULT };
#undef X
#define X(n) #n
/** A static array of strings describing the <tag:tree_result>. */
static const char *const tree_result_str[] = { TREE_RESULT };
#undef X
#undef TREE_RESULT
/* A pair of counters, one for branch nodes and one for leaf nodes. */
struct tree_node_count { size_t branches, leaves; };
#endif /* idempotent --> */
2022-11-02 00:06:43 -04:00
#ifndef TREE_TRAIT /* <!-- base code */
2022-05-28 02:09:38 -04:00
2022-09-08 01:13:56 -04:00
/* Defaults for the optional parameters. */
#ifndef TREE_ORDER
#define TREE_ORDER 65 /* Maximum branching factor. This sets the granularity. */
#endif
/* A node's `size` is an `unsigned` holding up to `TREE_ORDER - 1` keys. */
#if TREE_ORDER < 3 || TREE_ORDER > UINT_MAX + 1
#error TREE_ORDER parameter range `[3, UINT_MAX+1]`.
#endif
#ifndef TREE_KEY
#define TREE_KEY unsigned
#endif
2022-09-08 01:13:56 -04:00
/** Ordered type used by <typedef:<PB>compare_fn>; defaults to `unsigned`. */
2022-05-28 02:09:38 -04:00
typedef TREE_KEY PB_(key);
#ifdef TREE_VALUE
2022-09-08 01:13:56 -04:00
/** On `TREE_VALUE`, this creates a map, otherwise a set of
<typedef:<PB>key>. */
2022-05-28 02:09:38 -04:00
typedef TREE_VALUE PB_(value);
#endif
/** Returns a positive result if `a` is out-of-order with respect to `b`,
inducing a strict weak order. This is compatible, but less strict then the
2022-11-02 00:06:43 -04:00
comparators from `bsearch` and `qsort`; it only needs to divide entries into
two instead of three categories. */
2022-12-27 02:31:08 -05:00
typedef int (*PB_(compare_fn))(const PB_(key) a, const PB_(key) b);
2022-05-28 02:09:38 -04:00
#ifndef TREE_COMPARE /* <!-- !cmp */
/** The default `TREE_COMPARE` on `a` and `b` is integer comparison that
 results in ascending order, `a > b`. Use `TREE_COMPARE` to supply one's own.
 @return Non-zero when `a` is strictly greater than `b`.
 @implements <typedef:<PB>compare_fn> */
static int B_(compare)(const PB_(key) a, const PB_(key) b)
	{ return a > b; }
/* Mark the comparator as present; expands to the function defined above. */
#define TREE_COMPARE &B_(compare)
#endif /* !cmp --> */
2022-07-06 13:02:28 -04:00
/* These rules are more lazy than the original so as to not exhibit worst-case
 behaviour in small trees, as <Johnson, Shasha, 1993, Free-at-Empty>, (lookup
 is potentially slower after deleting.) In the terminology of
 <Knuth, 1998 Art 3>,
 * Every branch has at most `TREE_ORDER == TREE_MAX + 1` children, which is at
   minimum three.
 * Every non-root and non-bulk-loaded node has at least `TREE_MIN` keys,
   (`TREE_MAX/3`.)
 * Every branch has at least one child, `k`, and contains `k - 1` keys, (this
   is a consequence of the fact that they are implicitly storing a complete
   binary sub-tree.)
 * All leaves are at the maximum depth and height zero; they don't carry links
   to other nodes, (hence, leaf.) In this code, a branch node is a
   specialization of a (leaf) node with children. One can tell if it's a branch
   by keeping track of the height.
 * There are two empty B-trees to facilitate allocation hysteresis between
   0 -- 1: idle `{ 0, 0 }`, and `{ garbage leaf, UINT_MAX }`, one could test,
   `!root || height == UINT_MAX`.
 * Bulk-loading always is on the right side. */
struct PB_(node) {
	unsigned size; /* Number of keys in use, at most `TREE_MAX`. */
	PB_(key) key[TREE_MAX]; /* Cache-friendly lookup. */
#ifdef TREE_VALUE
	PB_(value) value[TREE_MAX]; /* Parallel to `key`. */
#endif
};
/* B-tree branch is a <tag:<PB>node> and links to `size + 1` nodes. `base`
 must stay the first member: nodes are passed around as `base` pointers and
 converted back with `offsetof`. */
struct PB_(branch) { struct PB_(node) base, *child[TREE_ORDER]; };
2022-09-08 01:13:56 -04:00
/** The caller must know, by tracking height, that `as_leaf` really is the
 `base` of a <tag:<PB>branch>; this is not checked.
 @return Downcasts `as_leaf` to a branch. */
static struct PB_(branch) *PB_(as_branch)(struct PB_(node) *const as_leaf)
	{ return (struct PB_(branch) *)(void *)
	((char *)as_leaf - offsetof(struct PB_(branch), base)); }
2022-07-06 13:02:28 -04:00
/** @return Downcasts `as_node` to a branch. */
2022-09-08 01:13:56 -04:00
static const struct PB_(branch) *PB_(as_branch_c)(const struct PB_(node) *
2022-05-28 02:09:38 -04:00
const as_node) { return (const struct PB_(branch) *)(const void *)
((const char *)as_node - offsetof(struct PB_(branch), base)); }
2022-09-08 01:13:56 -04:00
/* Address of a specific key by node. There is a need for node plus index
without height, but we'll just let height be unused. */
2022-05-28 02:09:38 -04:00
struct PB_(ref) { struct PB_(node) *node; unsigned height, idx; };
2022-12-27 02:31:08 -05:00
/* Node plus height is a sub-tree. A <tag:<B>tree> is a sub-tree of the tree.
fixme: 0-based height is problematic. UINT_MAX? No. */
2022-07-06 13:02:28 -04:00
struct PB_(tree) { struct PB_(node) *node; unsigned height; };
2022-09-08 01:13:56 -04:00
/** To initialize it to an idle state, see <fn:<B>tree>, `{0}` (`C99`), or
being `static`.
2022-07-06 13:02:28 -04:00
2022-12-27 02:31:08 -05:00
![States.](../doc/tree/states.png) */
2022-07-06 13:02:28 -04:00
struct B_(tree);
struct B_(tree) { struct PB_(tree) root; };
2022-05-28 02:09:38 -04:00
#ifdef TREE_VALUE /* <!-- value */
2022-09-08 01:13:56 -04:00
/** Gets the value of `ref`. */
static PB_(value) *PB_(ref_to_valuep)(const struct PB_(ref) ref)
2022-05-28 02:09:38 -04:00
{ return ref.node ? ref.node->value + ref.idx : 0; }
#else /* value --><!-- !value */
typedef PB_(key) PB_(value);
2022-09-08 01:13:56 -04:00
/** Gets the value of `ref`. */
static PB_(value) *PB_(ref_to_valuep)(const struct PB_(ref) ref)
2022-05-28 02:09:38 -04:00
{ return ref.node ? ref.node->key + ref.idx : 0; }
#endif /* !value --> */
2022-11-02 00:06:43 -04:00
/* Iteration state: `root` is the tree being walked (null for no tree), `ref`
 is the current position, and `seen` is zero until the key at `ref` has been
 returned by <fn:<PB>next> or <fn:<PB>previous>. */
struct PB_(iterator) { struct PB_(tree) *root; struct PB_(ref) ref; int seen; };
2022-09-08 01:13:56 -04:00
/** @return Before the start of `tree`, (can be null.) @implements `begin` */
2022-12-27 02:31:08 -05:00
static struct PB_(iterator) PB_(begin)(struct B_(tree) *const tree) {
2022-11-02 00:06:43 -04:00
struct PB_(iterator) it;
2022-12-27 02:31:08 -05:00
it.root = tree ? &tree->root : 0;
it.ref.height = tree ? tree->root.height : 0;
if(tree && tree->root.height != UINT_MAX)
2022-09-08 01:13:56 -04:00
for(it.ref.node = tree->root.node; it.ref.height;
2022-12-27 02:31:08 -05:00
it.ref.node = PB_(as_branch)(it.ref.node)->child[0], it.ref.height--);
else it.ref.node = 0;
it.ref.idx = 0;
it.seen = 0;
2022-09-08 01:13:56 -04:00
return it;
}
2022-12-27 02:31:08 -05:00
/** @return After the end of `tree`, (can be null.) @implements `end` */
2022-11-02 00:06:43 -04:00
static struct PB_(iterator) PB_(end)(struct B_(tree) *const tree) {
struct PB_(iterator) it;
2022-12-27 02:31:08 -05:00
it.root = tree ? &tree->root : 0;
it.ref.height = tree ? tree->root.height : 0;
if(tree && tree->root.height != UINT_MAX)
2022-09-08 01:13:56 -04:00
for(it.ref.node = tree->root.node; it.ref.height;
2022-12-27 02:31:08 -05:00
it.ref.node = PB_(as_branch)(it.ref.node)->child[it.ref.node->size],
it.ref.height--);
else it.ref.node = 0;
it.ref.idx = it.ref.node ? it.ref.node->size : 0;
it.seen = 0;
2022-05-28 02:09:38 -04:00
return it;
}
2022-12-27 02:31:08 -05:00
/** Moves `it` to the next key in order. If the key under the cursor has not
 been returned yet, (`seen` is zero,) it is returned without moving.
 @param[ref] If non-null, receives a pointer to the iterator's own reference
 on success; it is overwritten by the next call.
 @return Whether `it` advances, filling `ref`. @implements `next` */
static int PB_(next)(struct PB_(iterator) *const it,
	struct PB_(ref) **const ref) {
	struct PB_(ref) adv;
	assert(it);
	if(!it->root || !it->ref.node) return it->seen = 0, 0;
	if(!it->root->node || it->root->height == UINT_MAX)
		return it->ref.node = 0, 0; /* Concurrent modification? */
	adv = it->ref; /* Shorten keystrokes and work with a copy. */
	if(!it->seen && adv.idx < adv.node->size) goto successor;
	adv.idx++;
	if(adv.height && adv.idx > adv.node->size)
		return it->ref.node = 0, 0; /* Concurrent modification? */
	/* From a branch, the successor is the left-most key of the next child. */
	while(adv.height) adv.height--,
		adv.node = PB_(as_branch)(adv.node)->child[adv.idx], adv.idx = 0;
	if(adv.idx < adv.node->size) goto successor; /* Likely. */
	/* Bulk-loading or concurrent modification? */
	if(adv.idx > adv.node->size) return it->ref.node = 0, 0;
	{ /* Re-descend; pick the minimum height node that has a next key. */
		struct PB_(ref) next;
		struct PB_(tree) tree = *it->root;
		unsigned a0;
		const PB_(key) x = adv.node->key[adv.node->size - 1]; /* Target. */
		for(next.node = 0; tree.height;
			tree.node = PB_(as_branch)(tree.node)->child[a0], tree.height--) {
			unsigned a1 = tree.node->size;
			a0 = 0;
			while(a0 < a1) { /* Binary search for the lower bound of `x`. */
				const unsigned m = (a0 + a1) / 2;
				if(B_(compare)(x, tree.node->key[m]) > 0) a0 = m + 1;
				else a1 = m;
			}
			if(a0 < tree.node->size) next.node = tree.node,
				next.height = tree.height, next.idx = a0;
		}
		if(!next.node) return it->seen = 0, 0; /* Off right. */
		adv = next;
	} /* Jumped nodes. */
successor:
	it->seen = 1;
	it->ref = adv;
	if(ref) *ref = &it->ref;
	return 1;
}
2022-12-27 02:31:08 -05:00
/** Moves `it` to the previous key in order. If nothing has been returned yet,
 (`seen` is zero,) and there is a key before the cursor in the same node, it
 steps onto that key.
 @param[v] If non-null, receives a pointer to the iterator's own reference on
 success; it is overwritten by the next call.
 @return Whether `it` recedes, filling `v`. @implements `previous` */
static int PB_(previous)(struct PB_(iterator) *const it,
	struct PB_(ref) **const v) {
	struct PB_(ref) prd;
	assert(it);
	if(!it->root || !it->ref.node) return it->seen = 0, 0;
	if(!it->root->node || it->root->height == UINT_MAX)
		return it->ref.node = 0, 0; /* Concurrent modification? */
	prd = it->ref; /* Shorten keystrokes and work with a copy. */
	if(prd.idx > prd.node->size) prd.idx = prd.node->size; /* Clip. */
	if(!it->seen && prd.idx) { prd.idx--; goto predecessor; }
	/* From a branch, the predecessor is the right-most key of the child. */
	while(prd.height) prd.height--,
		prd.node = PB_(as_branch)(prd.node)->child[prd.idx],
		prd.idx = prd.node->size;
	if(prd.idx) { prd.idx--; goto predecessor; } /* Likely. */
	{ /* Re-descend; pick the minimum height node that has a previous key. */
		struct PB_(ref) prev;
		struct PB_(tree) tree = *it->root;
		unsigned a0;
		const PB_(key) x = prd.node->key[0]; /* Target. */
		for(prev.node = 0; tree.height;
			tree.node = PB_(as_branch)(tree.node)->child[a0], tree.height--) {
			unsigned a1 = tree.node->size;
			a0 = 0;
			while(a0 < a1) { /* Binary search for the lower bound of `x`. */
				const unsigned m = (a0 + a1) / 2;
				if(B_(compare)(x, tree.node->key[m]) > 0) a0 = m + 1;
				else a1 = m;
			}
			if(a0) prev.node = tree.node, prev.height = tree.height,
				prev.idx = a0 - 1;
		}
		if(!prev.node) return it->seen = 0, 0; /* Off left. */
		prd = prev;
	} /* Jumped nodes. */
predecessor:
	it->seen = 1;
	it->ref = prd;
	if(v) *v = &it->ref;
	return 1;
}
2022-09-08 01:13:56 -04:00
/* Want to find slightly different things; code re-use is bad. Confusing.
 This is the lower-bound. These macros are building blocks shared by the
 lookup functions below and rely on names at the point of use: `tree` (a
 pointer to the sub-tree), `key` (the target), and `hi` (declared by
 `TREE_START`). */
/* The three clauses of a `for`: start at the root of `tree`, no test, and
 descend into the child at `i.idx`. The body must `break` or `return`. */
#define TREE_FORTREE(i) i.node = tree->node, i.height = tree->height; ; \
	i.node = PB_(as_branch_c)(i.node)->child[i.idx], i.height--
/* Declares `hi`, so it must be the first thing in a block. */
#define TREE_START(i) unsigned hi = i.node->size; i.idx = 0;
/* Binary search of `[i.idx, hi)` for the lower bound of `key`; the range must
 be non-empty on entry because the body executes at least once. */
#define TREE_FORNODE(i) do { \
	const unsigned m = (i.idx + hi) / 2; \
	if(B_(compare)(key, i.node->key[m]) > 0) i.idx = m + 1; \
	else hi = m; \
} while(i.idx < hi);
/* After the lower bound is found, this is true when the key there is also not
 greater than `key`: an equal match. */
#define TREE_FLIPPED(i) (B_(compare)(i.node->key[i.idx], key) <= 0)
2022-09-08 01:13:56 -04:00
/** Finds `key` in `lo` one node at a time. */
2022-05-28 02:09:38 -04:00
static void PB_(find_idx)(struct PB_(ref) *const lo, const PB_(key) key) {
2022-07-06 13:02:28 -04:00
TREE_START((*lo))
2022-09-08 01:13:56 -04:00
if(!lo) return;
TREE_FORNODE((*lo))
2022-05-28 02:09:38 -04:00
}
2022-09-08 01:13:56 -04:00
/** Finds lower-bound of `key` in non-empty `tree`, or, if `key` is greater
than all `tree`, one off the end. */
2022-07-06 13:02:28 -04:00
static struct PB_(ref) PB_(lower_r)(struct PB_(tree) *const tree,
const PB_(key) key) {
struct PB_(ref) i, lo = { 0, 0, 0 };
for(TREE_FORTREE(i)) {
TREE_START(i)
2022-09-08 01:13:56 -04:00
if(!hi) continue;
TREE_FORNODE(i)
2022-07-06 13:02:28 -04:00
if(i.idx < i.node->size) {
lo = i;
2022-09-08 01:13:56 -04:00
if(TREE_FLIPPED(i)) break; /* Multi-keys go here. */
}
if(!i.height) {
if(!lo.node) lo = i; /* Want one-off-end if last. */
break;
2022-07-06 13:02:28 -04:00
}
}
return lo;
}
2022-09-08 01:13:56 -04:00
/** An idle or empty `tree` has no lower bound; the returned reference then
 has a null node and all other fields zero.
 @return Lower bound of `x` in `tree`. @order \O(\log |`tree`|) */
static struct PB_(ref) PB_(lower)(struct PB_(tree) tree, const PB_(key) x) {
	struct PB_(ref) none;
	if(tree.node && tree.height != UINT_MAX) return PB_(lower_r)(&tree, x);
	/* Fully initialize so no indeterminate value is copied out. */
	none.node = 0, none.height = 0, none.idx = 0;
	return none;
}
/** Looks for a key equal to `key` in non-empty `tree`, descending from the
 root. @return The reference to the match, or a reference with a null node if
 the search reaches a leaf without one. */
static struct PB_(ref) PB_(find)(const struct PB_(tree) *const tree,
	const PB_(key) key) {
	struct PB_(ref) at;
	for(TREE_FORTREE(at)) {
		TREE_START(at)
		if(!hi) continue; /* No keys in this node; go down `child[0]`. */
		TREE_FORNODE(at)
		/* The lower bound is a hit when it is also not greater. */
		if(at.idx < at.node->size && TREE_FLIPPED(at)) return at;
		if(!at.height) { at.node = 0; break; } /* Leaf, and not in. */
	}
	return at;
}
/** Finds lower-bound of `key` in non-empty `tree` while counting the
non-filled `hole` and `is_equal`. */
2022-07-06 13:02:28 -04:00
static struct PB_(ref) PB_(lookup_insert)(struct PB_(tree) *const tree,
const PB_(key) key, struct PB_(ref) *const hole, int *const is_equal) {
2022-05-28 02:09:38 -04:00
struct PB_(ref) lo;
2022-07-06 13:02:28 -04:00
hole->node = 0;
for(TREE_FORTREE(lo)) {
TREE_START(lo)
if(hi < TREE_MAX) *hole = lo;
2022-09-08 01:13:56 -04:00
if(!hi) continue;
TREE_FORNODE(lo)
2022-07-06 13:02:28 -04:00
if(lo.node->size < TREE_MAX) hole->idx = lo.idx;
if(lo.idx < lo.node->size && TREE_FLIPPED(lo)) { *is_equal = 1; break; }
if(!lo.height) break;
2022-05-28 02:09:38 -04:00
}
return lo;
}
2022-09-08 01:13:56 -04:00
/** Finds exact `key` in non-empty `tree`. If `node` is found, temporarily, the
nodes that have `TREE_MIN` keys have
`as_branch(node).child[TREE_MAX] = parent` or, for leaves, `leaf_parent`,
which must be set. (Patently terrible for running concurrently; hack, would be
nice to go down tree maybe.) */
2022-07-06 13:02:28 -04:00
static struct PB_(ref) PB_(lookup_remove)(struct PB_(tree) *const tree,
2022-09-08 01:13:56 -04:00
const PB_(key) key, struct PB_(node) **leaf_parent) {
struct PB_(node) *parent = 0;
2022-07-06 13:02:28 -04:00
struct PB_(ref) lo;
for(TREE_FORTREE(lo)) {
TREE_START(lo)
2022-09-08 01:13:56 -04:00
/* Cannot delete bulk add. */
if(parent && hi < TREE_MIN || !parent && !hi) { lo.node = 0; break; }
if(hi <= TREE_MIN) { /* Remember the parent temporarily. */
if(lo.height) PB_(as_branch)(lo.node)->child[TREE_MAX] = parent;
else *leaf_parent = parent;
}
TREE_FORNODE(lo)
2022-07-06 13:02:28 -04:00
if(lo.idx < lo.node->size && TREE_FLIPPED(lo)) break;
if(!lo.height) { lo.node = 0; break; } /* Was not in. */
2022-09-08 01:13:56 -04:00
parent = lo.node;
2022-07-06 13:02:28 -04:00
}
return lo;
}
/* The search building-blocks are only meant for the lookup functions above. */
#undef TREE_FORTREE
#undef TREE_START
#undef TREE_FORNODE
#undef TREE_FLIPPED
2022-05-28 02:09:38 -04:00
2022-09-08 01:13:56 -04:00
/** Zeroed data (not all-bits-zero) is initialized. @return An idle tree.
 @order \Theta(1) @allow */
static struct B_(tree) B_(tree)(void) {
	struct B_(tree) tree;
	tree.root.node = 0; tree.root.height = 0;
	return tree;
}
2022-09-08 01:13:56 -04:00
/** Private: frees non-empty `tree` and it's children recursively, but doesn't
put it to idle or clear pointers.
@param[keep] Keep one leaf if non-null. Set to null before. */
2022-07-06 13:02:28 -04:00
static void PB_(clear_r)(struct PB_(tree) tree, struct PB_(node) **const keep) {
assert(tree.node);
if(!tree.height) {
if(keep && !*keep) *keep = tree.node;
else free(tree.node);
2022-05-28 02:09:38 -04:00
} else {
2022-07-06 13:02:28 -04:00
struct PB_(tree) child;
2022-05-28 02:09:38 -04:00
unsigned i;
2022-07-06 13:02:28 -04:00
child.height = tree.height - 1;
for(i = 0; i <= tree.node->size; i++)
2022-09-08 01:13:56 -04:00
child.node = PB_(as_branch)(tree.node)->child[i],
2022-07-06 13:02:28 -04:00
PB_(clear_r)(child, keep);
2022-09-08 01:13:56 -04:00
free(PB_(as_branch)(tree.node));
2022-05-28 02:09:38 -04:00
}
}
2022-09-08 01:13:56 -04:00
/** Private: `tree` can be null. */
2022-07-06 13:02:28 -04:00
static void PB_(clear)(struct B_(tree) *tree) {
struct PB_(node) *one = 0;
/* Already not there/idle/empty. */
if(!tree || !tree->root.node || tree->root.height == UINT_MAX) return;
PB_(clear_r)(tree->root, &one), assert(one);
/* This is a special state where the tree has one leaf, but it is empty.
2022-09-08 01:13:56 -04:00
This state exists because it gives hysteresis to 0 -- 1 transition because
we have no advanced memory management. */
2022-07-06 13:02:28 -04:00
tree->root.node = one;
tree->root.height = UINT_MAX;
}
2022-09-08 01:13:56 -04:00
/** Returns an initialized `tree` to idle, `tree` can be null.
@order \O(|`tree`|) @allow */
2022-05-28 02:09:38 -04:00
static void B_(tree_)(struct B_(tree) *const tree) {
if(!tree) return; /* Null. */
if(!tree->root.node) { /* Idle. */
assert(!tree->root.height);
} else if(tree->root.height == UINT_MAX) { /* Empty. */
2022-07-06 13:02:28 -04:00
assert(tree->root.node), free(tree->root.node);
2022-05-28 02:09:38 -04:00
} else {
PB_(clear_r)(tree->root, 0);
}
*tree = B_(tree)();
}
2022-09-08 01:13:56 -04:00
/** Removes every key from `tree`. A null, idle, or empty `tree` is left as
 it is; a tree holding keys ends up empty, (still active.)
 @order \O(|`tree`|) @allow */
static void B_(tree_clear)(struct B_(tree) *const tree) {
	PB_(clear)(tree);
}
/** Private: recursively adds up the keys of the non-null sub-tree `tree`.
 @return The keys in this node plus those in all of its descendants. */
static size_t PB_(count_r)(const struct PB_(tree) tree) {
	size_t total = tree.node->size, c;
	const struct PB_(branch) *inner;
	struct PB_(tree) below;
	if(!tree.height) return total; /* A leaf has no children. */
	inner = PB_(as_branch)(tree.node);
	below.height = tree.height - 1;
	/* A branch with `size` keys has `size + 1` children. */
	for(c = 0; c <= tree.node->size; c++)
		below.node = inner->child[c], total += PB_(count_r)(below);
	return total;
}
/** Counts all the keys on `tree`, which can be null. A null, idle, or empty
 tree has zero keys.
 @order \O(|`tree`|) @allow */
static size_t B_(tree_count)(const struct B_(tree) *const tree) {
	/* An idle tree has a null root and height zero, so the root must be
	 tested as well as the empty sentinel before it is dereferenced. */
	return tree && tree->root.node && tree->root.height != UINT_MAX
		? PB_(count_r)(tree->root) : 0;
}
2022-09-08 01:13:56 -04:00
/** A null, idle, or empty `tree` contains nothing.
 @return One if `x` is a key in `tree`, zero otherwise.
 @order \O(\log |`tree`|) @allow */
static int B_(tree_contains)(const struct B_(tree) *const tree,
	const PB_(key) x) {
	if(!tree || !tree->root.node || tree->root.height == UINT_MAX) return 0;
	return PB_(find)(&tree->root, x).node != 0;
}
/* fixme: entry <B>tree_query -- there is no functionality that returns the
 key. */
/** Exact-match lookup. The map type is `TREE_VALUE` and the set type is
 `TREE_KEY`.
 @return The value stored with `key` in `tree`, or `default_value` when
 `tree` is null, idle, empty, or does not have `key`.
 @order \O(\log |`tree`|) @allow */
static PB_(value) B_(tree_get_or)(const struct B_(tree) *const tree,
	const PB_(key) key, const PB_(value) default_value) {
	struct PB_(ref) found;
	if(!tree || !tree->root.node || tree->root.height == UINT_MAX)
		return default_value;
	found = PB_(find)(&tree->root, key);
	return found.node ? *PB_(ref_to_valuep)(found) : default_value;
}
/** Lower-bound lookup. For example, with `tree = { 10 }`: `x = 5 -> 10`,
 `x = 10 -> 10`, and `x = 11 -> default_value`. (There is no upper value.)
 The map type is `TREE_VALUE` and the set type is `TREE_KEY`.
 @return The value at the lower bound of `key` in `tree`, or `default_value`
 if `tree` is null or has no key that is not less than `key`.
 @order \O(\log |`tree`|) @allow */
static PB_(value) B_(tree_at_or)(const struct B_(tree) *const tree,
	const PB_(key) key, const PB_(value) default_value) {
	struct PB_(ref) at;
	if(!tree) return default_value;
	at = PB_(lower)(tree->root, key);
	/* No node, or one-past-the-end of a node, means no such key. */
	if(!at.node || at.idx >= at.node->size) return default_value;
	return *PB_(ref_to_valuep)(at);
}
2022-05-28 02:09:38 -04:00
#ifdef TREE_VALUE /* <!-- map */
/** Packs `key` on the right side of `tree` without doing the usual
 restructuring. All other topology modification functions should be avoided
 until followed by <fn:<B>tree_bulk_finish>.
 @param[value] A pointer to the key's value which is set by the function on
 returning true. A null pointer in this parameter causes the value to go
 uninitialized. This parameter is not there if one didn't specify `TREE_VALUE`.
 @return One of <tag:tree_result>: `TREE_ERROR` and `errno` will be set,
 `TREE_PRESENT` if the key is already (the highest) in the tree, and
 `TREE_ABSENT`, added, the `value` (if applicable) is uninitialized.
 @throws[EDOM] `key` is smaller than the largest key in `tree`.
 @throws[malloc] @order \O(\log |`tree`|) @allow */
static enum tree_result B_(tree_bulk_add)(struct B_(tree) *const tree,
	PB_(key) key, PB_(value) **const value) {
#else /* map --><!-- set */
static enum tree_result B_(tree_bulk_add)(struct B_(tree) *const tree,
	PB_(key) key) {
#endif
	struct PB_(node) *node = 0, *head = 0; /* The original and new. */
	assert(tree);
	if(!tree->root.node) { /* Idle tree. */
		assert(!tree->root.height);
		if(!(node = malloc(sizeof *node))) goto catch;
		node->size = 0;
		tree->root.node = node;
	} else if(tree->root.height == UINT_MAX) { /* Empty tree. */
		node = tree->root.node;
		tree->root.height = 0;
		node->size = 0;
	} else {
		struct PB_(tree) unfull = { 0, 0 };
		unsigned new_nodes, n; /* Count new nodes. */
		struct PB_(node) *tail = 0, *last = 0;
		struct PB_(branch) *pretail = 0;
		struct PB_(tree) scout;
		PB_(key) max;
		/* Right side bottom: `last` node with any keys, `unfull` not full. */
		for(scout = tree->root; ; scout.node = PB_(as_branch)(scout.node)
			->child[scout.node->size], scout.height--) {
			if(scout.node->size < TREE_MAX) unfull = scout;
			if(scout.node->size) last = scout.node;
			if(!scout.height) break;
		}
		assert(last), max = last->key[last->size - 1];
		if(B_(compare)(max, key) > 0) return errno = EDOM, TREE_ERROR;
		if(B_(compare)(key, max) <= 0) { /* Not greater either: equal. */
#ifdef TREE_VALUE
			if(value) {
				struct PB_(ref) max_ref;
				max_ref.node = last, max_ref.idx = last->size - 1;
				*value = PB_(ref_to_valuep)(max_ref);
			}
#endif
			return TREE_PRESENT;
		}
		/* One leaf, and the rest branches. */
		new_nodes = n = unfull.node ? unfull.height : tree->root.height + 2;
		if(!n) {
			node = unfull.node;
		} else {
			if(!(node = tail = malloc(sizeof *tail))) goto catch;
			tail->size = 0;
			/* Chain the new branches through `child[0]`, top is `head`. */
			while(--n) {
				struct PB_(branch) *b;
				if(!(b = malloc(sizeof *b))) goto catch;
				b->base.size = 0;
				if(!head) b->child[0] = 0, pretail = b; /* First loop. */
				else b->child[0] = head; /* Not first loop. */
				head = &b->base;
			}
		}
		/* Post-error; modify the original as needed. */
		if(pretail) pretail->child[0] = tail;
		else head = node;
		if(!unfull.node) { /* Add tree to head. */
			struct PB_(branch) *const branch = PB_(as_branch)(head);
			assert(new_nodes > 1);
			branch->child[1] = branch->child[0];
			branch->child[0] = tree->root.node;
			node = tree->root.node = head, tree->root.height++;
		} else if(unfull.height) { /* Add head to tree. */
			struct PB_(branch) *const branch
				= PB_(as_branch)(node = unfull.node);
			assert(new_nodes);
			branch->child[branch->base.size + 1] = head;
		}
	}
	assert(node && node->size < TREE_MAX);
	node->key[node->size] = key;
#ifdef TREE_VALUE
	if(value) {
		struct PB_(ref) max_ref;
		max_ref.node = node, max_ref.idx = node->size;
		*value = PB_(ref_to_valuep)(max_ref);
	}
#endif
	node->size++;
	return TREE_ABSENT;
catch: /* Didn't work. Reset. */
	/* Only reached from a failed allocation, before the tree is modified:
	 `node` is null or the new leaf, `head` is the chain of new branches. */
	free(node);
	while(head) {
		struct PB_(node) *const next = PB_(as_branch)(head)->child[0];
		free(head);
		head = next;
	}
	if(!errno) errno = ERANGE;
	return TREE_ERROR;
/* One closing brace per opening brace above. */
#ifdef TREE_VALUE
}
#else
}
#endif
2022-05-28 02:09:38 -04:00
2022-09-08 01:13:56 -04:00
/** Distributes `tree` (can be null) on the right side so that, after a series
of <fn:<B>tree_bulk_add>, it will be consistent with the minimum number of
keys in a node. @return The re-distribution was a success and all nodes are
within rules. (Only when intermixing bulk and regular operations, can the
function return false.) @order \O(\log |`tree`|) @allow */
2022-05-28 02:09:38 -04:00
static int B_(tree_bulk_finish)(struct B_(tree) *const tree) {
2022-07-06 13:02:28 -04:00
struct PB_(tree) s;
2022-05-28 02:09:38 -04:00
struct PB_(node) *right;
if(!tree || !tree->root.node || tree->root.height == UINT_MAX) return 1;
for(s = tree->root; s.height; s.node = right, s.height--) {
unsigned distribute, right_want, right_move, take_sibling;
2022-09-08 01:13:56 -04:00
struct PB_(branch) *parent = PB_(as_branch)(s.node);
2022-05-28 02:09:38 -04:00
struct PB_(node) *sibling = (assert(parent->base.size),
parent->child[parent->base.size - 1]);
right = parent->child[parent->base.size];
2022-09-08 01:13:56 -04:00
/* Should this be increased to max/2 instead of max/3 to make a more
balanced tree? Otoh, why? */
2022-05-28 02:09:38 -04:00
if(TREE_MIN <= right->size) continue; /* Has enough. */
distribute = sibling->size + right->size;
2022-07-06 13:02:28 -04:00
/* Should have at least `TREE_MAX` on left. */
2022-05-28 02:09:38 -04:00
if(distribute < 2 * TREE_MIN) return 0;
right_want = distribute / 2;
right_move = right_want - right->size;
take_sibling = right_move - 1;
/* Either the right has met the properties of a B-tree node, (covered
above,) or the left sibling is full from bulk-loading (relaxed.) */
assert(right->size < right_want && right_want >= TREE_MIN
&& sibling->size - take_sibling >= TREE_MIN + 1);
/* Move the right node to accept more keys. */
memmove(right->key + right_move, right->key,
sizeof *right->key * right->size);
#ifdef TREE_VALUE
memmove(right->value + right_move, right->value,
sizeof *right->value * right->size);
#endif
if(s.height > 1) { /* (Parent height.) */
2022-09-08 01:13:56 -04:00
struct PB_(branch) *rbranch = PB_(as_branch)(right),
*sbranch = PB_(as_branch)(sibling);
2022-05-28 02:09:38 -04:00
memmove(rbranch->child + right_move, rbranch->child,
sizeof *rbranch->child * (right->size + 1));
memcpy(rbranch->child, sbranch->child + sibling->size + 1
- right_move, sizeof *sbranch->child * right_move);
}
right->size += right_move;
/* Move one node from the parent. */
memcpy(right->key + take_sibling,
parent->base.key + parent->base.size - 1, sizeof *right->key);
#ifdef TREE_VALUE
memcpy(right->value + take_sibling,
parent->base.value + parent->base.size - 1, sizeof *right->value);
#endif
/* Move the others from the sibling. */
memcpy(right->key, sibling->key + sibling->size - take_sibling,
sizeof *right->key * take_sibling);
#ifdef TREE_VALUE
memcpy(right->value, sibling->value + sibling->size - take_sibling,
sizeof *right->value * take_sibling);
#endif
sibling->size -= take_sibling;
/* Sibling's key is now the parent's. */
memcpy(parent->base.key + parent->base.size - 1,
sibling->key + sibling->size - 1, sizeof *right->key);
#ifdef TREE_VALUE
memcpy(parent->base.value + parent->base.size - 1,
sibling->value + sibling->size - 1, sizeof *right->value);
#endif
sibling->size--;
}
return 1;
}
#ifdef TREE_VALUE /* <!-- map */
2022-09-08 01:13:56 -04:00
/** Adds or updates `key` in `root`. If not-null, `eject` will be the replaced
key, otherwise don't replace. If `value` is not-null, sticks the associated
value. */
static enum tree_result PB_(update)(struct PB_(tree) *const root,
PB_(key) key, PB_(key) *const eject, PB_(value) **const value) {
2022-05-28 02:09:38 -04:00
#else /* map --><!-- set */
2022-09-08 01:13:56 -04:00
static enum tree_result PB_(update)(struct PB_(tree) *const root,
PB_(key) key, PB_(key) *const eject) {
2022-07-06 13:02:28 -04:00
#endif /* set --> */
struct PB_(node) *new_head = 0;
2022-11-02 00:06:43 -04:00
struct PB_(ref) add, hole, iterator;
2022-09-08 01:13:56 -04:00
assert(root);
if(!(add.node = root->node)) goto idle;
else if(root->height == UINT_MAX) goto empty;
2022-07-06 13:02:28 -04:00
goto descend;
2022-05-28 02:09:38 -04:00
idle: /* No reserved memory. */
2022-09-08 01:13:56 -04:00
assert(!add.node && !root->height);
2022-05-28 02:09:38 -04:00
if(!(add.node = malloc(sizeof *add.node))) goto catch;
2022-09-08 01:13:56 -04:00
root->node = add.node;
root->height = UINT_MAX;
2022-05-28 02:09:38 -04:00
goto empty;
empty: /* Reserved dynamic memory, but tree is empty. */
2022-09-08 01:13:56 -04:00
assert(add.node && root->height == UINT_MAX);
add.height = root->height = 0;
2022-05-28 02:09:38 -04:00
add.node->size = 0;
add.idx = 0;
goto insert;
2022-07-06 13:02:28 -04:00
descend: /* Record last node that has space. */
{
int is_equal = 0;
2022-09-08 01:13:56 -04:00
add = PB_(lookup_insert)(root, key, &hole, &is_equal);
2022-07-06 13:02:28 -04:00
if(is_equal) {
2022-09-08 01:13:56 -04:00
if(eject) {
*eject = add.node->key[add.idx];
add.node->key[add.idx] = key;
}
2022-07-06 13:02:28 -04:00
#ifdef TREE_VALUE
2022-09-08 01:13:56 -04:00
if(value) *value = PB_(ref_to_valuep)(add);
2022-07-06 13:02:28 -04:00
#endif
2022-09-08 01:13:56 -04:00
return TREE_PRESENT;
2022-07-06 13:02:28 -04:00
}
}
if(hole.node == add.node) goto insert; else goto grow;
insert: /* Leaf has space to spare; usually end up here. */
2022-09-08 01:13:56 -04:00