2 * tree234.c: reasonably generic 2-3-4 tree routines. Currently
3 * supports insert, delete, find and iterate operations.
13 #define mknew(typ) ( (typ *) smalloc (sizeof (typ)) )
14 /* #define sfree free */
17 #define LOG(x) (printf x)
34 * Create a 2-3-4 tree.
36 tree234 *newtree234(cmpfn234 cmp) {
37 tree234 *ret = mknew(tree234);
38 LOG(("created tree %p\n", ret));
45 * Free a 2-3-4 tree (not including freeing the elements).
47 static void freenode234(node234 *n) {
50 freenode234(n->kids[0]);
51 freenode234(n->kids[1]);
52 freenode234(n->kids[2]);
53 freenode234(n->kids[3]);
56 void freetree234(tree234 *t) {
62 * Add an element e to a 2-3-4 tree t. Returns e on success, or if
63 * an existing element compares equal, returns that.
65 void *add234(tree234 *t, void *e) {
66 node234 *n, **np, *left, *right;
70 LOG(("adding node %p to tree %p\n", e, t));
71 if (t->root == NULL) {
72 t->root = mknew(node234);
73 t->root->elems[1] = t->root->elems[2] = NULL;
74 t->root->kids[0] = t->root->kids[1] = NULL;
75 t->root->kids[2] = t->root->kids[3] = NULL;
76 t->root->parent = NULL;
77 t->root->elems[0] = e;
78 LOG((" created root %p\n", t->root));
85 LOG((" node %p: %p [%p] %p [%p] %p [%p] %p\n",
86 n, n->kids[0], n->elems[0], n->kids[1], n->elems[1],
87 n->kids[2], n->elems[2], n->kids[3]));
88 if ((c = t->cmp(e, n->elems[0])) < 0)
91 return n->elems[0]; /* already exists */
92 else if (n->elems[1] == NULL || (c = t->cmp(e, n->elems[1])) < 0)
95 return n->elems[1]; /* already exists */
96 else if (n->elems[2] == NULL || (c = t->cmp(e, n->elems[2])) < 0)
99 return n->elems[2]; /* already exists */
102 LOG((" moving to child %d (%p)\n", np - n->kids, *np));
106 * We need to insert the new element in n at position np.
111 LOG((" at %p: %p [%p] %p [%p] %p [%p] %p\n",
112 n, n->kids[0], n->elems[0], n->kids[1], n->elems[1],
113 n->kids[2], n->elems[2], n->kids[3]));
114 LOG((" need to insert %p [%p] %p at position %d\n",
115 left, e, right, np - n->kids));
116 if (n->elems[1] == NULL) {
118 * Insert in a 2-node; simple.
120 if (np == &n->kids[0]) {
121 LOG((" inserting on left of 2-node\n"));
122 n->kids[2] = n->kids[1];
123 n->elems[1] = n->elems[0];
127 } else { /* np == &n->kids[1] */
128 LOG((" inserting on right of 2-node\n"));
133 if (n->kids[0]) n->kids[0]->parent = n;
134 if (n->kids[1]) n->kids[1]->parent = n;
135 if (n->kids[2]) n->kids[2]->parent = n;
138 } else if (n->elems[2] == NULL) {
140 * Insert in a 3-node; simple.
142 if (np == &n->kids[0]) {
143 LOG((" inserting on left of 3-node\n"));
144 n->kids[3] = n->kids[2];
145 n->elems[2] = n->elems[1];
146 n->kids[2] = n->kids[1];
147 n->elems[1] = n->elems[0];
151 } else if (np == &n->kids[1]) {
152 LOG((" inserting in middle of 3-node\n"));
153 n->kids[3] = n->kids[2];
154 n->elems[2] = n->elems[1];
158 } else { /* np == &n->kids[2] */
159 LOG((" inserting on right of 3-node\n"));
164 if (n->kids[0]) n->kids[0]->parent = n;
165 if (n->kids[1]) n->kids[1]->parent = n;
166 if (n->kids[2]) n->kids[2]->parent = n;
167 if (n->kids[3]) n->kids[3]->parent = n;
171 node234 *m = mknew(node234);
172 m->parent = n->parent;
173 LOG((" splitting a 4-node; created new node %p\n", m));
175 * Insert in a 4-node; split into a 2-node and a
176 * 3-node, and move focus up a level.
178 * I don't think it matters which way round we put the
179 * 2 and the 3. For simplicity, we'll put the 3 first
182 if (np == &n->kids[0]) {
186 m->elems[1] = n->elems[0];
187 m->kids[2] = n->kids[1];
189 n->kids[0] = n->kids[2];
190 n->elems[0] = n->elems[2];
191 n->kids[1] = n->kids[3];
192 } else if (np == &n->kids[1]) {
193 m->kids[0] = n->kids[0];
194 m->elems[0] = n->elems[0];
199 n->kids[0] = n->kids[2];
200 n->elems[0] = n->elems[2];
201 n->kids[1] = n->kids[3];
202 } else if (np == &n->kids[2]) {
203 m->kids[0] = n->kids[0];
204 m->elems[0] = n->elems[0];
205 m->kids[1] = n->kids[1];
206 m->elems[1] = n->elems[1];
210 n->elems[0] = n->elems[2];
211 n->kids[1] = n->kids[3];
212 } else { /* np == &n->kids[3] */
213 m->kids[0] = n->kids[0];
214 m->elems[0] = n->elems[0];
215 m->kids[1] = n->kids[1];
216 m->elems[1] = n->elems[1];
217 m->kids[2] = n->kids[2];
223 m->kids[3] = n->kids[3] = n->kids[2] = NULL;
224 m->elems[2] = n->elems[2] = n->elems[1] = NULL;
225 if (m->kids[0]) m->kids[0]->parent = m;
226 if (m->kids[1]) m->kids[1]->parent = m;
227 if (m->kids[2]) m->kids[2]->parent = m;
228 if (n->kids[0]) n->kids[0]->parent = n;
229 if (n->kids[1]) n->kids[1]->parent = n;
230 LOG((" left (%p): %p [%p] %p [%p] %p\n", m,
231 m->kids[0], m->elems[0],
232 m->kids[1], m->elems[1],
234 LOG((" right (%p): %p [%p] %p\n", n,
235 n->kids[0], n->elems[0],
241 np = (n->parent->kids[0] == n ? &n->parent->kids[0] :
242 n->parent->kids[1] == n ? &n->parent->kids[1] :
243 n->parent->kids[2] == n ? &n->parent->kids[2] :
244 &n->parent->kids[3]);
249 * If we've come out of here by `break', n will still be
250 * non-NULL and we've finished. If we've come here because n is
251 * NULL, we need to create a new root for the tree because the
252 * old one has just split into two.
255 LOG((" root is overloaded, split into two\n"));
256 t->root = mknew(node234);
257 t->root->kids[0] = left;
258 t->root->elems[0] = e;
259 t->root->kids[1] = right;
260 t->root->elems[1] = NULL;
261 t->root->kids[2] = NULL;
262 t->root->elems[2] = NULL;
263 t->root->kids[3] = NULL;
264 t->root->parent = NULL;
265 if (t->root->kids[0]) t->root->kids[0]->parent = t->root;
266 if (t->root->kids[1]) t->root->kids[1]->parent = t->root;
267 LOG((" new root is %p [%p] %p\n",
268 t->root->kids[0], t->root->elems[0], t->root->kids[1]));
275 * Find an element e in a 2-3-4 tree t. Returns NULL if not found.
276 * e is always passed as the first argument to cmp, so cmp can be
277 * an asymmetric function if desired. cmp can also be passed as
278 * NULL, in which case the compare function from the tree proper
281 void *find234(tree234 *t, void *e, cmpfn234 cmp) {
293 if ( (c = cmp(e, n->elems[0])) < 0)
297 else if (n->elems[1] == NULL || (c = cmp(e, n->elems[1])) < 0)
301 else if (n->elems[2] == NULL || (c = cmp(e, n->elems[2])) < 0)
310 * We've found our way to the bottom of the tree and we know
311 * where we would insert this node if we wanted to. But it
318 * Delete an element e in a 2-3-4 tree. Does not free the element,
319 * merely removes all links to it from the tree nodes.
321 void del234(tree234 *t, void *e) {
326 LOG(("deleting %p from tree %p\n", e, t));
333 LOG((" node %p: %p [%p] %p [%p] %p [%p] %p\n",
334 n, n->kids[0], n->elems[0], n->kids[1], n->elems[1],
335 n->kids[2], n->elems[2], n->kids[3]));
336 if ((c = t->cmp(e, n->elems[0])) < 0) {
340 } else if (n->elems[1] == NULL || (c = t->cmp(e, n->elems[1])) < 0) {
344 } else if (n->elems[2] == NULL || (c = t->cmp(e, n->elems[2])) < 0) {
352 * Recurse down to subtree ki. If it has only one element,
353 * we have to do some transformation to start with.
355 LOG((" moving to subtree %d\n", ki));
357 if (!sub->elems[1]) {
358 LOG((" subtree has only one element!\n", ki));
359 if (ki > 0 && n->kids[ki-1]->elems[1]) {
361 * Case 3a, left-handed variant. Child ki has
362 * only one element, but child ki-1 has two or
363 * more. So we need to move a subtree from ki-1
368 * [more] a A b B c d D e [more] a A b c C d D e
370 node234 *sib = n->kids[ki-1];
371 int lastelem = (sib->elems[2] ? 2 :
372 sib->elems[1] ? 1 : 0);
373 sub->kids[2] = sub->kids[1];
374 sub->elems[1] = sub->elems[0];
375 sub->kids[1] = sub->kids[0];
376 sub->elems[0] = n->elems[ki-1];
377 sub->kids[0] = sib->kids[lastelem+1];
378 if (sub->kids[0]) sub->kids[0]->parent = sub;
379 n->elems[ki-1] = sib->elems[lastelem];
380 sib->kids[lastelem+1] = NULL;
381 sib->elems[lastelem] = NULL;
382 LOG((" case 3a left\n"));
383 } else if (ki < 3 && n->kids[ki+1] &&
384 n->kids[ki+1]->elems[1]) {
386 * Case 3a, right-handed variant. ki has only
387 * one element but ki+1 has two or more. Move a
388 * subtree from ki+1 to ki.
392 * a A b c C d D e [more] a A b B c d D e [more]
394 node234 *sib = n->kids[ki+1];
396 sub->elems[1] = n->elems[ki];
397 sub->kids[2] = sib->kids[0];
398 if (sub->kids[2]) sub->kids[2]->parent = sub;
399 n->elems[ki] = sib->elems[0];
400 sib->kids[0] = sib->kids[1];
401 for (j = 0; j < 2 && sib->elems[j+1]; j++) {
402 sib->kids[j+1] = sib->kids[j+2];
403 sib->elems[j] = sib->elems[j+1];
405 sib->kids[j+1] = NULL;
406 sib->elems[j] = NULL;
407 LOG((" case 3a right\n"));
410 * Case 3b. ki has only one element, and has no
411 * neighbour with more than one. So pick a
412 * neighbour and merge it with ki, taking an
413 * element down from n to go in the middle.
417 * a A b c C d a A b B c C d
419 * (Since at all points we have avoided
420 * descending to a node with only one element,
421 * we can be sure that n is not reduced to
422 * nothingness by this move, _unless_ it was
423 * the very first node, ie the root of the
424 * tree. In that case we remove the now-empty
425 * root and replace it with its single large
436 sub->kids[3] = sub->kids[1];
437 sub->elems[2] = sub->elems[0];
438 sub->kids[2] = sub->kids[0];
439 sub->elems[1] = n->elems[ki];
440 sub->kids[1] = sib->kids[1];
441 if (sub->kids[1]) sub->kids[1]->parent = sub;
442 sub->elems[0] = sib->elems[0];
443 sub->kids[0] = sib->kids[0];
444 if (sub->kids[0]) sub->kids[0]->parent = sub;
449 * That's built the big node in sub. Now we
450 * need to remove the reference to sib in n.
452 for (j = ki; j < 3 && n->kids[j+1]; j++) {
453 n->kids[j] = n->kids[j+1];
454 n->elems[j] = j<2 ? n->elems[j+1] : NULL;
457 if (j < 3) n->elems[j] = NULL;
458 LOG((" case 3b ki=%d\n", ki));
462 * The root is empty and needs to be
465 LOG((" shifting root!\n"));
475 return; /* nothing to do; `already removed' */
478 * Treat special case: this is the one remaining item in
479 * the tree. n is the tree root (no parent), has one
480 * element (no elems[1]), and has no kids (no kids[0]).
482 if (!n->parent && !n->elems[1] && !n->kids[0]) {
483 LOG((" removed last element in tree\n"));
490 * Now we have the element we want, as n->elems[ei], and we
491 * have also arranged for that element not to be the only
492 * one in its node. So...
495 if (!n->kids[0] && n->elems[1]) {
497 * Case 1. n is a leaf node with more than one element,
498 * so it's _really easy_. Just delete the thing and
503 for (i = ei; i < 2 && n->elems[i+1]; i++)
504 n->elems[i] = n->elems[i+1];
506 return; /* finished! */
507 } else if (n->kids[ei]->elems[1]) {
509 * Case 2a. n is an internal node, and the root of the
510 * subtree to the left of e has more than one element.
511 * So find the predecessor p to e (ie the largest node
512 * in that subtree), place it where e currently is, and
513 * then start the deletion process over again on the
514 * subtree with p as target.
516 node234 *m = n->kids[ei];
520 m = (m->kids[3] ? m->kids[3] :
521 m->kids[2] ? m->kids[2] :
522 m->kids[1] ? m->kids[1] : m->kids[0]);
524 target = (m->elems[2] ? m->elems[2] :
525 m->elems[1] ? m->elems[1] : m->elems[0]);
526 n->elems[ei] = target;
529 } else if (n->kids[ei+1]->elems[1]) {
531 * Case 2b, symmetric to 2a but s/left/right/ and
532 * s/predecessor/successor/. (And s/largest/smallest/).
534 node234 *m = n->kids[ei+1];
540 target = m->elems[0];
541 n->elems[ei] = target;
546 * Case 2c. n is an internal node, and the subtrees to
547 * the left and right of e both have only one element.
548 * So combine the two subnodes into a single big node
549 * with their own elements on the left and right and e
550 * in the middle, then restart the deletion process on
551 * that subtree, with e still as target.
553 node234 *a = n->kids[ei], *b = n->kids[ei+1];
557 a->elems[1] = n->elems[ei];
558 a->kids[2] = b->kids[0];
559 if (a->kids[2]) a->kids[2]->parent = a;
560 a->elems[2] = b->elems[0];
561 a->kids[3] = b->kids[1];
562 if (a->kids[3]) a->kids[3]->parent = a;
565 * That's built the big node in a, and destroyed b. Now
566 * remove the reference to b (and e) in n.
568 for (j = ei; j < 2 && n->elems[j+1]; j++) {
569 n->elems[j] = n->elems[j+1];
570 n->kids[j+1] = n->kids[j+2];
575 * It's possible, in this case, that we've just removed
576 * the only element in the root of the tree. If so,
579 if (n->elems[0] == NULL) {
580 LOG((" shifting root!\n"));
586 * Now go round the deletion process again, with n
587 * pointing at the new big node and e still the same.
595 * Iterate over the elements of a tree234, in order.
597 void *first234(tree234 *t, enum234 *e) {
598 node234 *n = t->root;
608 void *next234(enum234 *e) {
609 node234 *n = e->node;
612 if (n->kids[pos+1]) {
621 if (pos < 2 && n->elems[pos+1]) {
623 return n->elems[e->posn];
627 node234 *nn = n->parent;
629 return NULL; /* end of tree */
630 pos = (nn->kids[0] == n ? 0 :
631 nn->kids[1] == n ? 1 :
632 nn->kids[2] == n ? 2 : 3);
634 } while (pos == 3 || n->kids[pos+1] == NULL);
638 return n->elems[pos];
644 * Test code for the 2-3-4 tree. This code maintains an alternative
645 * representation of the data in the tree, in an array (using the
646 * obvious and slow insert and delete functions). After each tree
647 * operation, the verify() function is called, which ensures all
648 * the tree properties are preserved (node->child->parent always
649 * equals node; number of kids == 0 or number of elements + 1;
650 * ordering property between elements of a node and elements of its
651 * children is preserved; tree has the same depth everywhere; every
652 * node has at least one element) and also ensures the list
653 * represented by the tree is the same list it should be. (This
654 * last check also verifies the ordering properties, because the
655 * `same list it should be' is by definition correctly ordered. It
656 * also ensures all nodes are distinct, because the enum functions
657 * would get caught in a loop if not.)
663 * Error reporting function.
665 void error(char *fmt, ...) {
669 vfprintf(stdout, fmt, ap);
674 /* The array representation of the data. */
676 int arraylen, arraysize;
679 /* The tree representation of the same data. */
687 void chknode(chkctx *ctx, int level, node234 *node,
688 void *lowbound, void *highbound) {
692 /* Count the non-NULL kids. */
693 for (nkids = 0; nkids < 4 && node->kids[nkids]; nkids++);
694 /* Ensure no kids beyond the first NULL are non-NULL. */
695 for (i = nkids; i < 4; i++)
697 error("node %p: nkids=%d but kids[%d] non-NULL",
701 /* Count the non-NULL elements. */
702 for (nelems = 0; nelems < 3 && node->elems[nelems]; nelems++);
703 /* Ensure no elements beyond the first NULL are non-NULL. */
704 for (i = nelems; i < 3; i++)
705 if (node->elems[i]) {
706 error("node %p: nelems=%d but elems[%d] non-NULL",
712 * If nkids==0, this is a leaf node; verify that the tree
713 * depth is the same everywhere.
715 if (ctx->treedepth < 0)
716 ctx->treedepth = level; /* we didn't know the depth yet */
717 else if (ctx->treedepth != level)
718 error("node %p: leaf at depth %d, previously seen depth %d",
719 node, level, ctx->treedepth);
722 * If nkids != 0, then it should be nelems+1, unless nelems
723 * is 0 in which case nkids should also be 0 (and so we
724 * shouldn't be in this condition at all).
726 int shouldkids = (nelems ? nelems+1 : 0);
727 if (nkids != shouldkids) {
728 error("node %p: %d elems should mean %d kids but has %d",
729 node, nelems, shouldkids, nkids);
734 * nelems should be at least 1.
737 error("node %p: no elems", node, nkids);
741 * Add nelems to the running element count of the whole tree
742 * (to ensure the enum234 routines see them all).
744 ctx->elemcount += nelems;
747 * Check ordering property: all elements should be strictly >
748 * lowbound, strictly < highbound, and strictly < each other in
749 * sequence. (lowbound and highbound are NULL at edges of tree
750 * - both NULL at root node - and NULL is considered to be <
751 * everything and > everything. IYSWIM.)
753 for (i = -1; i < nelems; i++) {
754 void *lower = (i == -1 ? lowbound : node->elems[i]);
755 void *higher = (i+1 == nelems ? highbound : node->elems[i+1]);
756 if (lower && higher && cmp(lower, higher) >= 0) {
757 error("node %p: kid comparison [%d=%s,%d=%s] failed",
758 node, i, lower, i+1, higher);
763 * Check parent pointers: all non-NULL kids should have a
764 * parent pointer coming back to this node.
766 for (i = 0; i < nkids; i++)
767 if (node->kids[i]->parent != node) {
768 error("node %p kid %d: parent ptr is %p not %p",
769 node, i, node->kids[i]->parent, node);
774 * Now (finally!) recurse into subtrees.
776 for (i = 0; i < nkids; i++) {
777 void *lower = (i == 0 ? lowbound : node->elems[i-1]);
778 void *higher = (i >= nelems ? highbound : node->elems[i]);
779 chknode(ctx, level+1, node->kids[i], lower, higher);
789 ctx.treedepth = -1; /* depth unknown yet */
790 ctx.elemcount = 0; /* no elements seen yet */
792 * Verify validity of tree properties.
795 chknode(&ctx, 0, tree->root, NULL, NULL);
796 printf("tree depth: %d\n", ctx.treedepth);
798 * Enumerate the tree and ensure it matches up to the array.
800 for (i = 0, p = first234(tree, &e);
802 i++, p = next234(&e)) {
804 error("tree contains more than %d elements", arraylen);
806 error("enum at position %d: array says %s, tree says %s",
809 if (i != ctx.elemcount) {
810 error("tree really contains %d elements, enum gave %d",
814 error("enum gave only %d elements, array has %d", i, arraylen);
818 void addtest(void *elem) {
820 void *retval, *realret;
822 if (arraysize < arraylen+1) {
823 arraysize = arraylen+1+256;
824 array = (array == NULL ? smalloc(arraysize*sizeof(*array)) :
825 srealloc(array, arraysize*sizeof(*array)));
829 while (i < arraylen && cmp(elem, array[i]) > 0)
831 /* now i points to the first element >= elem */
832 if (i < arraylen && !cmp(elem, array[i]))
833 retval = array[i]; /* expect that returned not elem */
835 retval = elem; /* expect elem returned (success) */
836 for (j = arraylen; j > i; j--)
837 array[j] = array[j-1];
838 array[i] = elem; /* add elem to array */
842 realret = add234(tree, elem);
843 if (realret != retval) {
844 error("add: retval was %p expected %p", realret, retval);
850 void deltest(void *elem) {
854 while (i < arraylen && cmp(elem, array[i]) > 0)
856 /* now i points to the first element >= elem */
857 if (i >= arraylen || cmp(elem, array[i]) != 0)
858 return; /* don't do it! */
860 while (i < arraylen-1) {
861 array[i] = array[i+1];
864 arraylen--; /* delete elem from array */
872 /* A sample data set and test utility. Designed for pseudo-randomness,
873 * and yet repeatability. */
876 * This random number generator uses the `portable implementation'
877 * given in ANSI C99 draft N869. It assumes `unsigned' is 32 bits;
880 int randomnumber(unsigned *seed) {
883 return ((*seed) / 65536) % 32768;
886 int mycmp(void *av, void *bv) {
887 char const *a = (char const *)av;
888 char const *b = (char const *)bv;
892 #define lenof(x) ( sizeof((x)) / sizeof(*(x)) )
895 "a", "ab", "absque", "coram", "de",
896 "palam", "clam", "cum", "ex", "e",
897 "sine", "tenus", "pro", "prae",
898 "banana", "carrot", "cabbage", "broccoli", "onion", "zebra",
899 "penguin", "blancmange", "pangolin", "whale", "hedgehog",
900 "giraffe", "peanut", "bungee", "foo", "bar", "baz", "quux",
901 "murfl", "spoo", "breen", "flarn", "octothorpe",
902 "snail", "tiger", "elephant", "octopus", "warthog", "armadillo",
903 "aardvark", "wyvern", "dragon", "elf", "dwarf", "orc", "goblin",
904 "pixie", "basilisk", "warg", "ape", "lizard", "newt", "shopkeeper",
905 "wand", "ring", "amulet"
908 #define NSTR lenof(strings)
915 for (i = 0; i < NSTR; i++) in[i] = 0;
917 arraylen = arraysize = 0;
918 tree = newtree234(mycmp);
922 for (i = 0; i < 10000; i++) {
923 j = randomnumber(&seed);
925 printf("trial: %d\n", i);
927 printf("deleting %s (%d)\n", strings[j], j);
931 printf("adding %s (%d)\n", strings[j], j);
937 while (arraylen > 0) {
938 j = randomnumber(&seed);