2 * tree234.c: reasonably generic counted 2-3-4 tree routines.
4 * This file is copyright 1999-2001 Simon Tatham.
6 * Permission is hereby granted, free of charge, to any person
7 * obtaining a copy of this software and associated documentation
8 * files (the "Software"), to deal in the Software without
9 * restriction, including without limitation the rights to use,
10 * copy, modify, merge, publish, distribute, sublicense, and/or
11 * sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following
15 * The above copyright notice and this permission notice shall be
16 * included in all copies or substantial portions of the Software.
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
20 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
21 * NONINFRINGEMENT. IN NO EVENT SHALL SIMON TATHAM BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
23 * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
24 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
36 #define LOG(x) (printf x)
41 typedef struct node234_Tag node234;
56 * Create a 2-3-4 tree.
58 tree234 *newtree234(cmpfn234 cmp)
60 tree234 *ret = snew(tree234);
61 LOG(("created tree %p\n", ret));
68 * Free a 2-3-4 tree (not including freeing the elements).
70 static void freenode234(node234 * n)
74 freenode234(n->kids[0]);
75 freenode234(n->kids[1]);
76 freenode234(n->kids[2]);
77 freenode234(n->kids[3]);
81 void freetree234(tree234 * t)
88 * Internal function to count a node.
90 static int countnode234(node234 * n)
96 for (i = 0; i < 4; i++)
97 count += n->counts[i];
98 for (i = 0; i < 3; i++)
105 * Count the elements in a tree.
107 int count234(tree234 * t)
110 return countnode234(t->root);
116 * Add an element e to a 2-3-4 tree t. Returns e on success, or if
117 * an existing element compares equal, returns that.
119 static void *add234_internal(tree234 * t, void *e, int index)
121 node234 *n, **np, *left, *right;
123 int c, lcount, rcount;
125 LOG(("adding node %p to tree %p\n", e, t));
126 if (t->root == NULL) {
127 t->root = snew(node234);
128 t->root->elems[1] = t->root->elems[2] = NULL;
129 t->root->kids[0] = t->root->kids[1] = NULL;
130 t->root->kids[2] = t->root->kids[3] = NULL;
131 t->root->counts[0] = t->root->counts[1] = 0;
132 t->root->counts[2] = t->root->counts[3] = 0;
133 t->root->parent = NULL;
134 t->root->elems[0] = e;
135 LOG((" created root %p\n", t->root));
143 LOG((" node %p: %p/%d [%p] %p/%d [%p] %p/%d [%p] %p/%d\n",
145 n->kids[0], n->counts[0], n->elems[0],
146 n->kids[1], n->counts[1], n->elems[1],
147 n->kids[2], n->counts[2], n->elems[2],
148 n->kids[3], n->counts[3]));
152 * Leaf node. We want to insert at kid position
153 * equal to the index:
160 * Internal node. We always descend through it (add
161 * always starts at the bottom, never in the
164 do { /* this is a do ... while (0) to allow `break' */
165 if (index <= n->counts[0]) {
169 index -= n->counts[0] + 1;
170 if (index <= n->counts[1]) {
174 index -= n->counts[1] + 1;
175 if (index <= n->counts[2]) {
179 index -= n->counts[2] + 1;
180 if (index <= n->counts[3]) {
184 return NULL; /* error: index out of range */
188 if ((c = t->cmp(e, n->elems[0])) < 0)
191 return n->elems[0]; /* already exists */
192 else if (n->elems[1] == NULL
193 || (c = t->cmp(e, n->elems[1])) < 0) childnum = 1;
195 return n->elems[1]; /* already exists */
196 else if (n->elems[2] == NULL
197 || (c = t->cmp(e, n->elems[2])) < 0) childnum = 2;
199 return n->elems[2]; /* already exists */
203 np = &n->kids[childnum];
204 LOG((" moving to child %d (%p)\n", childnum, *np));
208 * We need to insert the new element in n at position np.
215 LOG((" at %p: %p/%d [%p] %p/%d [%p] %p/%d [%p] %p/%d\n",
217 n->kids[0], n->counts[0], n->elems[0],
218 n->kids[1], n->counts[1], n->elems[1],
219 n->kids[2], n->counts[2], n->elems[2],
220 n->kids[3], n->counts[3]));
221 LOG((" need to insert %p/%d [%p] %p/%d at position %d\n",
222 left, lcount, e, right, rcount, np - n->kids));
223 if (n->elems[1] == NULL) {
225 * Insert in a 2-node; simple.
227 if (np == &n->kids[0]) {
228 LOG((" inserting on left of 2-node\n"));
229 n->kids[2] = n->kids[1];
230 n->counts[2] = n->counts[1];
231 n->elems[1] = n->elems[0];
233 n->counts[1] = rcount;
236 n->counts[0] = lcount;
237 } else { /* np == &n->kids[1] */
238 LOG((" inserting on right of 2-node\n"));
240 n->counts[2] = rcount;
243 n->counts[1] = lcount;
246 n->kids[0]->parent = n;
248 n->kids[1]->parent = n;
250 n->kids[2]->parent = n;
253 } else if (n->elems[2] == NULL) {
255 * Insert in a 3-node; simple.
257 if (np == &n->kids[0]) {
258 LOG((" inserting on left of 3-node\n"));
259 n->kids[3] = n->kids[2];
260 n->counts[3] = n->counts[2];
261 n->elems[2] = n->elems[1];
262 n->kids[2] = n->kids[1];
263 n->counts[2] = n->counts[1];
264 n->elems[1] = n->elems[0];
266 n->counts[1] = rcount;
269 n->counts[0] = lcount;
270 } else if (np == &n->kids[1]) {
271 LOG((" inserting in middle of 3-node\n"));
272 n->kids[3] = n->kids[2];
273 n->counts[3] = n->counts[2];
274 n->elems[2] = n->elems[1];
276 n->counts[2] = rcount;
279 n->counts[1] = lcount;
280 } else { /* np == &n->kids[2] */
281 LOG((" inserting on right of 3-node\n"));
283 n->counts[3] = rcount;
286 n->counts[2] = lcount;
289 n->kids[0]->parent = n;
291 n->kids[1]->parent = n;
293 n->kids[2]->parent = n;
295 n->kids[3]->parent = n;
299 node234 *m = snew(node234);
300 m->parent = n->parent;
301 LOG((" splitting a 4-node; created new node %p\n", m));
303 * Insert in a 4-node; split into a 2-node and a
304 * 3-node, and move focus up a level.
306 * I don't think it matters which way round we put the
307 * 2 and the 3. For simplicity, we'll put the 3 first
310 if (np == &n->kids[0]) {
312 m->counts[0] = lcount;
315 m->counts[1] = rcount;
316 m->elems[1] = n->elems[0];
317 m->kids[2] = n->kids[1];
318 m->counts[2] = n->counts[1];
320 n->kids[0] = n->kids[2];
321 n->counts[0] = n->counts[2];
322 n->elems[0] = n->elems[2];
323 n->kids[1] = n->kids[3];
324 n->counts[1] = n->counts[3];
325 } else if (np == &n->kids[1]) {
326 m->kids[0] = n->kids[0];
327 m->counts[0] = n->counts[0];
328 m->elems[0] = n->elems[0];
330 m->counts[1] = lcount;
333 m->counts[2] = rcount;
335 n->kids[0] = n->kids[2];
336 n->counts[0] = n->counts[2];
337 n->elems[0] = n->elems[2];
338 n->kids[1] = n->kids[3];
339 n->counts[1] = n->counts[3];
340 } else if (np == &n->kids[2]) {
341 m->kids[0] = n->kids[0];
342 m->counts[0] = n->counts[0];
343 m->elems[0] = n->elems[0];
344 m->kids[1] = n->kids[1];
345 m->counts[1] = n->counts[1];
346 m->elems[1] = n->elems[1];
348 m->counts[2] = lcount;
351 n->counts[0] = rcount;
352 n->elems[0] = n->elems[2];
353 n->kids[1] = n->kids[3];
354 n->counts[1] = n->counts[3];
355 } else { /* np == &n->kids[3] */
356 m->kids[0] = n->kids[0];
357 m->counts[0] = n->counts[0];
358 m->elems[0] = n->elems[0];
359 m->kids[1] = n->kids[1];
360 m->counts[1] = n->counts[1];
361 m->elems[1] = n->elems[1];
362 m->kids[2] = n->kids[2];
363 m->counts[2] = n->counts[2];
365 n->counts[0] = lcount;
368 n->counts[1] = rcount;
371 m->kids[3] = n->kids[3] = n->kids[2] = NULL;
372 m->counts[3] = n->counts[3] = n->counts[2] = 0;
373 m->elems[2] = n->elems[2] = n->elems[1] = NULL;
375 m->kids[0]->parent = m;
377 m->kids[1]->parent = m;
379 m->kids[2]->parent = m;
381 n->kids[0]->parent = n;
383 n->kids[1]->parent = n;
384 LOG((" left (%p): %p/%d [%p] %p/%d [%p] %p/%d\n", m,
385 m->kids[0], m->counts[0], m->elems[0],
386 m->kids[1], m->counts[1], m->elems[1],
387 m->kids[2], m->counts[2]));
388 LOG((" right (%p): %p/%d [%p] %p/%d\n", n,
389 n->kids[0], n->counts[0], n->elems[0],
390 n->kids[1], n->counts[1]));
392 lcount = countnode234(left);
394 rcount = countnode234(right);
397 np = (n->parent->kids[0] == n ? &n->parent->kids[0] :
398 n->parent->kids[1] == n ? &n->parent->kids[1] :
399 n->parent->kids[2] == n ? &n->parent->kids[2] :
400 &n->parent->kids[3]);
405 * If we've come out of here by `break', n will still be
406 * non-NULL and all we need to do is go back up the tree
407 * updating counts. If we've come here because n is NULL, we
408 * need to create a new root for the tree because the old one
409 * has just split into two. */
412 int count = countnode234(n);
414 childnum = (n->parent->kids[0] == n ? 0 :
415 n->parent->kids[1] == n ? 1 :
416 n->parent->kids[2] == n ? 2 : 3);
417 n->parent->counts[childnum] = count;
421 LOG((" root is overloaded, split into two\n"));
422 t->root = snew(node234);
423 t->root->kids[0] = left;
424 t->root->counts[0] = lcount;
425 t->root->elems[0] = e;
426 t->root->kids[1] = right;
427 t->root->counts[1] = rcount;
428 t->root->elems[1] = NULL;
429 t->root->kids[2] = NULL;
430 t->root->counts[2] = 0;
431 t->root->elems[2] = NULL;
432 t->root->kids[3] = NULL;
433 t->root->counts[3] = 0;
434 t->root->parent = NULL;
435 if (t->root->kids[0])
436 t->root->kids[0]->parent = t->root;
437 if (t->root->kids[1])
438 t->root->kids[1]->parent = t->root;
439 LOG((" new root is %p/%d [%p] %p/%d\n",
440 t->root->kids[0], t->root->counts[0],
441 t->root->elems[0], t->root->kids[1], t->root->counts[1]));
447 void *add234(tree234 * t, void *e)
449 if (!t->cmp) /* tree is unsorted */
452 return add234_internal(t, e, -1);
454 void *addpos234(tree234 * t, void *e, int index)
456 if (index < 0 || /* index out of range */
457 t->cmp) /* tree is sorted */
458 return NULL; /* return failure */
460 return add234_internal(t, e, index); /* this checks the upper bound */
464 * Look up the element at a given numeric index in a 2-3-4 tree.
465 * Returns NULL if the index is out of range.
467 void *index234(tree234 * t, int index)
472 return NULL; /* tree is empty */
474 if (index < 0 || index >= countnode234(t->root))
475 return NULL; /* out of range */
480 if (index < n->counts[0])
482 else if (index -= n->counts[0] + 1, index < 0)
484 else if (index < n->counts[1])
486 else if (index -= n->counts[1] + 1, index < 0)
488 else if (index < n->counts[2])
490 else if (index -= n->counts[2] + 1, index < 0)
496 /* We shouldn't ever get here. I wonder how we did. */
501 * Find an element e in a sorted 2-3-4 tree t. Returns NULL if not
502 * found. e is always passed as the first argument to cmp, so cmp
503 * can be an asymmetric function if desired. cmp can also be passed
504 * as NULL, in which case the compare function from the tree proper
507 void *findrelpos234(tree234 * t, void *e, cmpfn234 cmp,
508 int relation, int *index)
513 int idx, ecount, kcount, cmpret;
523 * Attempt to find the element itself.
528 * Prepare a fake `cmp' result if e is NULL.
532 assert(relation == REL234_LT || relation == REL234_GT);
533 if (relation == REL234_LT)
534 cmpret = +1; /* e is a max: always greater */
535 else if (relation == REL234_GT)
536 cmpret = -1; /* e is a min: always smaller */
539 for (kcount = 0; kcount < 4; kcount++) {
540 if (kcount >= 3 || n->elems[kcount] == NULL ||
541 (c = cmpret ? cmpret : cmp(e, n->elems[kcount])) < 0) {
545 idx += n->counts[kcount];
562 * We have found the element we're looking for. It's
563 * n->elems[ecount], at tree index idx. If our search
564 * relation is EQ, LE or GE we can now go home.
566 if (relation != REL234_LT && relation != REL234_GT) {
569 return n->elems[ecount];
573 * Otherwise, we'll do an indexed lookup for the previous
574 * or next element. (It would be perfectly possible to
575 * implement these search types in a non-counted tree by
576 * going back up from where we are, but far more fiddly.)
578 if (relation == REL234_LT)
584 * We've found our way to the bottom of the tree and we
585 * know where we would insert this node if we wanted to:
586 * we'd put it in in place of the (empty) subtree
587 * n->kids[kcount], and it would have index idx
589 * But the actual element isn't there. So if our search
590 * relation is EQ, we're doomed.
592 if (relation == REL234_EQ)
596 * Otherwise, we must do an index lookup for index idx-1
597 * (if we're going left - LE or LT) or index idx (if we're
598 * going right - GE or GT).
600 if (relation == REL234_LT || relation == REL234_LE) {
606 * We know the index of the element we want; just call index234
607 * to do the rest. This will return NULL if the index is out of
608 * bounds, which is exactly what we want.
610 ret = index234(t, idx);
615 void *find234(tree234 * t, void *e, cmpfn234 cmp)
617 return findrelpos234(t, e, cmp, REL234_EQ, NULL);
619 void *findrel234(tree234 * t, void *e, cmpfn234 cmp, int relation)
621 return findrelpos234(t, e, cmp, relation, NULL);
623 void *findpos234(tree234 * t, void *e, cmpfn234 cmp, int *index)
625 return findrelpos234(t, e, cmp, REL234_EQ, index);
629 * Delete an element e in a 2-3-4 tree. Does not free the element,
630 * merely removes all links to it from the tree nodes.
632 static void *delpos234_internal(tree234 * t, int index)
641 LOG(("deleting item %d from tree %p\n", index, t));
648 (" node %p: %p/%d [%p] %p/%d [%p] %p/%d [%p] %p/%d index=%d\n",
649 n, n->kids[0], n->counts[0], n->elems[0], n->kids[1],
650 n->counts[1], n->elems[1], n->kids[2], n->counts[2],
651 n->elems[2], n->kids[3], n->counts[3], index));
652 if (index < n->counts[0]) {
654 } else if (index -= n->counts[0] + 1, index < 0) {
657 } else if (index < n->counts[1]) {
659 } else if (index -= n->counts[1] + 1, index < 0) {
662 } else if (index < n->counts[2]) {
664 } else if (index -= n->counts[2] + 1, index < 0) {
671 * Recurse down to subtree ki. If it has only one element,
672 * we have to do some transformation to start with.
674 LOG((" moving to subtree %d\n", ki));
676 if (!sub->elems[1]) {
677 LOG((" subtree has only one element!\n", ki));
678 if (ki > 0 && n->kids[ki - 1]->elems[1]) {
680 * Case 3a, left-handed variant. Child ki has
681 * only one element, but child ki-1 has two or
682 * more. So we need to move a subtree from ki-1
687 * [more] a A b B c d D e [more] a A b c C d D e
689 node234 *sib = n->kids[ki - 1];
690 int lastelem = (sib->elems[2] ? 2 :
691 sib->elems[1] ? 1 : 0);
692 sub->kids[2] = sub->kids[1];
693 sub->counts[2] = sub->counts[1];
694 sub->elems[1] = sub->elems[0];
695 sub->kids[1] = sub->kids[0];
696 sub->counts[1] = sub->counts[0];
697 sub->elems[0] = n->elems[ki - 1];
698 sub->kids[0] = sib->kids[lastelem + 1];
699 sub->counts[0] = sib->counts[lastelem + 1];
701 sub->kids[0]->parent = sub;
702 n->elems[ki - 1] = sib->elems[lastelem];
703 sib->kids[lastelem + 1] = NULL;
704 sib->counts[lastelem + 1] = 0;
705 sib->elems[lastelem] = NULL;
706 n->counts[ki] = countnode234(sub);
707 LOG((" case 3a left\n"));
709 (" index and left subtree count before adjustment: %d, %d\n",
710 index, n->counts[ki - 1]));
711 index += n->counts[ki - 1];
712 n->counts[ki - 1] = countnode234(sib);
713 index -= n->counts[ki - 1];
715 (" index and left subtree count after adjustment: %d, %d\n",
716 index, n->counts[ki - 1]));
717 } else if (ki < 3 && n->kids[ki + 1]
718 && n->kids[ki + 1]->elems[1]) {
720 * Case 3a, right-handed variant. ki has only
721 * one element but ki+1 has two or more. Move a
722 * subtree from ki+1 to ki.
726 * a A b c C d D e [more] a A b B c d D e [more]
728 node234 *sib = n->kids[ki + 1];
730 sub->elems[1] = n->elems[ki];
731 sub->kids[2] = sib->kids[0];
732 sub->counts[2] = sib->counts[0];
734 sub->kids[2]->parent = sub;
735 n->elems[ki] = sib->elems[0];
736 sib->kids[0] = sib->kids[1];
737 sib->counts[0] = sib->counts[1];
738 for (j = 0; j < 2 && sib->elems[j + 1]; j++) {
739 sib->kids[j + 1] = sib->kids[j + 2];
740 sib->counts[j + 1] = sib->counts[j + 2];
741 sib->elems[j] = sib->elems[j + 1];
743 sib->kids[j + 1] = NULL;
744 sib->counts[j + 1] = 0;
745 sib->elems[j] = NULL;
746 n->counts[ki] = countnode234(sub);
747 n->counts[ki + 1] = countnode234(sib);
748 LOG((" case 3a right\n"));
751 * Case 3b. ki has only one element, and has no
752 * neighbour with more than one. So pick a
753 * neighbour and merge it with ki, taking an
754 * element down from n to go in the middle.
758 * a A b c C d a A b B c C d
760 * (Since at all points we have avoided
761 * descending to a node with only one element,
762 * we can be sure that n is not reduced to
763 * nothingness by this move, _unless_ it was
764 * the very first node, ie the root of the
765 * tree. In that case we remove the now-empty
766 * root and replace it with its single large
774 index += n->counts[ki] + 1;
777 sub = n->kids[ki + 1];
779 sub->kids[3] = sub->kids[1];
780 sub->counts[3] = sub->counts[1];
781 sub->elems[2] = sub->elems[0];
782 sub->kids[2] = sub->kids[0];
783 sub->counts[2] = sub->counts[0];
784 sub->elems[1] = n->elems[ki];
785 sub->kids[1] = sib->kids[1];
786 sub->counts[1] = sib->counts[1];
788 sub->kids[1]->parent = sub;
789 sub->elems[0] = sib->elems[0];
790 sub->kids[0] = sib->kids[0];
791 sub->counts[0] = sib->counts[0];
793 sub->kids[0]->parent = sub;
795 n->counts[ki + 1] = countnode234(sub);
800 * That's built the big node in sub. Now we
801 * need to remove the reference to sib in n.
803 for (j = ki; j < 3 && n->kids[j + 1]; j++) {
804 n->kids[j] = n->kids[j + 1];
805 n->counts[j] = n->counts[j + 1];
806 n->elems[j] = j < 2 ? n->elems[j + 1] : NULL;
812 LOG((" case 3b ki=%d\n", ki));
816 * The root is empty and needs to be
819 LOG((" shifting root!\n"));
829 retval = n->elems[ei];
832 return NULL; /* although this shouldn't happen */
835 * Treat special case: this is the one remaining item in
836 * the tree. n is the tree root (no parent), has one
837 * element (no elems[1]), and has no kids (no kids[0]).
839 if (!n->parent && !n->elems[1] && !n->kids[0]) {
840 LOG((" removed last element in tree\n"));
847 * Now we have the element we want, as n->elems[ei], and we
848 * have also arranged for that element not to be the only
849 * one in its node. So...
852 if (!n->kids[0] && n->elems[1]) {
854 * Case 1. n is a leaf node with more than one element,
855 * so it's _really easy_. Just delete the thing and
860 for (i = ei; i < 2 && n->elems[i + 1]; i++)
861 n->elems[i] = n->elems[i + 1];
864 * Having done that to the leaf node, we now go back up
865 * the tree fixing the counts.
869 childnum = (n->parent->kids[0] == n ? 0 :
870 n->parent->kids[1] == n ? 1 :
871 n->parent->kids[2] == n ? 2 : 3);
872 n->parent->counts[childnum]--;
875 return retval; /* finished! */
876 } else if (n->kids[ei]->elems[1]) {
878 * Case 2a. n is an internal node, and the root of the
879 * subtree to the left of e has more than one element.
880 * So find the predecessor p to e (ie the largest node
881 * in that subtree), place it where e currently is, and
882 * then start the deletion process over again on the
883 * subtree with p as target.
885 node234 *m = n->kids[ei];
889 m = (m->kids[3] ? m->kids[3] :
890 m->kids[2] ? m->kids[2] :
891 m->kids[1] ? m->kids[1] : m->kids[0]);
893 target = (m->elems[2] ? m->elems[2] :
894 m->elems[1] ? m->elems[1] : m->elems[0]);
895 n->elems[ei] = target;
896 index = n->counts[ei] - 1;
898 } else if (n->kids[ei + 1]->elems[1]) {
900 * Case 2b, symmetric to 2a but s/left/right/ and
901 * s/predecessor/successor/. (And s/largest/smallest/).
903 node234 *m = n->kids[ei + 1];
909 target = m->elems[0];
910 n->elems[ei] = target;
915 * Case 2c. n is an internal node, and the subtrees to
916 * the left and right of e both have only one element.
917 * So combine the two subnodes into a single big node
918 * with their own elements on the left and right and e
919 * in the middle, then restart the deletion process on
920 * that subtree, with e still as target.
922 node234 *a = n->kids[ei], *b = n->kids[ei + 1];
926 a->elems[1] = n->elems[ei];
927 a->kids[2] = b->kids[0];
928 a->counts[2] = b->counts[0];
930 a->kids[2]->parent = a;
931 a->elems[2] = b->elems[0];
932 a->kids[3] = b->kids[1];
933 a->counts[3] = b->counts[1];
935 a->kids[3]->parent = a;
937 n->counts[ei] = countnode234(a);
939 * That's built the big node in a, and destroyed b. Now
940 * remove the reference to b (and e) in n.
942 for (j = ei; j < 2 && n->elems[j + 1]; j++) {
943 n->elems[j] = n->elems[j + 1];
944 n->kids[j + 1] = n->kids[j + 2];
945 n->counts[j + 1] = n->counts[j + 2];
948 n->kids[j + 1] = NULL;
949 n->counts[j + 1] = 0;
951 * It's possible, in this case, that we've just removed
952 * the only element in the root of the tree. If so,
955 if (n->elems[0] == NULL) {
956 LOG((" shifting root!\n"));
962 * Now go round the deletion process again, with n
963 * pointing at the new big node and e still the same.
966 index = a->counts[0] + a->counts[1] + 1;
970 void *delpos234(tree234 * t, int index)
972 if (index < 0 || index >= countnode234(t->root))
974 return delpos234_internal(t, index);
976 void *del234(tree234 * t, void *e)
979 if (!findrelpos234(t, e, NULL, REL234_EQ, &index))
980 return NULL; /* it wasn't in there anyway */
981 return delpos234_internal(t, index); /* it's there; delete it. */
987 * Test code for the 2-3-4 tree. This code maintains an alternative
988 * representation of the data in the tree, in an array (using the
989 * obvious and slow insert and delete functions). After each tree
990 * operation, the verify() function is called, which ensures all
991 * the tree properties are preserved:
992 * - node->child->parent always equals node
993 * - tree->root->parent always equals NULL
994 * - number of kids == 0 or number of elements + 1;
995 * - tree has the same depth everywhere
996 * - every node has at least one element
997 * - subtree element counts are accurate
998 * - any NULL kid pointer is accompanied by a zero count
999 * - in a sorted tree: ordering property between elements of a
1000 * node and elements of its children is preserved
1001 * and also ensures the list represented by the tree is the same
1002 * list it should be. (This last check also doubly verifies the
1003 * ordering properties, because the `same list it should be' is by
1004 * definition correctly ordered. It also ensures all nodes are
1005 * distinct, because the enum functions would get caught in a loop
1012 * Error reporting function.
1014 void error(char *fmt, ...)
1019 vfprintf(stdout, fmt, ap);
1024 /* The array representation of the data. */
1026 int arraylen, arraysize;
1029 /* The tree representation of the same data. */
1037 int chknode(chkctx * ctx, int level, node234 * node,
1038 void *lowbound, void *highbound)
1044 /* Count the non-NULL kids. */
1045 for (nkids = 0; nkids < 4 && node->kids[nkids]; nkids++);
1046 /* Ensure no kids beyond the first NULL are non-NULL. */
1047 for (i = nkids; i < 4; i++)
1048 if (node->kids[i]) {
1049 error("node %p: nkids=%d but kids[%d] non-NULL",
1051 } else if (node->counts[i]) {
1052 error("node %p: kids[%d] NULL but count[%d]=%d nonzero",
1053 node, i, i, node->counts[i]);
1056 /* Count the non-NULL elements. */
1057 for (nelems = 0; nelems < 3 && node->elems[nelems]; nelems++);
1058 /* Ensure no elements beyond the first NULL are non-NULL. */
1059 for (i = nelems; i < 3; i++)
1060 if (node->elems[i]) {
1061 error("node %p: nelems=%d but elems[%d] non-NULL",
1067 * If nkids==0, this is a leaf node; verify that the tree
1068 * depth is the same everywhere.
1070 if (ctx->treedepth < 0)
1071 ctx->treedepth = level; /* we didn't know the depth yet */
1072 else if (ctx->treedepth != level)
1073 error("node %p: leaf at depth %d, previously seen depth %d",
1074 node, level, ctx->treedepth);
1077 * If nkids != 0, then it should be nelems+1, unless nelems
1078 * is 0 in which case nkids should also be 0 (and so we
1079 * shouldn't be in this condition at all).
1081 int shouldkids = (nelems ? nelems + 1 : 0);
1082 if (nkids != shouldkids) {
1083 error("node %p: %d elems should mean %d kids but has %d",
1084 node, nelems, shouldkids, nkids);
1089 * nelems should be at least 1.
1092 error("node %p: no elems", node, nkids);
1096 * Add nelems to the running element count of the whole tree.
1098 ctx->elemcount += nelems;
1101 * Check ordering property: all elements should be strictly >
1102 * lowbound, strictly < highbound, and strictly < each other in
1103 * sequence. (lowbound and highbound are NULL at edges of tree
1104 * - both NULL at root node - and NULL is considered to be <
1105 * everything and > everything. IYSWIM.)
1108 for (i = -1; i < nelems; i++) {
1109 void *lower = (i == -1 ? lowbound : node->elems[i]);
1111 (i + 1 == nelems ? highbound : node->elems[i + 1]);
1112 if (lower && higher && cmp(lower, higher) >= 0) {
1113 error("node %p: kid comparison [%d=%s,%d=%s] failed",
1114 node, i, lower, i + 1, higher);
1120 * Check parent pointers: all non-NULL kids should have a
1121 * parent pointer coming back to this node.
1123 for (i = 0; i < nkids; i++)
1124 if (node->kids[i]->parent != node) {
1125 error("node %p kid %d: parent ptr is %p not %p",
1126 node, i, node->kids[i]->parent, node);
1131 * Now (finally!) recurse into subtrees.
1135 for (i = 0; i < nkids; i++) {
1136 void *lower = (i == 0 ? lowbound : node->elems[i - 1]);
1137 void *higher = (i >= nelems ? highbound : node->elems[i]);
1139 chknode(ctx, level + 1, node->kids[i], lower, higher);
1140 if (node->counts[i] != subcount) {
1141 error("node %p kid %d: count says %d, subtree really has %d",
1142 node, i, node->counts[i], subcount);
1156 ctx.treedepth = -1; /* depth unknown yet */
1157 ctx.elemcount = 0; /* no elements seen yet */
1159 * Verify validity of tree properties.
1162 if (tree->root->parent != NULL)
1163 error("root->parent is %p should be null", tree->root->parent);
1164 chknode(&ctx, 0, tree->root, NULL, NULL);
1166 printf("tree depth: %d\n", ctx.treedepth);
1168 * Enumerate the tree and ensure it matches up to the array.
1170 for (i = 0; NULL != (p = index234(tree, i)); i++) {
1172 error("tree contains more than %d elements", arraylen);
1174 error("enum at position %d: array says %s, tree says %s",
1177 if (ctx.elemcount != i) {
1178 error("tree really contains %d elements, enum gave %d",
1182 error("enum gave only %d elements, array has %d", i, arraylen);
1185 if (ctx.elemcount != i) {
1186 error("tree really contains %d elements, count234 gave %d",
1191 void internal_addtest(void *elem, int index, void *realret)
1196 if (arraysize < arraylen + 1) {
1197 arraysize = arraylen + 1 + 256;
1198 array = sresize(array, arraysize, void *);
1202 /* now i points to the first element >= elem */
1203 retval = elem; /* expect elem returned (success) */
1204 for (j = arraylen; j > i; j--)
1205 array[j] = array[j - 1];
1206 array[i] = elem; /* add elem to array */
1209 if (realret != retval) {
1210 error("add: retval was %p expected %p", realret, retval);
1216 void addtest(void *elem)
1221 realret = add234(tree, elem);
1224 while (i < arraylen && cmp(elem, array[i]) > 0)
1226 if (i < arraylen && !cmp(elem, array[i])) {
1227 void *retval = array[i]; /* expect that returned not elem */
1228 if (realret != retval) {
1229 error("add: retval was %p expected %p", realret, retval);
1232 internal_addtest(elem, i, realret);
1235 void addpostest(void *elem, int i)
1239 realret = addpos234(tree, elem, i);
1241 internal_addtest(elem, i, realret);
1244 void delpostest(int i)
1247 void *elem = array[i], *ret;
1249 /* i points to the right element */
1250 while (i < arraylen - 1) {
1251 array[i] = array[i + 1];
1254 arraylen--; /* delete elem from array */
1257 ret = del234(tree, elem);
1259 ret = delpos234(tree, index);
1262 error("del returned %p, expected %p", ret, elem);
1268 void deltest(void *elem)
1273 while (i < arraylen && cmp(elem, array[i]) > 0)
1275 if (i >= arraylen || cmp(elem, array[i]) != 0)
1276 return; /* don't do it! */
1280 /* A sample data set and test utility. Designed for pseudo-randomness,
1281 * and yet repeatability. */
1284 * This random number generator uses the `portable implementation'
1285 * given in ANSI C99 draft N869. It assumes `unsigned' is 32 bits;
1288 int randomnumber(unsigned *seed)
1290 *seed *= 1103515245;
1292 return ((*seed) / 65536) % 32768;
1295 int mycmp(void *av, void *bv)
1297 char const *a = (char const *) av;
1298 char const *b = (char const *) bv;
1299 return strcmp(a, b);
1302 #define lenof(x) ( sizeof((x)) / sizeof(*(x)) )
1305 "a", "ab", "absque", "coram", "de",
1306 "palam", "clam", "cum", "ex", "e",
1307 "sine", "tenus", "pro", "prae",
1308 "banana", "carrot", "cabbage", "broccoli", "onion", "zebra",
1309 "penguin", "blancmange", "pangolin", "whale", "hedgehog",
1310 "giraffe", "peanut", "bungee", "foo", "bar", "baz", "quux",
1311 "murfl", "spoo", "breen", "flarn", "octothorpe",
1312 "snail", "tiger", "elephant", "octopus", "warthog", "armadillo",
1313 "aardvark", "wyvern", "dragon", "elf", "dwarf", "orc", "goblin",
1314 "pixie", "basilisk", "warg", "ape", "lizard", "newt", "shopkeeper",
1315 "wand", "ring", "amulet"
1318 #define NSTR lenof(strings)
1322 const static int rels[] = {
1323 REL234_EQ, REL234_GE, REL234_LE, REL234_LT, REL234_GT
1325 const static char *const relnames[] = {
1326 "EQ", "GE", "LE", "LT", "GT"
1328 int i, j, rel, index;
1329 char *p, *ret, *realret, *realret2;
1332 for (i = 0; i < NSTR; i++) {
1334 for (j = 0; j < sizeof(rels) / sizeof(*rels); j++) {
1340 mid = (lo + hi) / 2;
1341 c = strcmp(p, array[mid]);
1351 if (rel == REL234_LT)
1352 ret = (mid > 0 ? array[--mid] : NULL);
1353 else if (rel == REL234_GT)
1354 ret = (mid < arraylen - 1 ? array[++mid] : NULL);
1358 assert(lo == hi + 1);
1359 if (rel == REL234_LT || rel == REL234_LE) {
1361 ret = (hi >= 0 ? array[hi] : NULL);
1362 } else if (rel == REL234_GT || rel == REL234_GE) {
1364 ret = (lo < arraylen ? array[lo] : NULL);
1369 realret = findrelpos234(tree, p, NULL, rel, &index);
1370 if (realret != ret) {
1371 error("find(\"%s\",%s) gave %s should be %s",
1372 p, relnames[j], realret, ret);
1374 if (realret && index != mid) {
1375 error("find(\"%s\",%s) gave %d should be %d",
1376 p, relnames[j], index, mid);
1378 if (realret && rel == REL234_EQ) {
1379 realret2 = index234(tree, index);
1380 if (realret2 != realret) {
1381 error("find(\"%s\",%s) gave %s(%d) but %d -> %s",
1382 p, relnames[j], realret, index, index, realret2);
1386 printf("find(\"%s\",%s) gave %s(%d)\n", p, relnames[j],
1392 realret = findrelpos234(tree, NULL, NULL, REL234_GT, &index);
1393 if (arraylen && (realret != array[0] || index != 0)) {
1394 error("find(NULL,GT) gave %s(%d) should be %s(0)",
1395 realret, index, array[0]);
1396 } else if (!arraylen && (realret != NULL)) {
1397 error("find(NULL,GT) gave %s(%d) should be NULL", realret, index);
1400 realret = findrelpos234(tree, NULL, NULL, REL234_LT, &index);
1402 && (realret != array[arraylen - 1] || index != arraylen - 1)) {
1403 error("find(NULL,LT) gave %s(%d) should be %s(0)", realret, index,
1404 array[arraylen - 1]);
1405 } else if (!arraylen && (realret != NULL)) {
1406 error("find(NULL,LT) gave %s(%d) should be NULL", realret, index);
1416 for (i = 0; i < NSTR; i++)
1419 arraylen = arraysize = 0;
1420 tree = newtree234(mycmp);
1424 for (i = 0; i < 10000; i++) {
1425 j = randomnumber(&seed);
1427 printf("trial: %d\n", i);
1429 printf("deleting %s (%d)\n", strings[j], j);
1430 deltest(strings[j]);
1433 printf("adding %s (%d)\n", strings[j], j);
1434 addtest(strings[j]);
1440 while (arraylen > 0) {
1441 j = randomnumber(&seed);
1449 * Now try an unsorted tree. We don't really need to test
1450 * delpos234 because we know del234 is based on it, so it's
1451 * already been tested in the above sorted-tree code; but for
1452 * completeness we'll use it to tear down our unsorted tree
1453 * once we've built it.
1455 tree = newtree234(NULL);
1458 for (i = 0; i < 1000; i++) {
1459 printf("trial: %d\n", i);
1460 j = randomnumber(&seed);
1462 k = randomnumber(&seed);
1463 k %= count234(tree) + 1;
1464 printf("adding string %s at index %d\n", strings[j], k);
1465 addpostest(strings[j], k);
1467 while (count234(tree) > 0) {
1468 printf("cleanup: tree size %d\n", count234(tree));
1469 j = randomnumber(&seed);
1470 j %= count234(tree);
1471 printf("deleting string %s from index %d\n", array[j], j);