2 * tree234.c: reasonably generic counted 2-3-4 tree routines.
4 * This file is copyright 1999-2001 Simon Tatham.
6 * Permission is hereby granted, free of charge, to any person
7 * obtaining a copy of this software and associated documentation
8 * files (the "Software"), to deal in the Software without
9 * restriction, including without limitation the rights to use,
10 * copy, modify, merge, publish, distribute, sublicense, and/or
11 * sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following
15 * The above copyright notice and this permission notice shall be
16 * included in all copies or substantial portions of the Software.
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
20 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
21 * NONINFRINGEMENT. IN NO EVENT SHALL SIMON TATHAM BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
23 * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
24 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
34 #define smalloc malloc
37 #define mknew(typ) ( (typ *) smalloc (sizeof (typ)) )
40 #define LOG(x) (printf x)
45 typedef struct node234_Tag node234;
60 * Create a 2-3-4 tree.
62 tree234 *newtree234(cmpfn234 cmp)
64 tree234 *ret = mknew(tree234);
65 LOG(("created tree %p\n", ret));
72 * Free a 2-3-4 tree (not including freeing the elements).
74 static void freenode234(node234 * n)
78 freenode234(n->kids[0]);
79 freenode234(n->kids[1]);
80 freenode234(n->kids[2]);
81 freenode234(n->kids[3]);
85 void freetree234(tree234 * t)
92 * Internal function to count a node.
94 static int countnode234(node234 * n)
100 for (i = 0; i < 4; i++)
101 count += n->counts[i];
102 for (i = 0; i < 3; i++)
109 * Count the elements in a tree.
111 int count234(tree234 * t)
114 return countnode234(t->root);
120 * Add an element e to a 2-3-4 tree t. Returns e on success, or if
121 * an existing element compares equal, returns that.
123 static void *add234_internal(tree234 * t, void *e, int index)
125 node234 *n, **np, *left, *right;
127 int c, lcount, rcount;
129 LOG(("adding node %p to tree %p\n", e, t));
130 if (t->root == NULL) {
131 t->root = mknew(node234);
132 t->root->elems[1] = t->root->elems[2] = NULL;
133 t->root->kids[0] = t->root->kids[1] = NULL;
134 t->root->kids[2] = t->root->kids[3] = NULL;
135 t->root->counts[0] = t->root->counts[1] = 0;
136 t->root->counts[2] = t->root->counts[3] = 0;
137 t->root->parent = NULL;
138 t->root->elems[0] = e;
139 LOG((" created root %p\n", t->root));
147 LOG((" node %p: %p/%d [%p] %p/%d [%p] %p/%d [%p] %p/%d\n",
149 n->kids[0], n->counts[0], n->elems[0],
150 n->kids[1], n->counts[1], n->elems[1],
151 n->kids[2], n->counts[2], n->elems[2],
152 n->kids[3], n->counts[3]));
156 * Leaf node. We want to insert at kid position
157 * equal to the index:
164 * Internal node. We always descend through it (add
165 * always starts at the bottom, never in the
168 do { /* this is a do ... while (0) to allow `break' */
169 if (index <= n->counts[0]) {
173 index -= n->counts[0] + 1;
174 if (index <= n->counts[1]) {
178 index -= n->counts[1] + 1;
179 if (index <= n->counts[2]) {
183 index -= n->counts[2] + 1;
184 if (index <= n->counts[3]) {
188 return NULL; /* error: index out of range */
192 if ((c = t->cmp(e, n->elems[0])) < 0)
195 return n->elems[0]; /* already exists */
196 else if (n->elems[1] == NULL
197 || (c = t->cmp(e, n->elems[1])) < 0) childnum = 1;
199 return n->elems[1]; /* already exists */
200 else if (n->elems[2] == NULL
201 || (c = t->cmp(e, n->elems[2])) < 0) childnum = 2;
203 return n->elems[2]; /* already exists */
207 np = &n->kids[childnum];
208 LOG((" moving to child %d (%p)\n", childnum, *np));
212 * We need to insert the new element in n at position np.
219 LOG((" at %p: %p/%d [%p] %p/%d [%p] %p/%d [%p] %p/%d\n",
221 n->kids[0], n->counts[0], n->elems[0],
222 n->kids[1], n->counts[1], n->elems[1],
223 n->kids[2], n->counts[2], n->elems[2],
224 n->kids[3], n->counts[3]));
225 LOG((" need to insert %p/%d [%p] %p/%d at position %d\n",
226 left, lcount, e, right, rcount, np - n->kids));
227 if (n->elems[1] == NULL) {
229 * Insert in a 2-node; simple.
231 if (np == &n->kids[0]) {
232 LOG((" inserting on left of 2-node\n"));
233 n->kids[2] = n->kids[1];
234 n->counts[2] = n->counts[1];
235 n->elems[1] = n->elems[0];
237 n->counts[1] = rcount;
240 n->counts[0] = lcount;
241 } else { /* np == &n->kids[1] */
242 LOG((" inserting on right of 2-node\n"));
244 n->counts[2] = rcount;
247 n->counts[1] = lcount;
250 n->kids[0]->parent = n;
252 n->kids[1]->parent = n;
254 n->kids[2]->parent = n;
257 } else if (n->elems[2] == NULL) {
259 * Insert in a 3-node; simple.
261 if (np == &n->kids[0]) {
262 LOG((" inserting on left of 3-node\n"));
263 n->kids[3] = n->kids[2];
264 n->counts[3] = n->counts[2];
265 n->elems[2] = n->elems[1];
266 n->kids[2] = n->kids[1];
267 n->counts[2] = n->counts[1];
268 n->elems[1] = n->elems[0];
270 n->counts[1] = rcount;
273 n->counts[0] = lcount;
274 } else if (np == &n->kids[1]) {
275 LOG((" inserting in middle of 3-node\n"));
276 n->kids[3] = n->kids[2];
277 n->counts[3] = n->counts[2];
278 n->elems[2] = n->elems[1];
280 n->counts[2] = rcount;
283 n->counts[1] = lcount;
284 } else { /* np == &n->kids[2] */
285 LOG((" inserting on right of 3-node\n"));
287 n->counts[3] = rcount;
290 n->counts[2] = lcount;
293 n->kids[0]->parent = n;
295 n->kids[1]->parent = n;
297 n->kids[2]->parent = n;
299 n->kids[3]->parent = n;
303 node234 *m = mknew(node234);
304 m->parent = n->parent;
305 LOG((" splitting a 4-node; created new node %p\n", m));
307 * Insert in a 4-node; split into a 2-node and a
308 * 3-node, and move focus up a level.
310 * I don't think it matters which way round we put the
311 * 2 and the 3. For simplicity, we'll put the 3 first
314 if (np == &n->kids[0]) {
316 m->counts[0] = lcount;
319 m->counts[1] = rcount;
320 m->elems[1] = n->elems[0];
321 m->kids[2] = n->kids[1];
322 m->counts[2] = n->counts[1];
324 n->kids[0] = n->kids[2];
325 n->counts[0] = n->counts[2];
326 n->elems[0] = n->elems[2];
327 n->kids[1] = n->kids[3];
328 n->counts[1] = n->counts[3];
329 } else if (np == &n->kids[1]) {
330 m->kids[0] = n->kids[0];
331 m->counts[0] = n->counts[0];
332 m->elems[0] = n->elems[0];
334 m->counts[1] = lcount;
337 m->counts[2] = rcount;
339 n->kids[0] = n->kids[2];
340 n->counts[0] = n->counts[2];
341 n->elems[0] = n->elems[2];
342 n->kids[1] = n->kids[3];
343 n->counts[1] = n->counts[3];
344 } else if (np == &n->kids[2]) {
345 m->kids[0] = n->kids[0];
346 m->counts[0] = n->counts[0];
347 m->elems[0] = n->elems[0];
348 m->kids[1] = n->kids[1];
349 m->counts[1] = n->counts[1];
350 m->elems[1] = n->elems[1];
352 m->counts[2] = lcount;
355 n->counts[0] = rcount;
356 n->elems[0] = n->elems[2];
357 n->kids[1] = n->kids[3];
358 n->counts[1] = n->counts[3];
359 } else { /* np == &n->kids[3] */
360 m->kids[0] = n->kids[0];
361 m->counts[0] = n->counts[0];
362 m->elems[0] = n->elems[0];
363 m->kids[1] = n->kids[1];
364 m->counts[1] = n->counts[1];
365 m->elems[1] = n->elems[1];
366 m->kids[2] = n->kids[2];
367 m->counts[2] = n->counts[2];
369 n->counts[0] = lcount;
372 n->counts[1] = rcount;
375 m->kids[3] = n->kids[3] = n->kids[2] = NULL;
376 m->counts[3] = n->counts[3] = n->counts[2] = 0;
377 m->elems[2] = n->elems[2] = n->elems[1] = NULL;
379 m->kids[0]->parent = m;
381 m->kids[1]->parent = m;
383 m->kids[2]->parent = m;
385 n->kids[0]->parent = n;
387 n->kids[1]->parent = n;
388 LOG((" left (%p): %p/%d [%p] %p/%d [%p] %p/%d\n", m,
389 m->kids[0], m->counts[0], m->elems[0],
390 m->kids[1], m->counts[1], m->elems[1],
391 m->kids[2], m->counts[2]));
392 LOG((" right (%p): %p/%d [%p] %p/%d\n", n,
393 n->kids[0], n->counts[0], n->elems[0],
394 n->kids[1], n->counts[1]));
396 lcount = countnode234(left);
398 rcount = countnode234(right);
401 np = (n->parent->kids[0] == n ? &n->parent->kids[0] :
402 n->parent->kids[1] == n ? &n->parent->kids[1] :
403 n->parent->kids[2] == n ? &n->parent->kids[2] :
404 &n->parent->kids[3]);
409 * If we've come out of here by `break', n will still be
410 * non-NULL and all we need to do is go back up the tree
411 * updating counts. If we've come here because n is NULL, we
412 * need to create a new root for the tree because the old one
413 * has just split into two. */
416 int count = countnode234(n);
418 childnum = (n->parent->kids[0] == n ? 0 :
419 n->parent->kids[1] == n ? 1 :
420 n->parent->kids[2] == n ? 2 : 3);
421 n->parent->counts[childnum] = count;
425 LOG((" root is overloaded, split into two\n"));
426 t->root = mknew(node234);
427 t->root->kids[0] = left;
428 t->root->counts[0] = lcount;
429 t->root->elems[0] = e;
430 t->root->kids[1] = right;
431 t->root->counts[1] = rcount;
432 t->root->elems[1] = NULL;
433 t->root->kids[2] = NULL;
434 t->root->counts[2] = 0;
435 t->root->elems[2] = NULL;
436 t->root->kids[3] = NULL;
437 t->root->counts[3] = 0;
438 t->root->parent = NULL;
439 if (t->root->kids[0])
440 t->root->kids[0]->parent = t->root;
441 if (t->root->kids[1])
442 t->root->kids[1]->parent = t->root;
443 LOG((" new root is %p/%d [%p] %p/%d\n",
444 t->root->kids[0], t->root->counts[0],
445 t->root->elems[0], t->root->kids[1], t->root->counts[1]));
451 void *add234(tree234 * t, void *e)
453 if (!t->cmp) /* tree is unsorted */
456 return add234_internal(t, e, -1);
458 void *addpos234(tree234 * t, void *e, int index)
460 if (index < 0 || /* index out of range */
461 t->cmp) /* tree is sorted */
462 return NULL; /* return failure */
464 return add234_internal(t, e, index); /* this checks the upper bound */
468 * Look up the element at a given numeric index in a 2-3-4 tree.
469 * Returns NULL if the index is out of range.
471 void *index234(tree234 * t, int index)
476 return NULL; /* tree is empty */
478 if (index < 0 || index >= countnode234(t->root))
479 return NULL; /* out of range */
484 if (index < n->counts[0])
486 else if (index -= n->counts[0] + 1, index < 0)
488 else if (index < n->counts[1])
490 else if (index -= n->counts[1] + 1, index < 0)
492 else if (index < n->counts[2])
494 else if (index -= n->counts[2] + 1, index < 0)
500 /* We shouldn't ever get here. I wonder how we did. */
505 * Find an element e in a sorted 2-3-4 tree t. Returns NULL if not
506 * found. e is always passed as the first argument to cmp, so cmp
507 * can be an asymmetric function if desired. cmp can also be passed
508 * as NULL, in which case the compare function from the tree proper
511 void *findrelpos234(tree234 * t, void *e, cmpfn234 cmp,
512 int relation, int *index)
517 int idx, ecount, kcount, cmpret;
527 * Attempt to find the element itself.
532 * Prepare a fake `cmp' result if e is NULL.
536 assert(relation == REL234_LT || relation == REL234_GT);
537 if (relation == REL234_LT)
538 cmpret = +1; /* e is a max: always greater */
539 else if (relation == REL234_GT)
540 cmpret = -1; /* e is a min: always smaller */
543 for (kcount = 0; kcount < 4; kcount++) {
544 if (kcount >= 3 || n->elems[kcount] == NULL ||
545 (c = cmpret ? cmpret : cmp(e, n->elems[kcount])) < 0) {
549 idx += n->counts[kcount];
566 * We have found the element we're looking for. It's
567 * n->elems[ecount], at tree index idx. If our search
568 * relation is EQ, LE or GE we can now go home.
570 if (relation != REL234_LT && relation != REL234_GT) {
573 return n->elems[ecount];
577 * Otherwise, we'll do an indexed lookup for the previous
578 * or next element. (It would be perfectly possible to
579 * implement these search types in a non-counted tree by
580 * going back up from where we are, but far more fiddly.)
582 if (relation == REL234_LT)
588 * We've found our way to the bottom of the tree and we
589 * know where we would insert this node if we wanted to:
590 * we'd put it in in place of the (empty) subtree
591 * n->kids[kcount], and it would have index idx
593 * But the actual element isn't there. So if our search
594 * relation is EQ, we're doomed.
596 if (relation == REL234_EQ)
600 * Otherwise, we must do an index lookup for index idx-1
601 * (if we're going left - LE or LT) or index idx (if we're
602 * going right - GE or GT).
604 if (relation == REL234_LT || relation == REL234_LE) {
610 * We know the index of the element we want; just call index234
611 * to do the rest. This will return NULL if the index is out of
612 * bounds, which is exactly what we want.
614 ret = index234(t, idx);
619 void *find234(tree234 * t, void *e, cmpfn234 cmp)
621 return findrelpos234(t, e, cmp, REL234_EQ, NULL);
623 void *findrel234(tree234 * t, void *e, cmpfn234 cmp, int relation)
625 return findrelpos234(t, e, cmp, relation, NULL);
627 void *findpos234(tree234 * t, void *e, cmpfn234 cmp, int *index)
629 return findrelpos234(t, e, cmp, REL234_EQ, index);
633 * Delete an element e in a 2-3-4 tree. Does not free the element,
634 * merely removes all links to it from the tree nodes.
636 static void *delpos234_internal(tree234 * t, int index)
645 LOG(("deleting item %d from tree %p\n", index, t));
652 (" node %p: %p/%d [%p] %p/%d [%p] %p/%d [%p] %p/%d index=%d\n",
653 n, n->kids[0], n->counts[0], n->elems[0], n->kids[1],
654 n->counts[1], n->elems[1], n->kids[2], n->counts[2],
655 n->elems[2], n->kids[3], n->counts[3], index));
656 if (index < n->counts[0]) {
658 } else if (index -= n->counts[0] + 1, index < 0) {
661 } else if (index < n->counts[1]) {
663 } else if (index -= n->counts[1] + 1, index < 0) {
666 } else if (index < n->counts[2]) {
668 } else if (index -= n->counts[2] + 1, index < 0) {
675 * Recurse down to subtree ki. If it has only one element,
676 * we have to do some transformation to start with.
678 LOG((" moving to subtree %d\n", ki));
680 if (!sub->elems[1]) {
681 LOG((" subtree has only one element!\n", ki));
682 if (ki > 0 && n->kids[ki - 1]->elems[1]) {
684 * Case 3a, left-handed variant. Child ki has
685 * only one element, but child ki-1 has two or
686 * more. So we need to move a subtree from ki-1
691 * [more] a A b B c d D e [more] a A b c C d D e
693 node234 *sib = n->kids[ki - 1];
694 int lastelem = (sib->elems[2] ? 2 :
695 sib->elems[1] ? 1 : 0);
696 sub->kids[2] = sub->kids[1];
697 sub->counts[2] = sub->counts[1];
698 sub->elems[1] = sub->elems[0];
699 sub->kids[1] = sub->kids[0];
700 sub->counts[1] = sub->counts[0];
701 sub->elems[0] = n->elems[ki - 1];
702 sub->kids[0] = sib->kids[lastelem + 1];
703 sub->counts[0] = sib->counts[lastelem + 1];
705 sub->kids[0]->parent = sub;
706 n->elems[ki - 1] = sib->elems[lastelem];
707 sib->kids[lastelem + 1] = NULL;
708 sib->counts[lastelem + 1] = 0;
709 sib->elems[lastelem] = NULL;
710 n->counts[ki] = countnode234(sub);
711 LOG((" case 3a left\n"));
713 (" index and left subtree count before adjustment: %d, %d\n",
714 index, n->counts[ki - 1]));
715 index += n->counts[ki - 1];
716 n->counts[ki - 1] = countnode234(sib);
717 index -= n->counts[ki - 1];
719 (" index and left subtree count after adjustment: %d, %d\n",
720 index, n->counts[ki - 1]));
721 } else if (ki < 3 && n->kids[ki + 1]
722 && n->kids[ki + 1]->elems[1]) {
724 * Case 3a, right-handed variant. ki has only
725 * one element but ki+1 has two or more. Move a
726 * subtree from ki+1 to ki.
730 * a A b c C d D e [more] a A b B c d D e [more]
732 node234 *sib = n->kids[ki + 1];
734 sub->elems[1] = n->elems[ki];
735 sub->kids[2] = sib->kids[0];
736 sub->counts[2] = sib->counts[0];
738 sub->kids[2]->parent = sub;
739 n->elems[ki] = sib->elems[0];
740 sib->kids[0] = sib->kids[1];
741 sib->counts[0] = sib->counts[1];
742 for (j = 0; j < 2 && sib->elems[j + 1]; j++) {
743 sib->kids[j + 1] = sib->kids[j + 2];
744 sib->counts[j + 1] = sib->counts[j + 2];
745 sib->elems[j] = sib->elems[j + 1];
747 sib->kids[j + 1] = NULL;
748 sib->counts[j + 1] = 0;
749 sib->elems[j] = NULL;
750 n->counts[ki] = countnode234(sub);
751 n->counts[ki + 1] = countnode234(sib);
752 LOG((" case 3a right\n"));
755 * Case 3b. ki has only one element, and has no
756 * neighbour with more than one. So pick a
757 * neighbour and merge it with ki, taking an
758 * element down from n to go in the middle.
762 * a A b c C d a A b B c C d
764 * (Since at all points we have avoided
765 * descending to a node with only one element,
766 * we can be sure that n is not reduced to
767 * nothingness by this move, _unless_ it was
768 * the very first node, ie the root of the
769 * tree. In that case we remove the now-empty
770 * root and replace it with its single large
778 index += n->counts[ki] + 1;
781 sub = n->kids[ki + 1];
783 sub->kids[3] = sub->kids[1];
784 sub->counts[3] = sub->counts[1];
785 sub->elems[2] = sub->elems[0];
786 sub->kids[2] = sub->kids[0];
787 sub->counts[2] = sub->counts[0];
788 sub->elems[1] = n->elems[ki];
789 sub->kids[1] = sib->kids[1];
790 sub->counts[1] = sib->counts[1];
792 sub->kids[1]->parent = sub;
793 sub->elems[0] = sib->elems[0];
794 sub->kids[0] = sib->kids[0];
795 sub->counts[0] = sib->counts[0];
797 sub->kids[0]->parent = sub;
799 n->counts[ki + 1] = countnode234(sub);
804 * That's built the big node in sub. Now we
805 * need to remove the reference to sib in n.
807 for (j = ki; j < 3 && n->kids[j + 1]; j++) {
808 n->kids[j] = n->kids[j + 1];
809 n->counts[j] = n->counts[j + 1];
810 n->elems[j] = j < 2 ? n->elems[j + 1] : NULL;
816 LOG((" case 3b ki=%d\n", ki));
820 * The root is empty and needs to be
823 LOG((" shifting root!\n"));
833 retval = n->elems[ei];
836 return NULL; /* although this shouldn't happen */
839 * Treat special case: this is the one remaining item in
840 * the tree. n is the tree root (no parent), has one
841 * element (no elems[1]), and has no kids (no kids[0]).
843 if (!n->parent && !n->elems[1] && !n->kids[0]) {
844 LOG((" removed last element in tree\n"));
851 * Now we have the element we want, as n->elems[ei], and we
852 * have also arranged for that element not to be the only
853 * one in its node. So...
856 if (!n->kids[0] && n->elems[1]) {
858 * Case 1. n is a leaf node with more than one element,
859 * so it's _really easy_. Just delete the thing and
864 for (i = ei; i < 2 && n->elems[i + 1]; i++)
865 n->elems[i] = n->elems[i + 1];
868 * Having done that to the leaf node, we now go back up
869 * the tree fixing the counts.
873 childnum = (n->parent->kids[0] == n ? 0 :
874 n->parent->kids[1] == n ? 1 :
875 n->parent->kids[2] == n ? 2 : 3);
876 n->parent->counts[childnum]--;
879 return retval; /* finished! */
880 } else if (n->kids[ei]->elems[1]) {
882 * Case 2a. n is an internal node, and the root of the
883 * subtree to the left of e has more than one element.
884 * So find the predecessor p to e (ie the largest node
885 * in that subtree), place it where e currently is, and
886 * then start the deletion process over again on the
887 * subtree with p as target.
889 node234 *m = n->kids[ei];
893 m = (m->kids[3] ? m->kids[3] :
894 m->kids[2] ? m->kids[2] :
895 m->kids[1] ? m->kids[1] : m->kids[0]);
897 target = (m->elems[2] ? m->elems[2] :
898 m->elems[1] ? m->elems[1] : m->elems[0]);
899 n->elems[ei] = target;
900 index = n->counts[ei] - 1;
902 } else if (n->kids[ei + 1]->elems[1]) {
904 * Case 2b, symmetric to 2a but s/left/right/ and
905 * s/predecessor/successor/. (And s/largest/smallest/).
907 node234 *m = n->kids[ei + 1];
913 target = m->elems[0];
914 n->elems[ei] = target;
919 * Case 2c. n is an internal node, and the subtrees to
920 * the left and right of e both have only one element.
921 * So combine the two subnodes into a single big node
922 * with their own elements on the left and right and e
923 * in the middle, then restart the deletion process on
924 * that subtree, with e still as target.
926 node234 *a = n->kids[ei], *b = n->kids[ei + 1];
930 a->elems[1] = n->elems[ei];
931 a->kids[2] = b->kids[0];
932 a->counts[2] = b->counts[0];
934 a->kids[2]->parent = a;
935 a->elems[2] = b->elems[0];
936 a->kids[3] = b->kids[1];
937 a->counts[3] = b->counts[1];
939 a->kids[3]->parent = a;
941 n->counts[ei] = countnode234(a);
943 * That's built the big node in a, and destroyed b. Now
944 * remove the reference to b (and e) in n.
946 for (j = ei; j < 2 && n->elems[j + 1]; j++) {
947 n->elems[j] = n->elems[j + 1];
948 n->kids[j + 1] = n->kids[j + 2];
949 n->counts[j + 1] = n->counts[j + 2];
952 n->kids[j + 1] = NULL;
953 n->counts[j + 1] = 0;
955 * It's possible, in this case, that we've just removed
956 * the only element in the root of the tree. If so,
959 if (n->elems[0] == NULL) {
960 LOG((" shifting root!\n"));
966 * Now go round the deletion process again, with n
967 * pointing at the new big node and e still the same.
970 index = a->counts[0] + a->counts[1] + 1;
974 void *delpos234(tree234 * t, int index)
976 if (index < 0 || index >= countnode234(t->root))
978 return delpos234_internal(t, index);
980 void *del234(tree234 * t, void *e)
983 if (!findrelpos234(t, e, NULL, REL234_EQ, &index))
984 return NULL; /* it wasn't in there anyway */
985 return delpos234_internal(t, index); /* it's there; delete it. */
991 * Test code for the 2-3-4 tree. This code maintains an alternative
992 * representation of the data in the tree, in an array (using the
993 * obvious and slow insert and delete functions). After each tree
994 * operation, the verify() function is called, which ensures all
995 * the tree properties are preserved:
996 * - node->child->parent always equals node
997 * - tree->root->parent always equals NULL
998 * - number of kids == 0 or number of elements + 1;
999 * - tree has the same depth everywhere
1000 * - every node has at least one element
1001 * - subtree element counts are accurate
1002 * - any NULL kid pointer is accompanied by a zero count
1003 * - in a sorted tree: ordering property between elements of a
1004 * node and elements of its children is preserved
1005 * and also ensures the list represented by the tree is the same
1006 * list it should be. (This last check also doubly verifies the
1007 * ordering properties, because the `same list it should be' is by
1008 * definition correctly ordered. It also ensures all nodes are
1009 * distinct, because the enum functions would get caught in a loop
1015 #define srealloc realloc
1018 * Error reporting function.
1020 void error(char *fmt, ...)
1025 vfprintf(stdout, fmt, ap);
1030 /* The array representation of the data. */
1032 int arraylen, arraysize;
1035 /* The tree representation of the same data. */
1043 int chknode(chkctx * ctx, int level, node234 * node,
1044 void *lowbound, void *highbound)
1050 /* Count the non-NULL kids. */
1051 for (nkids = 0; nkids < 4 && node->kids[nkids]; nkids++);
1052 /* Ensure no kids beyond the first NULL are non-NULL. */
1053 for (i = nkids; i < 4; i++)
1054 if (node->kids[i]) {
1055 error("node %p: nkids=%d but kids[%d] non-NULL",
1057 } else if (node->counts[i]) {
1058 error("node %p: kids[%d] NULL but count[%d]=%d nonzero",
1059 node, i, i, node->counts[i]);
1062 /* Count the non-NULL elements. */
1063 for (nelems = 0; nelems < 3 && node->elems[nelems]; nelems++);
1064 /* Ensure no elements beyond the first NULL are non-NULL. */
1065 for (i = nelems; i < 3; i++)
1066 if (node->elems[i]) {
1067 error("node %p: nelems=%d but elems[%d] non-NULL",
1073 * If nkids==0, this is a leaf node; verify that the tree
1074 * depth is the same everywhere.
1076 if (ctx->treedepth < 0)
1077 ctx->treedepth = level; /* we didn't know the depth yet */
1078 else if (ctx->treedepth != level)
1079 error("node %p: leaf at depth %d, previously seen depth %d",
1080 node, level, ctx->treedepth);
1083 * If nkids != 0, then it should be nelems+1, unless nelems
1084 * is 0 in which case nkids should also be 0 (and so we
1085 * shouldn't be in this condition at all).
1087 int shouldkids = (nelems ? nelems + 1 : 0);
1088 if (nkids != shouldkids) {
1089 error("node %p: %d elems should mean %d kids but has %d",
1090 node, nelems, shouldkids, nkids);
1095 * nelems should be at least 1.
1098 error("node %p: no elems", node, nkids);
1102 * Add nelems to the running element count of the whole tree.
1104 ctx->elemcount += nelems;
1107 * Check ordering property: all elements should be strictly >
1108 * lowbound, strictly < highbound, and strictly < each other in
1109 * sequence. (lowbound and highbound are NULL at edges of tree
1110 * - both NULL at root node - and NULL is considered to be <
1111 * everything and > everything. IYSWIM.)
1114 for (i = -1; i < nelems; i++) {
1115 void *lower = (i == -1 ? lowbound : node->elems[i]);
1117 (i + 1 == nelems ? highbound : node->elems[i + 1]);
1118 if (lower && higher && cmp(lower, higher) >= 0) {
1119 error("node %p: kid comparison [%d=%s,%d=%s] failed",
1120 node, i, lower, i + 1, higher);
1126 * Check parent pointers: all non-NULL kids should have a
1127 * parent pointer coming back to this node.
1129 for (i = 0; i < nkids; i++)
1130 if (node->kids[i]->parent != node) {
1131 error("node %p kid %d: parent ptr is %p not %p",
1132 node, i, node->kids[i]->parent, node);
1137 * Now (finally!) recurse into subtrees.
1141 for (i = 0; i < nkids; i++) {
1142 void *lower = (i == 0 ? lowbound : node->elems[i - 1]);
1143 void *higher = (i >= nelems ? highbound : node->elems[i]);
1145 chknode(ctx, level + 1, node->kids[i], lower, higher);
1146 if (node->counts[i] != subcount) {
1147 error("node %p kid %d: count says %d, subtree really has %d",
1148 node, i, node->counts[i], subcount);
1162 ctx.treedepth = -1; /* depth unknown yet */
1163 ctx.elemcount = 0; /* no elements seen yet */
1165 * Verify validity of tree properties.
1168 if (tree->root->parent != NULL)
1169 error("root->parent is %p should be null", tree->root->parent);
1170 chknode(&ctx, 0, tree->root, NULL, NULL);
1172 printf("tree depth: %d\n", ctx.treedepth);
1174 * Enumerate the tree and ensure it matches up to the array.
1176 for (i = 0; NULL != (p = index234(tree, i)); i++) {
1178 error("tree contains more than %d elements", arraylen);
1180 error("enum at position %d: array says %s, tree says %s",
1183 if (ctx.elemcount != i) {
1184 error("tree really contains %d elements, enum gave %d",
1188 error("enum gave only %d elements, array has %d", i, arraylen);
1191 if (ctx.elemcount != i) {
1192 error("tree really contains %d elements, count234 gave %d",
1197 void internal_addtest(void *elem, int index, void *realret)
1202 if (arraysize < arraylen + 1) {
1203 arraysize = arraylen + 1 + 256;
1204 array = (array == NULL ? smalloc(arraysize * sizeof(*array)) :
1205 srealloc(array, arraysize * sizeof(*array)));
1209 /* now i points to the first element >= elem */
1210 retval = elem; /* expect elem returned (success) */
1211 for (j = arraylen; j > i; j--)
1212 array[j] = array[j - 1];
1213 array[i] = elem; /* add elem to array */
1216 if (realret != retval) {
1217 error("add: retval was %p expected %p", realret, retval);
1223 void addtest(void *elem)
1228 realret = add234(tree, elem);
1231 while (i < arraylen && cmp(elem, array[i]) > 0)
1233 if (i < arraylen && !cmp(elem, array[i])) {
1234 void *retval = array[i]; /* expect that returned not elem */
1235 if (realret != retval) {
1236 error("add: retval was %p expected %p", realret, retval);
1239 internal_addtest(elem, i, realret);
1242 void addpostest(void *elem, int i)
1246 realret = addpos234(tree, elem, i);
1248 internal_addtest(elem, i, realret);
1251 void delpostest(int i)
1254 void *elem = array[i], *ret;
1256 /* i points to the right element */
1257 while (i < arraylen - 1) {
1258 array[i] = array[i + 1];
1261 arraylen--; /* delete elem from array */
1264 ret = del234(tree, elem);
1266 ret = delpos234(tree, index);
1269 error("del returned %p, expected %p", ret, elem);
1275 void deltest(void *elem)
1280 while (i < arraylen && cmp(elem, array[i]) > 0)
1282 if (i >= arraylen || cmp(elem, array[i]) != 0)
1283 return; /* don't do it! */
1287 /* A sample data set and test utility. Designed for pseudo-randomness,
1288 * and yet repeatability. */
1291 * This random number generator uses the `portable implementation'
1292 * given in ANSI C99 draft N869. It assumes `unsigned' is 32 bits;
1295 int randomnumber(unsigned *seed)
1297 *seed *= 1103515245;
1299 return ((*seed) / 65536) % 32768;
1302 int mycmp(void *av, void *bv)
1304 char const *a = (char const *) av;
1305 char const *b = (char const *) bv;
1306 return strcmp(a, b);
1309 #define lenof(x) ( sizeof((x)) / sizeof(*(x)) )
1312 "a", "ab", "absque", "coram", "de",
1313 "palam", "clam", "cum", "ex", "e",
1314 "sine", "tenus", "pro", "prae",
1315 "banana", "carrot", "cabbage", "broccoli", "onion", "zebra",
1316 "penguin", "blancmange", "pangolin", "whale", "hedgehog",
1317 "giraffe", "peanut", "bungee", "foo", "bar", "baz", "quux",
1318 "murfl", "spoo", "breen", "flarn", "octothorpe",
1319 "snail", "tiger", "elephant", "octopus", "warthog", "armadillo",
1320 "aardvark", "wyvern", "dragon", "elf", "dwarf", "orc", "goblin",
1321 "pixie", "basilisk", "warg", "ape", "lizard", "newt", "shopkeeper",
1322 "wand", "ring", "amulet"
1325 #define NSTR lenof(strings)
1329 const static int rels[] = {
1330 REL234_EQ, REL234_GE, REL234_LE, REL234_LT, REL234_GT
1332 const static char *const relnames[] = {
1333 "EQ", "GE", "LE", "LT", "GT"
1335 int i, j, rel, index;
1336 char *p, *ret, *realret, *realret2;
1339 for (i = 0; i < NSTR; i++) {
1341 for (j = 0; j < sizeof(rels) / sizeof(*rels); j++) {
1347 mid = (lo + hi) / 2;
1348 c = strcmp(p, array[mid]);
1358 if (rel == REL234_LT)
1359 ret = (mid > 0 ? array[--mid] : NULL);
1360 else if (rel == REL234_GT)
1361 ret = (mid < arraylen - 1 ? array[++mid] : NULL);
1365 assert(lo == hi + 1);
1366 if (rel == REL234_LT || rel == REL234_LE) {
1368 ret = (hi >= 0 ? array[hi] : NULL);
1369 } else if (rel == REL234_GT || rel == REL234_GE) {
1371 ret = (lo < arraylen ? array[lo] : NULL);
1376 realret = findrelpos234(tree, p, NULL, rel, &index);
1377 if (realret != ret) {
1378 error("find(\"%s\",%s) gave %s should be %s",
1379 p, relnames[j], realret, ret);
1381 if (realret && index != mid) {
1382 error("find(\"%s\",%s) gave %d should be %d",
1383 p, relnames[j], index, mid);
1385 if (realret && rel == REL234_EQ) {
1386 realret2 = index234(tree, index);
1387 if (realret2 != realret) {
1388 error("find(\"%s\",%s) gave %s(%d) but %d -> %s",
1389 p, relnames[j], realret, index, index, realret2);
1393 printf("find(\"%s\",%s) gave %s(%d)\n", p, relnames[j],
1399 realret = findrelpos234(tree, NULL, NULL, REL234_GT, &index);
1400 if (arraylen && (realret != array[0] || index != 0)) {
1401 error("find(NULL,GT) gave %s(%d) should be %s(0)",
1402 realret, index, array[0]);
1403 } else if (!arraylen && (realret != NULL)) {
1404 error("find(NULL,GT) gave %s(%d) should be NULL", realret, index);
1407 realret = findrelpos234(tree, NULL, NULL, REL234_LT, &index);
1409 && (realret != array[arraylen - 1] || index != arraylen - 1)) {
1410 error("find(NULL,LT) gave %s(%d) should be %s(0)", realret, index,
1411 array[arraylen - 1]);
1412 } else if (!arraylen && (realret != NULL)) {
1413 error("find(NULL,LT) gave %s(%d) should be NULL", realret, index);
1423 for (i = 0; i < NSTR; i++)
1426 arraylen = arraysize = 0;
1427 tree = newtree234(mycmp);
1431 for (i = 0; i < 10000; i++) {
1432 j = randomnumber(&seed);
1434 printf("trial: %d\n", i);
1436 printf("deleting %s (%d)\n", strings[j], j);
1437 deltest(strings[j]);
1440 printf("adding %s (%d)\n", strings[j], j);
1441 addtest(strings[j]);
1447 while (arraylen > 0) {
1448 j = randomnumber(&seed);
1456 * Now try an unsorted tree. We don't really need to test
1457 * delpos234 because we know del234 is based on it, so it's
1458 * already been tested in the above sorted-tree code; but for
1459 * completeness we'll use it to tear down our unsorted tree
1460 * once we've built it.
1462 tree = newtree234(NULL);
1465 for (i = 0; i < 1000; i++) {
1466 printf("trial: %d\n", i);
1467 j = randomnumber(&seed);
1469 k = randomnumber(&seed);
1470 k %= count234(tree) + 1;
1471 printf("adding string %s at index %d\n", strings[j], k);
1472 addpostest(strings[j], k);
1474 while (count234(tree) > 0) {
1475 printf("cleanup: tree size %d\n", count234(tree));
1476 j = randomnumber(&seed);
1477 j %= count234(tree);
1478 printf("deleting string %s from index %d\n", array[j], j);