2 * tree234.c: reasonably generic counted 2-3-4 tree routines.
4 * This file is copyright 1999-2001 Simon Tatham.
6 * Permission is hereby granted, free of charge, to any person
7 * obtaining a copy of this software and associated documentation
8 * files (the "Software"), to deal in the Software without
9 * restriction, including without limitation the rights to use,
10 * copy, modify, merge, publish, distribute, sublicense, and/or
11 * sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following
15 * The above copyright notice and this permission notice shall be
16 * included in all copies or substantial portions of the Software.
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
20 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
21 * NONINFRINGEMENT. IN NO EVENT SHALL SIMON TATHAM BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
23 * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
24 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
34 #define smalloc malloc
37 #define mknew(typ) ( (typ *) smalloc (sizeof (typ)) )
40 #define LOG(x) (printf x)
45 typedef struct node234_Tag node234;
60 * Create a 2-3-4 tree.
62 tree234 *newtree234(cmpfn234 cmp) {
63 tree234 *ret = mknew(tree234);
64 LOG(("created tree %p\n", ret));
71 * Free a 2-3-4 tree (not including freeing the elements).
73 static void freenode234(node234 *n) {
76 freenode234(n->kids[0]);
77 freenode234(n->kids[1]);
78 freenode234(n->kids[2]);
79 freenode234(n->kids[3]);
82 void freetree234(tree234 *t) {
88 * Internal function to count a node.
90 static int countnode234(node234 *n) {
93 for (i = 0; i < 4; i++)
94 count += n->counts[i];
95 for (i = 0; i < 3; i++)
102 * Count the elements in a tree.
104 int count234(tree234 *t) {
106 return countnode234(t->root);
112 * Add an element e to a 2-3-4 tree t. Returns e on success, or if
113 * an existing element compares equal, returns that.
115 static void *add234_internal(tree234 *t, void *e, int index) {
116 node234 *n, **np, *left, *right;
118 int c, lcount, rcount;
120 LOG(("adding node %p to tree %p\n", e, t));
121 if (t->root == NULL) {
122 t->root = mknew(node234);
123 t->root->elems[1] = t->root->elems[2] = NULL;
124 t->root->kids[0] = t->root->kids[1] = NULL;
125 t->root->kids[2] = t->root->kids[3] = NULL;
126 t->root->counts[0] = t->root->counts[1] = 0;
127 t->root->counts[2] = t->root->counts[3] = 0;
128 t->root->parent = NULL;
129 t->root->elems[0] = e;
130 LOG((" created root %p\n", t->root));
138 LOG((" node %p: %p/%d [%p] %p/%d [%p] %p/%d [%p] %p/%d\n",
140 n->kids[0], n->counts[0], n->elems[0],
141 n->kids[1], n->counts[1], n->elems[1],
142 n->kids[2], n->counts[2], n->elems[2],
143 n->kids[3], n->counts[3]));
147 * Leaf node. We want to insert at kid position
148 * equal to the index:
155 * Internal node. We always descend through it (add
156 * always starts at the bottom, never in the
159 do { /* this is a do ... while (0) to allow `break' */
160 if (index <= n->counts[0]) {
164 index -= n->counts[0] + 1;
165 if (index <= n->counts[1]) {
169 index -= n->counts[1] + 1;
170 if (index <= n->counts[2]) {
174 index -= n->counts[2] + 1;
175 if (index <= n->counts[3]) {
179 return NULL; /* error: index out of range */
183 if ((c = t->cmp(e, n->elems[0])) < 0)
186 return n->elems[0]; /* already exists */
187 else if (n->elems[1] == NULL || (c = t->cmp(e, n->elems[1])) < 0)
190 return n->elems[1]; /* already exists */
191 else if (n->elems[2] == NULL || (c = t->cmp(e, n->elems[2])) < 0)
194 return n->elems[2]; /* already exists */
198 np = &n->kids[childnum];
199 LOG((" moving to child %d (%p)\n", childnum, *np));
203 * We need to insert the new element in n at position np.
205 left = NULL; lcount = 0;
206 right = NULL; rcount = 0;
208 LOG((" at %p: %p/%d [%p] %p/%d [%p] %p/%d [%p] %p/%d\n",
210 n->kids[0], n->counts[0], n->elems[0],
211 n->kids[1], n->counts[1], n->elems[1],
212 n->kids[2], n->counts[2], n->elems[2],
213 n->kids[3], n->counts[3]));
214 LOG((" need to insert %p/%d [%p] %p/%d at position %d\n",
215 left, lcount, e, right, rcount, np - n->kids));
216 if (n->elems[1] == NULL) {
218 * Insert in a 2-node; simple.
220 if (np == &n->kids[0]) {
221 LOG((" inserting on left of 2-node\n"));
222 n->kids[2] = n->kids[1]; n->counts[2] = n->counts[1];
223 n->elems[1] = n->elems[0];
224 n->kids[1] = right; n->counts[1] = rcount;
226 n->kids[0] = left; n->counts[0] = lcount;
227 } else { /* np == &n->kids[1] */
228 LOG((" inserting on right of 2-node\n"));
229 n->kids[2] = right; n->counts[2] = rcount;
231 n->kids[1] = left; n->counts[1] = lcount;
233 if (n->kids[0]) n->kids[0]->parent = n;
234 if (n->kids[1]) n->kids[1]->parent = n;
235 if (n->kids[2]) n->kids[2]->parent = n;
238 } else if (n->elems[2] == NULL) {
240 * Insert in a 3-node; simple.
242 if (np == &n->kids[0]) {
243 LOG((" inserting on left of 3-node\n"));
244 n->kids[3] = n->kids[2]; n->counts[3] = n->counts[2];
245 n->elems[2] = n->elems[1];
246 n->kids[2] = n->kids[1]; n->counts[2] = n->counts[1];
247 n->elems[1] = n->elems[0];
248 n->kids[1] = right; n->counts[1] = rcount;
250 n->kids[0] = left; n->counts[0] = lcount;
251 } else if (np == &n->kids[1]) {
252 LOG((" inserting in middle of 3-node\n"));
253 n->kids[3] = n->kids[2]; n->counts[3] = n->counts[2];
254 n->elems[2] = n->elems[1];
255 n->kids[2] = right; n->counts[2] = rcount;
257 n->kids[1] = left; n->counts[1] = lcount;
258 } else { /* np == &n->kids[2] */
259 LOG((" inserting on right of 3-node\n"));
260 n->kids[3] = right; n->counts[3] = rcount;
262 n->kids[2] = left; n->counts[2] = lcount;
264 if (n->kids[0]) n->kids[0]->parent = n;
265 if (n->kids[1]) n->kids[1]->parent = n;
266 if (n->kids[2]) n->kids[2]->parent = n;
267 if (n->kids[3]) n->kids[3]->parent = n;
271 node234 *m = mknew(node234);
272 m->parent = n->parent;
273 LOG((" splitting a 4-node; created new node %p\n", m));
275 * Insert in a 4-node; split into a 2-node and a
276 * 3-node, and move focus up a level.
278 * I don't think it matters which way round we put the
279 * 2 and the 3. For simplicity, we'll put the 3 first
282 if (np == &n->kids[0]) {
283 m->kids[0] = left; m->counts[0] = lcount;
285 m->kids[1] = right; m->counts[1] = rcount;
286 m->elems[1] = n->elems[0];
287 m->kids[2] = n->kids[1]; m->counts[2] = n->counts[1];
289 n->kids[0] = n->kids[2]; n->counts[0] = n->counts[2];
290 n->elems[0] = n->elems[2];
291 n->kids[1] = n->kids[3]; n->counts[1] = n->counts[3];
292 } else if (np == &n->kids[1]) {
293 m->kids[0] = n->kids[0]; m->counts[0] = n->counts[0];
294 m->elems[0] = n->elems[0];
295 m->kids[1] = left; m->counts[1] = lcount;
297 m->kids[2] = right; m->counts[2] = rcount;
299 n->kids[0] = n->kids[2]; n->counts[0] = n->counts[2];
300 n->elems[0] = n->elems[2];
301 n->kids[1] = n->kids[3]; n->counts[1] = n->counts[3];
302 } else if (np == &n->kids[2]) {
303 m->kids[0] = n->kids[0]; m->counts[0] = n->counts[0];
304 m->elems[0] = n->elems[0];
305 m->kids[1] = n->kids[1]; m->counts[1] = n->counts[1];
306 m->elems[1] = n->elems[1];
307 m->kids[2] = left; m->counts[2] = lcount;
309 n->kids[0] = right; n->counts[0] = rcount;
310 n->elems[0] = n->elems[2];
311 n->kids[1] = n->kids[3]; n->counts[1] = n->counts[3];
312 } else { /* np == &n->kids[3] */
313 m->kids[0] = n->kids[0]; m->counts[0] = n->counts[0];
314 m->elems[0] = n->elems[0];
315 m->kids[1] = n->kids[1]; m->counts[1] = n->counts[1];
316 m->elems[1] = n->elems[1];
317 m->kids[2] = n->kids[2]; m->counts[2] = n->counts[2];
318 n->kids[0] = left; n->counts[0] = lcount;
320 n->kids[1] = right; n->counts[1] = rcount;
323 m->kids[3] = n->kids[3] = n->kids[2] = NULL;
324 m->counts[3] = n->counts[3] = n->counts[2] = 0;
325 m->elems[2] = n->elems[2] = n->elems[1] = NULL;
326 if (m->kids[0]) m->kids[0]->parent = m;
327 if (m->kids[1]) m->kids[1]->parent = m;
328 if (m->kids[2]) m->kids[2]->parent = m;
329 if (n->kids[0]) n->kids[0]->parent = n;
330 if (n->kids[1]) n->kids[1]->parent = n;
331 LOG((" left (%p): %p/%d [%p] %p/%d [%p] %p/%d\n", m,
332 m->kids[0], m->counts[0], m->elems[0],
333 m->kids[1], m->counts[1], m->elems[1],
334 m->kids[2], m->counts[2]));
335 LOG((" right (%p): %p/%d [%p] %p/%d\n", n,
336 n->kids[0], n->counts[0], n->elems[0],
337 n->kids[1], n->counts[1]));
338 left = m; lcount = countnode234(left);
339 right = n; rcount = countnode234(right);
342 np = (n->parent->kids[0] == n ? &n->parent->kids[0] :
343 n->parent->kids[1] == n ? &n->parent->kids[1] :
344 n->parent->kids[2] == n ? &n->parent->kids[2] :
345 &n->parent->kids[3]);
350 * If we've come out of here by `break', n will still be
351 * non-NULL and all we need to do is go back up the tree
352 * updating counts. If we've come here because n is NULL, we
353 * need to create a new root for the tree because the old one
354 * has just split into two. */
357 int count = countnode234(n);
359 childnum = (n->parent->kids[0] == n ? 0 :
360 n->parent->kids[1] == n ? 1 :
361 n->parent->kids[2] == n ? 2 : 3);
362 n->parent->counts[childnum] = count;
366 LOG((" root is overloaded, split into two\n"));
367 t->root = mknew(node234);
368 t->root->kids[0] = left; t->root->counts[0] = lcount;
369 t->root->elems[0] = e;
370 t->root->kids[1] = right; t->root->counts[1] = rcount;
371 t->root->elems[1] = NULL;
372 t->root->kids[2] = NULL; t->root->counts[2] = 0;
373 t->root->elems[2] = NULL;
374 t->root->kids[3] = NULL; t->root->counts[3] = 0;
375 t->root->parent = NULL;
376 if (t->root->kids[0]) t->root->kids[0]->parent = t->root;
377 if (t->root->kids[1]) t->root->kids[1]->parent = t->root;
378 LOG((" new root is %p/%d [%p] %p/%d\n",
379 t->root->kids[0], t->root->counts[0],
381 t->root->kids[1], t->root->counts[1]));
387 void *add234(tree234 *t, void *e) {
388 if (!t->cmp) /* tree is unsorted */
391 return add234_internal(t, e, -1);
393 void *addpos234(tree234 *t, void *e, int index) {
394 if (index < 0 || /* index out of range */
395 t->cmp) /* tree is sorted */
396 return NULL; /* return failure */
398 return add234_internal(t, e, index); /* this checks the upper bound */
402 * Look up the element at a given numeric index in a 2-3-4 tree.
403 * Returns NULL if the index is out of range.
405 void *index234(tree234 *t, int index) {
409 return NULL; /* tree is empty */
411 if (index < 0 || index >= countnode234(t->root))
412 return NULL; /* out of range */
417 if (index < n->counts[0])
419 else if (index -= n->counts[0] + 1, index < 0)
421 else if (index < n->counts[1])
423 else if (index -= n->counts[1] + 1, index < 0)
425 else if (index < n->counts[2])
427 else if (index -= n->counts[2] + 1, index < 0)
433 /* We shouldn't ever get here. I wonder how we did. */
438 * Find an element e in a sorted 2-3-4 tree t. Returns NULL if not
439 * found. e is always passed as the first argument to cmp, so cmp
440 * can be an asymmetric function if desired. cmp can also be passed
441 * as NULL, in which case the compare function from the tree proper
444 void *findrelpos234(tree234 *t, void *e, cmpfn234 cmp,
445 int relation, int *index) {
449 int idx, ecount, kcount, cmpret;
459 * Attempt to find the element itself.
464 * Prepare a fake `cmp' result if e is NULL.
468 assert(relation == REL234_LT || relation == REL234_GT);
469 if (relation == REL234_LT)
470 cmpret = +1; /* e is a max: always greater */
471 else if (relation == REL234_GT)
472 cmpret = -1; /* e is a min: always smaller */
475 for (kcount = 0; kcount < 4; kcount++) {
476 if (kcount >= 3 || n->elems[kcount] == NULL ||
477 (c = cmpret ? cmpret : cmp(e, n->elems[kcount])) < 0) {
480 if (n->kids[kcount]) idx += n->counts[kcount];
497 * We have found the element we're looking for. It's
498 * n->elems[ecount], at tree index idx. If our search
499 * relation is EQ, LE or GE we can now go home.
501 if (relation != REL234_LT && relation != REL234_GT) {
502 if (index) *index = idx;
503 return n->elems[ecount];
507 * Otherwise, we'll do an indexed lookup for the previous
508 * or next element. (It would be perfectly possible to
509 * implement these search types in a non-counted tree by
510 * going back up from where we are, but far more fiddly.)
512 if (relation == REL234_LT)
518 * We've found our way to the bottom of the tree and we
519 * know where we would insert this node if we wanted to:
520 * we'd put it in in place of the (empty) subtree
521 * n->kids[kcount], and it would have index idx
523 * But the actual element isn't there. So if our search
524 * relation is EQ, we're doomed.
526 if (relation == REL234_EQ)
530 * Otherwise, we must do an index lookup for index idx-1
531 * (if we're going left - LE or LT) or index idx (if we're
532 * going right - GE or GT).
534 if (relation == REL234_LT || relation == REL234_LE) {
540 * We know the index of the element we want; just call index234
541 * to do the rest. This will return NULL if the index is out of
542 * bounds, which is exactly what we want.
544 ret = index234(t, idx);
545 if (ret && index) *index = idx;
548 void *find234(tree234 *t, void *e, cmpfn234 cmp) {
549 return findrelpos234(t, e, cmp, REL234_EQ, NULL);
551 void *findrel234(tree234 *t, void *e, cmpfn234 cmp, int relation) {
552 return findrelpos234(t, e, cmp, relation, NULL);
554 void *findpos234(tree234 *t, void *e, cmpfn234 cmp, int *index) {
555 return findrelpos234(t, e, cmp, REL234_EQ, index);
559 * Delete an element e in a 2-3-4 tree. Does not free the element,
560 * merely removes all links to it from the tree nodes.
562 static void *delpos234_internal(tree234 *t, int index) {
570 LOG(("deleting item %d from tree %p\n", index, t));
577 LOG((" node %p: %p/%d [%p] %p/%d [%p] %p/%d [%p] %p/%d index=%d\n",
579 n->kids[0], n->counts[0], n->elems[0],
580 n->kids[1], n->counts[1], n->elems[1],
581 n->kids[2], n->counts[2], n->elems[2],
582 n->kids[3], n->counts[3],
584 if (index < n->counts[0]) {
586 } else if (index -= n->counts[0]+1, index < 0) {
588 } else if (index < n->counts[1]) {
590 } else if (index -= n->counts[1]+1, index < 0) {
592 } else if (index < n->counts[2]) {
594 } else if (index -= n->counts[2]+1, index < 0) {
600 * Recurse down to subtree ki. If it has only one element,
601 * we have to do some transformation to start with.
603 LOG((" moving to subtree %d\n", ki));
605 if (!sub->elems[1]) {
606 LOG((" subtree has only one element!\n", ki));
607 if (ki > 0 && n->kids[ki-1]->elems[1]) {
609 * Case 3a, left-handed variant. Child ki has
610 * only one element, but child ki-1 has two or
611 * more. So we need to move a subtree from ki-1
616 * [more] a A b B c d D e [more] a A b c C d D e
618 node234 *sib = n->kids[ki-1];
619 int lastelem = (sib->elems[2] ? 2 :
620 sib->elems[1] ? 1 : 0);
621 sub->kids[2] = sub->kids[1];
622 sub->counts[2] = sub->counts[1];
623 sub->elems[1] = sub->elems[0];
624 sub->kids[1] = sub->kids[0];
625 sub->counts[1] = sub->counts[0];
626 sub->elems[0] = n->elems[ki-1];
627 sub->kids[0] = sib->kids[lastelem+1];
628 sub->counts[0] = sib->counts[lastelem+1];
629 if (sub->kids[0]) sub->kids[0]->parent = sub;
630 n->elems[ki-1] = sib->elems[lastelem];
631 sib->kids[lastelem+1] = NULL;
632 sib->counts[lastelem+1] = 0;
633 sib->elems[lastelem] = NULL;
634 n->counts[ki] = countnode234(sub);
635 LOG((" case 3a left\n"));
636 LOG((" index and left subtree count before adjustment: %d, %d\n",
637 index, n->counts[ki-1]));
638 index += n->counts[ki-1];
639 n->counts[ki-1] = countnode234(sib);
640 index -= n->counts[ki-1];
641 LOG((" index and left subtree count after adjustment: %d, %d\n",
642 index, n->counts[ki-1]));
643 } else if (ki < 3 && n->kids[ki+1] &&
644 n->kids[ki+1]->elems[1]) {
646 * Case 3a, right-handed variant. ki has only
647 * one element but ki+1 has two or more. Move a
648 * subtree from ki+1 to ki.
652 * a A b c C d D e [more] a A b B c d D e [more]
654 node234 *sib = n->kids[ki+1];
656 sub->elems[1] = n->elems[ki];
657 sub->kids[2] = sib->kids[0];
658 sub->counts[2] = sib->counts[0];
659 if (sub->kids[2]) sub->kids[2]->parent = sub;
660 n->elems[ki] = sib->elems[0];
661 sib->kids[0] = sib->kids[1];
662 sib->counts[0] = sib->counts[1];
663 for (j = 0; j < 2 && sib->elems[j+1]; j++) {
664 sib->kids[j+1] = sib->kids[j+2];
665 sib->counts[j+1] = sib->counts[j+2];
666 sib->elems[j] = sib->elems[j+1];
668 sib->kids[j+1] = NULL;
669 sib->counts[j+1] = 0;
670 sib->elems[j] = NULL;
671 n->counts[ki] = countnode234(sub);
672 n->counts[ki+1] = countnode234(sib);
673 LOG((" case 3a right\n"));
676 * Case 3b. ki has only one element, and has no
677 * neighbour with more than one. So pick a
678 * neighbour and merge it with ki, taking an
679 * element down from n to go in the middle.
683 * a A b c C d a A b B c C d
685 * (Since at all points we have avoided
686 * descending to a node with only one element,
687 * we can be sure that n is not reduced to
688 * nothingness by this move, _unless_ it was
689 * the very first node, ie the root of the
690 * tree. In that case we remove the now-empty
691 * root and replace it with its single large
699 index += n->counts[ki] + 1;
704 sub->kids[3] = sub->kids[1];
705 sub->counts[3] = sub->counts[1];
706 sub->elems[2] = sub->elems[0];
707 sub->kids[2] = sub->kids[0];
708 sub->counts[2] = sub->counts[0];
709 sub->elems[1] = n->elems[ki];
710 sub->kids[1] = sib->kids[1];
711 sub->counts[1] = sib->counts[1];
712 if (sub->kids[1]) sub->kids[1]->parent = sub;
713 sub->elems[0] = sib->elems[0];
714 sub->kids[0] = sib->kids[0];
715 sub->counts[0] = sib->counts[0];
716 if (sub->kids[0]) sub->kids[0]->parent = sub;
718 n->counts[ki+1] = countnode234(sub);
723 * That's built the big node in sub. Now we
724 * need to remove the reference to sib in n.
726 for (j = ki; j < 3 && n->kids[j+1]; j++) {
727 n->kids[j] = n->kids[j+1];
728 n->counts[j] = n->counts[j+1];
729 n->elems[j] = j<2 ? n->elems[j+1] : NULL;
733 if (j < 3) n->elems[j] = NULL;
734 LOG((" case 3b ki=%d\n", ki));
738 * The root is empty and needs to be
741 LOG((" shifting root!\n"));
751 retval = n->elems[ei];
754 return NULL; /* although this shouldn't happen */
757 * Treat special case: this is the one remaining item in
758 * the tree. n is the tree root (no parent), has one
759 * element (no elems[1]), and has no kids (no kids[0]).
761 if (!n->parent && !n->elems[1] && !n->kids[0]) {
762 LOG((" removed last element in tree\n"));
769 * Now we have the element we want, as n->elems[ei], and we
770 * have also arranged for that element not to be the only
771 * one in its node. So...
774 if (!n->kids[0] && n->elems[1]) {
776 * Case 1. n is a leaf node with more than one element,
777 * so it's _really easy_. Just delete the thing and
782 for (i = ei; i < 2 && n->elems[i+1]; i++)
783 n->elems[i] = n->elems[i+1];
786 * Having done that to the leaf node, we now go back up
787 * the tree fixing the counts.
791 childnum = (n->parent->kids[0] == n ? 0 :
792 n->parent->kids[1] == n ? 1 :
793 n->parent->kids[2] == n ? 2 : 3);
794 n->parent->counts[childnum]--;
797 return retval; /* finished! */
798 } else if (n->kids[ei]->elems[1]) {
800 * Case 2a. n is an internal node, and the root of the
801 * subtree to the left of e has more than one element.
802 * So find the predecessor p to e (ie the largest node
803 * in that subtree), place it where e currently is, and
804 * then start the deletion process over again on the
805 * subtree with p as target.
807 node234 *m = n->kids[ei];
811 m = (m->kids[3] ? m->kids[3] :
812 m->kids[2] ? m->kids[2] :
813 m->kids[1] ? m->kids[1] : m->kids[0]);
815 target = (m->elems[2] ? m->elems[2] :
816 m->elems[1] ? m->elems[1] : m->elems[0]);
817 n->elems[ei] = target;
818 index = n->counts[ei]-1;
820 } else if (n->kids[ei+1]->elems[1]) {
822 * Case 2b, symmetric to 2a but s/left/right/ and
823 * s/predecessor/successor/. (And s/largest/smallest/).
825 node234 *m = n->kids[ei+1];
831 target = m->elems[0];
832 n->elems[ei] = target;
837 * Case 2c. n is an internal node, and the subtrees to
838 * the left and right of e both have only one element.
839 * So combine the two subnodes into a single big node
840 * with their own elements on the left and right and e
841 * in the middle, then restart the deletion process on
842 * that subtree, with e still as target.
844 node234 *a = n->kids[ei], *b = n->kids[ei+1];
848 a->elems[1] = n->elems[ei];
849 a->kids[2] = b->kids[0];
850 a->counts[2] = b->counts[0];
851 if (a->kids[2]) a->kids[2]->parent = a;
852 a->elems[2] = b->elems[0];
853 a->kids[3] = b->kids[1];
854 a->counts[3] = b->counts[1];
855 if (a->kids[3]) a->kids[3]->parent = a;
857 n->counts[ei] = countnode234(a);
859 * That's built the big node in a, and destroyed b. Now
860 * remove the reference to b (and e) in n.
862 for (j = ei; j < 2 && n->elems[j+1]; j++) {
863 n->elems[j] = n->elems[j+1];
864 n->kids[j+1] = n->kids[j+2];
865 n->counts[j+1] = n->counts[j+2];
871 * It's possible, in this case, that we've just removed
872 * the only element in the root of the tree. If so,
875 if (n->elems[0] == NULL) {
876 LOG((" shifting root!\n"));
882 * Now go round the deletion process again, with n
883 * pointing at the new big node and e still the same.
886 index = a->counts[0] + a->counts[1] + 1;
890 void *delpos234(tree234 *t, int index) {
891 if (index < 0 || index >= countnode234(t->root))
893 return delpos234_internal(t, index);
895 void *del234(tree234 *t, void *e) {
897 if (!findrelpos234(t, e, NULL, REL234_EQ, &index))
898 return NULL; /* it wasn't in there anyway */
899 return delpos234_internal(t, index); /* it's there; delete it. */
905 * Test code for the 2-3-4 tree. This code maintains an alternative
906 * representation of the data in the tree, in an array (using the
907 * obvious and slow insert and delete functions). After each tree
908 * operation, the verify() function is called, which ensures all
909 * the tree properties are preserved:
910 * - node->child->parent always equals node
911 * - tree->root->parent always equals NULL
912 * - number of kids == 0 or number of elements + 1;
913 * - tree has the same depth everywhere
914 * - every node has at least one element
915 * - subtree element counts are accurate
916 * - any NULL kid pointer is accompanied by a zero count
917 * - in a sorted tree: ordering property between elements of a
918 * node and elements of its children is preserved
919 * and also ensures the list represented by the tree is the same
920 * list it should be. (This last check also doubly verifies the
921 * ordering properties, because the `same list it should be' is by
922 * definition correctly ordered. It also ensures all nodes are
923 * distinct, because the enum functions would get caught in a loop
929 #define srealloc realloc
932 * Error reporting function.
934 void error(char *fmt, ...) {
938 vfprintf(stdout, fmt, ap);
943 /* The array representation of the data. */
945 int arraylen, arraysize;
948 /* The tree representation of the same data. */
956 int chknode(chkctx *ctx, int level, node234 *node,
957 void *lowbound, void *highbound) {
962 /* Count the non-NULL kids. */
963 for (nkids = 0; nkids < 4 && node->kids[nkids]; nkids++);
964 /* Ensure no kids beyond the first NULL are non-NULL. */
965 for (i = nkids; i < 4; i++)
967 error("node %p: nkids=%d but kids[%d] non-NULL",
969 } else if (node->counts[i]) {
970 error("node %p: kids[%d] NULL but count[%d]=%d nonzero",
971 node, i, i, node->counts[i]);
974 /* Count the non-NULL elements. */
975 for (nelems = 0; nelems < 3 && node->elems[nelems]; nelems++);
976 /* Ensure no elements beyond the first NULL are non-NULL. */
977 for (i = nelems; i < 3; i++)
978 if (node->elems[i]) {
979 error("node %p: nelems=%d but elems[%d] non-NULL",
985 * If nkids==0, this is a leaf node; verify that the tree
986 * depth is the same everywhere.
988 if (ctx->treedepth < 0)
989 ctx->treedepth = level; /* we didn't know the depth yet */
990 else if (ctx->treedepth != level)
991 error("node %p: leaf at depth %d, previously seen depth %d",
992 node, level, ctx->treedepth);
995 * If nkids != 0, then it should be nelems+1, unless nelems
996 * is 0 in which case nkids should also be 0 (and so we
997 * shouldn't be in this condition at all).
999 int shouldkids = (nelems ? nelems+1 : 0);
1000 if (nkids != shouldkids) {
1001 error("node %p: %d elems should mean %d kids but has %d",
1002 node, nelems, shouldkids, nkids);
1007 * nelems should be at least 1.
1010 error("node %p: no elems", node, nkids);
1014 * Add nelems to the running element count of the whole tree.
1016 ctx->elemcount += nelems;
1019 * Check ordering property: all elements should be strictly >
1020 * lowbound, strictly < highbound, and strictly < each other in
1021 * sequence. (lowbound and highbound are NULL at edges of tree
1022 * - both NULL at root node - and NULL is considered to be <
1023 * everything and > everything. IYSWIM.)
1026 for (i = -1; i < nelems; i++) {
1027 void *lower = (i == -1 ? lowbound : node->elems[i]);
1028 void *higher = (i+1 == nelems ? highbound : node->elems[i+1]);
1029 if (lower && higher && cmp(lower, higher) >= 0) {
1030 error("node %p: kid comparison [%d=%s,%d=%s] failed",
1031 node, i, lower, i+1, higher);
1037 * Check parent pointers: all non-NULL kids should have a
1038 * parent pointer coming back to this node.
1040 for (i = 0; i < nkids; i++)
1041 if (node->kids[i]->parent != node) {
1042 error("node %p kid %d: parent ptr is %p not %p",
1043 node, i, node->kids[i]->parent, node);
1048 * Now (finally!) recurse into subtrees.
1052 for (i = 0; i < nkids; i++) {
1053 void *lower = (i == 0 ? lowbound : node->elems[i-1]);
1054 void *higher = (i >= nelems ? highbound : node->elems[i]);
1055 int subcount = chknode(ctx, level+1, node->kids[i], lower, higher);
1056 if (node->counts[i] != subcount) {
1057 error("node %p kid %d: count says %d, subtree really has %d",
1058 node, i, node->counts[i], subcount);
1071 ctx.treedepth = -1; /* depth unknown yet */
1072 ctx.elemcount = 0; /* no elements seen yet */
1074 * Verify validity of tree properties.
1077 if (tree->root->parent != NULL)
1078 error("root->parent is %p should be null", tree->root->parent);
1079 chknode(&ctx, 0, tree->root, NULL, NULL);
1081 printf("tree depth: %d\n", ctx.treedepth);
1083 * Enumerate the tree and ensure it matches up to the array.
1085 for (i = 0; NULL != (p = index234(tree, i)); i++) {
1087 error("tree contains more than %d elements", arraylen);
1089 error("enum at position %d: array says %s, tree says %s",
1092 if (ctx.elemcount != i) {
1093 error("tree really contains %d elements, enum gave %d",
1097 error("enum gave only %d elements, array has %d", i, arraylen);
1100 if (ctx.elemcount != i) {
1101 error("tree really contains %d elements, count234 gave %d",
1106 void internal_addtest(void *elem, int index, void *realret) {
1110 if (arraysize < arraylen+1) {
1111 arraysize = arraylen+1+256;
1112 array = (array == NULL ? smalloc(arraysize*sizeof(*array)) :
1113 srealloc(array, arraysize*sizeof(*array)));
1117 /* now i points to the first element >= elem */
1118 retval = elem; /* expect elem returned (success) */
1119 for (j = arraylen; j > i; j--)
1120 array[j] = array[j-1];
1121 array[i] = elem; /* add elem to array */
1124 if (realret != retval) {
1125 error("add: retval was %p expected %p", realret, retval);
1131 void addtest(void *elem) {
1135 realret = add234(tree, elem);
1138 while (i < arraylen && cmp(elem, array[i]) > 0)
1140 if (i < arraylen && !cmp(elem, array[i])) {
1141 void *retval = array[i]; /* expect that returned not elem */
1142 if (realret != retval) {
1143 error("add: retval was %p expected %p", realret, retval);
1146 internal_addtest(elem, i, realret);
1149 void addpostest(void *elem, int i) {
1152 realret = addpos234(tree, elem, i);
1154 internal_addtest(elem, i, realret);
1157 void delpostest(int i) {
1159 void *elem = array[i], *ret;
1161 /* i points to the right element */
1162 while (i < arraylen-1) {
1163 array[i] = array[i+1];
1166 arraylen--; /* delete elem from array */
1169 ret = del234(tree, elem);
1171 ret = delpos234(tree, index);
1174 error("del returned %p, expected %p", ret, elem);
1180 void deltest(void *elem) {
1184 while (i < arraylen && cmp(elem, array[i]) > 0)
1186 if (i >= arraylen || cmp(elem, array[i]) != 0)
1187 return; /* don't do it! */
1191 /* A sample data set and test utility. Designed for pseudo-randomness,
1192 * and yet repeatability. */
1195 * This random number generator uses the `portable implementation'
1196 * given in ANSI C99 draft N869. It assumes `unsigned' is 32 bits;
1199 int randomnumber(unsigned *seed) {
1200 *seed *= 1103515245;
1202 return ((*seed) / 65536) % 32768;
1205 int mycmp(void *av, void *bv) {
1206 char const *a = (char const *)av;
1207 char const *b = (char const *)bv;
1208 return strcmp(a, b);
1211 #define lenof(x) ( sizeof((x)) / sizeof(*(x)) )
1214 "a", "ab", "absque", "coram", "de",
1215 "palam", "clam", "cum", "ex", "e",
1216 "sine", "tenus", "pro", "prae",
1217 "banana", "carrot", "cabbage", "broccoli", "onion", "zebra",
1218 "penguin", "blancmange", "pangolin", "whale", "hedgehog",
1219 "giraffe", "peanut", "bungee", "foo", "bar", "baz", "quux",
1220 "murfl", "spoo", "breen", "flarn", "octothorpe",
1221 "snail", "tiger", "elephant", "octopus", "warthog", "armadillo",
1222 "aardvark", "wyvern", "dragon", "elf", "dwarf", "orc", "goblin",
1223 "pixie", "basilisk", "warg", "ape", "lizard", "newt", "shopkeeper",
1224 "wand", "ring", "amulet"
1227 #define NSTR lenof(strings)
1229 int findtest(void) {
1230 const static int rels[] = {
1231 REL234_EQ, REL234_GE, REL234_LE, REL234_LT, REL234_GT
1233 const static char *const relnames[] = {
1234 "EQ", "GE", "LE", "LT", "GT"
1236 int i, j, rel, index;
1237 char *p, *ret, *realret, *realret2;
1240 for (i = 0; i < NSTR; i++) {
1242 for (j = 0; j < sizeof(rels)/sizeof(*rels); j++) {
1245 lo = 0; hi = arraylen-1;
1247 mid = (lo + hi) / 2;
1248 c = strcmp(p, array[mid]);
1258 if (rel == REL234_LT)
1259 ret = (mid > 0 ? array[--mid] : NULL);
1260 else if (rel == REL234_GT)
1261 ret = (mid < arraylen-1 ? array[++mid] : NULL);
1266 if (rel == REL234_LT || rel == REL234_LE) {
1268 ret = (hi >= 0 ? array[hi] : NULL);
1269 } else if (rel == REL234_GT || rel == REL234_GE) {
1271 ret = (lo < arraylen ? array[lo] : NULL);
1276 realret = findrelpos234(tree, p, NULL, rel, &index);
1277 if (realret != ret) {
1278 error("find(\"%s\",%s) gave %s should be %s",
1279 p, relnames[j], realret, ret);
1281 if (realret && index != mid) {
1282 error("find(\"%s\",%s) gave %d should be %d",
1283 p, relnames[j], index, mid);
1285 if (realret && rel == REL234_EQ) {
1286 realret2 = index234(tree, index);
1287 if (realret2 != realret) {
1288 error("find(\"%s\",%s) gave %s(%d) but %d -> %s",
1289 p, relnames[j], realret, index, index, realret2);
1293 printf("find(\"%s\",%s) gave %s(%d)\n", p, relnames[j],
1299 realret = findrelpos234(tree, NULL, NULL, REL234_GT, &index);
1300 if (arraylen && (realret != array[0] || index != 0)) {
1301 error("find(NULL,GT) gave %s(%d) should be %s(0)",
1302 realret, index, array[0]);
1303 } else if (!arraylen && (realret != NULL)) {
1304 error("find(NULL,GT) gave %s(%d) should be NULL",
1308 realret = findrelpos234(tree, NULL, NULL, REL234_LT, &index);
1309 if (arraylen && (realret != array[arraylen-1] || index != arraylen-1)) {
1310 error("find(NULL,LT) gave %s(%d) should be %s(0)",
1311 realret, index, array[arraylen-1]);
1312 } else if (!arraylen && (realret != NULL)) {
1313 error("find(NULL,LT) gave %s(%d) should be NULL",
1323 for (i = 0; i < NSTR; i++) in[i] = 0;
1325 arraylen = arraysize = 0;
1326 tree = newtree234(mycmp);
1330 for (i = 0; i < 10000; i++) {
1331 j = randomnumber(&seed);
1333 printf("trial: %d\n", i);
1335 printf("deleting %s (%d)\n", strings[j], j);
1336 deltest(strings[j]);
1339 printf("adding %s (%d)\n", strings[j], j);
1340 addtest(strings[j]);
1346 while (arraylen > 0) {
1347 j = randomnumber(&seed);
1355 * Now try an unsorted tree. We don't really need to test
1356 * delpos234 because we know del234 is based on it, so it's
1357 * already been tested in the above sorted-tree code; but for
1358 * completeness we'll use it to tear down our unsorted tree
1359 * once we've built it.
1361 tree = newtree234(NULL);
1364 for (i = 0; i < 1000; i++) {
1365 printf("trial: %d\n", i);
1366 j = randomnumber(&seed);
1368 k = randomnumber(&seed);
1369 k %= count234(tree)+1;
1370 printf("adding string %s at index %d\n", strings[j], k);
1371 addpostest(strings[j], k);
1373 while (count234(tree) > 0) {
1374 printf("cleanup: tree size %d\n", count234(tree));
1375 j = randomnumber(&seed);
1376 j %= count234(tree);
1377 printf("deleting string %s from index %d\n", array[j], j);