Index: head/include/search.h =================================================================== --- head/include/search.h +++ head/include/search.h @@ -35,8 +35,9 @@ #ifdef _SEARCH_PRIVATE typedef struct node { - char *key; + void *key; struct node *llink, *rlink; + signed char balance; } node_t; struct que_elem { Index: head/lib/libc/stdlib/tdelete.c =================================================================== --- head/lib/libc/stdlib/tdelete.c +++ head/lib/libc/stdlib/tdelete.c @@ -1,72 +1,213 @@ -/* $NetBSD: tdelete.c,v 1.2 1999/09/16 11:45:37 lukem Exp $ */ - -/* - * Tree search generalized from Knuth (6.2.2) Algorithm T just like - * the AT&T man page says. - * - * The node_t structure is for internal use only, lint doesn't grok it. +/*- + * Copyright (c) 2015 Nuxi, https://nuxi.nl/ * - * Written by reading the System V Interface Definition, not the code. + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. * - * Totally public domain. + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. */ #include -#if 0 -#if defined(LIBC_SCCS) && !defined(lint) -__RCSID("$NetBSD: tdelete.c,v 1.6 2012/06/25 22:32:45 abs Exp $"); -#endif /* LIBC_SCCS and not lint */ -#endif __FBSDID("$FreeBSD$"); -#define _SEARCH_PRIVATE +#define _SEARCH_PRIVATE #include +#include #include +#include "tsearch_path.h" /* - * find a node with given key - * - * vkey: key to be found - * vrootp: address of the root of the tree - * compar: function to carry out node comparisons - */ + * Makes a step to the left along the binary search tree. This step is + * also saved, so it can be replayed while rebalancing. +*/ +#define GO_LEFT() do { \ + if ((*leaf)->balance == 0 || \ + ((*leaf)->balance < 0 && (*leaf)->rlink->balance == 0)) { \ + /* \ + * If we reach a node that is balanced, or has a child \ + * in the opposite direction that is balanced, we know \ + * that we won't need to perform any rotations above \ + * this point. In this case rotations are always \ + * capable of keeping the subtree in balance. Make \ + * this the base node and reset the path. \ + */ \ + base = leaf; \ + path_init(&path); \ + } \ + path_taking_left(&path); \ + leaf = &(*leaf)->llink; \ +} while (0) + +/* Makes a step to the right along the binary search tree. */ +#define GO_RIGHT() do { \ + if ((*leaf)->balance == 0 || \ + ((*leaf)->balance > 0 && (*leaf)->llink->balance == 0)) { \ + base = leaf; \ + path_init(&path); \ + } \ + result = &(*leaf)->key; \ + path_taking_right(&path); \ + leaf = &(*leaf)->rlink; \ +} while (0) + void * -tdelete(const void * __restrict vkey, void ** __restrict vrootp, +tdelete(const void *restrict key, void **restrict rootp, int (*compar)(const void *, const void *)) { - node_t **rootp = (node_t **)vrootp; - node_t *p, *q, *r; + struct path path; + node_t *root, **base, **leaf, *old, **n, *x, *y, *z; + void *result; int cmp; - if (rootp == NULL || (p = *rootp) == NULL) - return NULL; + /* POSIX requires that tdelete() returns NULL if rootp is NULL. */ + if (rootp == NULL) + return (NULL); + root = *rootp; + + /* + * Find the leaf that needs to be removed. Return if we cannot + * find an existing entry. Keep track of the path that is taken + * to get to the node, as we will need it to adjust the + * balances. + */ + result = (void *)1; + path_init(&path); + base = &root; + leaf = &root; + for (;;) { + if (*leaf == NULL) + return (NULL); + cmp = compar(key, (*leaf)->key); + if (cmp < 0) { + result = &(*leaf)->key; + GO_LEFT(); + } else if (cmp > 0) { + result = &(*leaf)->key; + GO_RIGHT(); + } else { + break; + } + } - while ((cmp = (*compar)(vkey, (*rootp)->key)) != 0) { - p = *rootp; - rootp = (cmp < 0) ? - &(*rootp)->llink : /* follow llink branch */ - &(*rootp)->rlink; /* follow rlink branch */ - if (*rootp == NULL) - return NULL; /* key not found */ + /* Found a matching key in the tree. Remove the node. */ + if ((*leaf)->llink == NULL) { + /* Node has no left children. Replace by its right subtree. */ + old = *leaf; + *leaf = old->rlink; + free(old); + } else { + /* + * Node has left children. Replace this node's key by + * its predecessor's and remove that node instead. + */ + void **keyp = &(*leaf)->key; + GO_LEFT(); + while ((*leaf)->rlink != NULL) + GO_RIGHT(); + old = *leaf; + *keyp = old->key; + *leaf = old->llink; + free(old); } - r = (*rootp)->rlink; /* D1: */ - if ((q = (*rootp)->llink) == NULL) /* Left NULL? */ - q = r; - else if (r != NULL) { /* Right link is NULL? */ - if (r->llink == NULL) { /* D2: Find successor */ - r->llink = q; - q = r; - } else { /* D3: Find NULL link */ - for (q = r->llink; q->llink != NULL; q = r->llink) - r = q; - r->llink = q->rlink; - q->llink = (*rootp)->llink; - q->rlink = (*rootp)->rlink; + + /* + * Walk along the same path a second time and adjust the + * balances. Though this code looks similar to the rebalancing + * performed in tsearch(), it is not identical. We now also need + * to consider the case of outward imbalance in the right-right + * and left-left case that only exists when deleting. Hence the + * duplication of code. + */ + for (n = base; n != leaf;) { + if (path_took_left(&path)) { + x = *n; + if (x->balance < 0) { + y = x->rlink; + if (y->balance > 0) { + /* Right-left case. */ + z = y->llink; + x->rlink = z->llink; + z->llink = x; + y->llink = z->rlink; + z->rlink = y; + *n = z; + + x->balance = z->balance < 0 ? 1 : 0; + y->balance = z->balance > 0 ? -1 : 0; + z->balance = 0; + } else { + /* Right-right case. */ + x->rlink = y->llink; + y->llink = x; + *n = y; + + if (y->balance < 0) { + x->balance = 0; + y->balance = 0; + } else { + x->balance = -1; + y->balance = 1; + } + } + } else { + --x->balance; + } + n = &x->llink; + } else { + x = *n; + if (x->balance > 0) { + y = x->llink; + if (y->balance < 0) { + /* Left-right case. */ + z = y->rlink; + y->rlink = z->llink; + z->llink = y; + x->llink = z->rlink; + z->rlink = x; + *n = z; + + x->balance = z->balance > 0 ? -1 : 0; + y->balance = z->balance < 0 ? 1 : 0; + z->balance = 0; + } else { + /* Left-left case. */ + x->llink = y->rlink; + y->rlink = x; + *n = y; + + if (y->balance > 0) { + x->balance = 0; + y->balance = 0; + } else { + x->balance = 1; + y->balance = -1; + } + } + } else { + ++x->balance; + } + n = &x->rlink; } } - if (p != *rootp) - free(*rootp); /* D4: Free node */ - *rootp = q; /* link parent to new node */ - return p; + + /* Return the parent of the old entry. */ + *rootp = root; + return (result); } Index: head/lib/libc/stdlib/tsearch.3 =================================================================== --- head/lib/libc/stdlib/tsearch.3 +++ head/lib/libc/stdlib/tsearch.3 @@ -27,7 +27,7 @@ .\" OpenBSD: tsearch.3,v 1.2 1998/06/21 22:13:49 millert Exp .\" $FreeBSD$ .\" -.Dd June 15, 1997 +.Dd December 6, 2015 .Dt TSEARCH 3 .Os .Sh NAME @@ -50,8 +50,12 @@ .Fn tsearch , and .Fn twalk -functions manage binary search trees based on algorithms T and D -from Knuth (6.2.2). +functions manage binary search trees. +This implementation uses a balanced AVL tree, +which due to its strong theoretical limit on the height of the tree has +the advantage of calling the comparison function relatively +infrequently. +.Pp The comparison function passed in by the user has the same style of return values as .Xr strcmp 3 . Index: head/lib/libc/stdlib/tsearch.c =================================================================== --- head/lib/libc/stdlib/tsearch.c +++ head/lib/libc/stdlib/tsearch.c @@ -1,55 +1,200 @@ -/* $NetBSD: tsearch.c,v 1.7 2012/06/25 22:32:45 abs Exp $ */ - -/* - * Tree search generalized from Knuth (6.2.2) Algorithm T just like - * the AT&T man page says. - * - * The node_t structure is for internal use only, lint doesn't grok it. +/*- + * Copyright (c) 2015 Nuxi, https://nuxi.nl/ * - * Written by reading the System V Interface Definition, not the code. + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. * - * Totally public domain. + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. */ #include -#if 0 -#if defined(LIBC_SCCS) && !defined(lint) -__RCSID("$NetBSD: tsearch.c,v 1.7 2012/06/25 22:32:45 abs Exp $"); -#endif /* LIBC_SCCS and not lint */ -#endif __FBSDID("$FreeBSD$"); -#define _SEARCH_PRIVATE +#define _SEARCH_PRIVATE #include #include -/* find or insert datum into search tree */ +#include "tsearch_path.h" + void * -tsearch(const void *vkey, void **vrootp, +tsearch(const void *key, void **rootp, int (*compar)(const void *, const void *)) { - node_t *q; - node_t **rootp = (node_t **)vrootp; + struct path path; + node_t *root, **base, **leaf, *result, *n, *x, *y, *z; + int cmp; + /* POSIX requires that tsearch() returns NULL if rootp is NULL. */ if (rootp == NULL) - return NULL; + return (NULL); + root = *rootp; - while (*rootp != NULL) { /* Knuth's T1: */ - int r; - - if ((r = (*compar)(vkey, (*rootp)->key)) == 0) /* T2: */ - return *rootp; /* we found it! */ + /* + * Find the leaf where the new key needs to be inserted. Return + * if we've found an existing entry. Keep track of the path that + * is taken to get to the node, as we will need it to adjust the + * balances. + */ + path_init(&path); + base = &root; + leaf = &root; + while (*leaf != NULL) { + if ((*leaf)->balance != 0) { + /* + * If we reach a node that has a non-zero + * balance on the way, we know that we won't + * need to perform any rotations above this + * point. In this case rotations are always + * capable of keeping the subtree in balance. + * Make this the base node and reset the path. + */ + base = leaf; + path_init(&path); + } + cmp = compar(key, (*leaf)->key); + if (cmp < 0) { + path_taking_left(&path); + leaf = &(*leaf)->llink; + } else if (cmp > 0) { + path_taking_right(&path); + leaf = &(*leaf)->rlink; + } else { + return (&(*leaf)->key); + } + } - rootp = (r < 0) ? - &(*rootp)->llink : /* T3: follow left branch */ - &(*rootp)->rlink; /* T4: follow right branch */ + /* Did not find a matching key in the tree. Insert a new node. */ + result = *leaf = malloc(sizeof(**leaf)); + if (result == NULL) + return (NULL); + result->key = (void *)key; + result->llink = NULL; + result->rlink = NULL; + result->balance = 0; + + /* + * Walk along the same path a second time and adjust the + * balances. Except for the first node, all of these nodes must + * have a balance of zero, meaning that these nodes will not get + * out of balance. + */ + for (n = *base; n != *leaf;) { + if (path_took_left(&path)) { + n->balance += 1; + n = n->llink; + } else { + n->balance -= 1; + n = n->rlink; + } } - q = malloc(sizeof(node_t)); /* T5: key not found */ - if (q != 0) { /* make new node */ - *rootp = q; /* link new node to old */ - q->key = __DECONST(void *, vkey);/* initialize new node */ - q->llink = q->rlink = NULL; + /* + * Adjusting the balances may have pushed the balance of the + * base node out of range. Perform a rotation to bring the + * balance back in range. + */ + x = *base; + if (x->balance > 1) { + y = x->llink; + if (y->balance < 0) { + /* + * Left-right case. + * + * x + * / \ z + * y D / \ + * / \ --> y x + * A z /| |\ + * / \ A B C D + * B C + */ + z = y->rlink; + y->rlink = z->llink; + z->llink = y; + x->llink = z->rlink; + z->rlink = x; + *base = z; + + x->balance = z->balance > 0 ? -1 : 0; + y->balance = z->balance < 0 ? 1 : 0; + z->balance = 0; + } else { + /* + * Left-left case. + * + * x y + * / \ / \ + * y C --> A x + * / \ / \ + * A B B C + */ + x->llink = y->rlink; + y->rlink = x; + *base = y; + + x->balance = 0; + y->balance = 0; + } + } else if (x->balance < -1) { + y = x->rlink; + if (y->balance > 0) { + /* + * Right-left case. + * + * x + * / \ z + * A y / \ + * / \ --> x y + * z D /| |\ + * / \ A B C D + * B C + */ + node_t *z = y->llink; + x->rlink = z->llink; + z->llink = x; + y->llink = z->rlink; + z->rlink = y; + *base = z; + + x->balance = z->balance < 0 ? 1 : 0; + y->balance = z->balance > 0 ? -1 : 0; + z->balance = 0; + } else { + /* + * Right-right case. + * + * x y + * / \ / \ + * A y --> x C + * / \ / \ + * B C A B + */ + x->rlink = y->llink; + y->llink = x; + *base = y; + + x->balance = 0; + y->balance = 0; + } } - return q; + + /* Return the new entry. */ + *rootp = root; + return (&result->key); } Index: head/lib/libc/tests/stdlib/Makefile =================================================================== --- head/lib/libc/tests/stdlib/Makefile +++ head/lib/libc/tests/stdlib/Makefile @@ -3,6 +3,7 @@ ATF_TESTS_C+= heapsort_test ATF_TESTS_C+= mergesort_test ATF_TESTS_C+= qsort_test +ATF_TESTS_C+= tsearch_test # TODO: t_getenv_thread, t_mi_vector_hash NETBSD_ATF_TESTS_C+= abs_test