Index: head/lib/libc/db/hash/hash_page.c =================================================================== --- head/lib/libc/db/hash/hash_page.c (revision 298322) +++ head/lib/libc/db/hash/hash_page.c (revision 298323) @@ -1,935 +1,935 @@ /*- * Copyright (c) 1990, 1993, 1994 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Margo Seltzer. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #if defined(LIBC_SCCS) && !defined(lint) static char sccsid[] = "@(#)hash_page.c 8.7 (Berkeley) 8/16/94"; #endif /* LIBC_SCCS and not lint */ #include __FBSDID("$FreeBSD$"); /* * PACKAGE: hashing * * DESCRIPTION: * Page manipulation for hashing package. * * ROUTINES: * * External * __get_page * __add_ovflpage * Internal * overflow_page * open_temp */ #include "namespace.h" #include #include #include #include #include #include #include #include #ifdef DEBUG #include #endif #include "un-namespace.h" #include "libc_private.h" #include #include "hash.h" #include "page.h" #include "extern.h" static u_int32_t *fetch_bitmap(HTAB *, int); static u_int32_t first_free(u_int32_t); static int open_temp(HTAB *); static u_int16_t overflow_page(HTAB *); static void putpair(char *, const DBT *, const DBT *); static void squeeze_key(u_int16_t *, const DBT *, const DBT *); static int ugly_split(HTAB *, u_int32_t, BUFHEAD *, BUFHEAD *, int, int); #define PAGE_INIT(P) { \ ((u_int16_t *)(P))[0] = 0; \ ((u_int16_t *)(P))[1] = hashp->BSIZE - 3 * sizeof(u_int16_t); \ ((u_int16_t *)(P))[2] = hashp->BSIZE; \ } /* * This is called AFTER we have verified that there is room on the page for * the pair (PAIRFITS has returned true) so we go right ahead and start moving * stuff on. */ static void putpair(char *p, const DBT *key, const DBT *val) { u_int16_t *bp, n, off; bp = (u_int16_t *)p; /* Enter the key first. */ n = bp[0]; off = OFFSET(bp) - key->size; memmove(p + off, key->data, key->size); bp[++n] = off; /* Now the data. */ off -= val->size; memmove(p + off, val->data, val->size); bp[++n] = off; /* Adjust page info. 
/*
 * Delete the key/data pair whose key-offset entry is bp[ndx] from the page
 * held in bufp, then close the gap it leaves in both the pair data and the
 * u_int16_t index at the front of the page.
 *
 * If the entry after the key (bp[ndx + 1]) is below REAL_KEY the pair is not
 * an ordinary in-page pair and the whole job is handed to __big_delete().
 *
 * Returns:
 *	 0 OK
 *	-1 error (only reachable here through __big_delete())
 */
int
__delpair(HTAB *hashp, BUFHEAD *bufp, int ndx)
{
	u_int16_t *bp, newoff, pairlen;
	int n;

	bp = (u_int16_t *)bufp->page;
	n = bp[0];		/* number of index entries on the page */

	if (bp[ndx + 1] < REAL_KEY)
		return (__big_delete(hashp, bufp));
	/*
	 * newoff is where this pair's bytes end: the data offset of the
	 * previous pair, or the end of the page for the first pair.
	 */
	if (ndx != 1)
		newoff = bp[ndx - 1];
	else
		newoff = hashp->BSIZE;
	/* Total bytes (key + data) occupied by the pair being removed. */
	pairlen = newoff - bp[ndx + 1];

	if (ndx != (n - 1)) {
		/* Hard Case -- need to shuffle keys */
		int i;
		char *src = bufp->page + (int)OFFSET(bp);
		char *dst = src + (int)pairlen;

		/*
		 * Slide every byte stored below the deleted pair up by
		 * pairlen; the regions overlap, hence memmove.
		 */
		memmove(dst, src, bp[ndx + 1] - OFFSET(bp));

		/* Now adjust the pointers */
		for (i = ndx + 2; i <= n; i += 2) {
			if (bp[i + 1] == OVFLPAGE) {
				/*
				 * Overflow entries are copied down two slots
				 * unchanged; no pairlen adjustment is applied.
				 */
				bp[i - 2] = bp[i];
				bp[i - 1] = bp[i + 1];
			} else {
				bp[i - 2] = bp[i] + pairlen;
				bp[i - 1] = bp[i + 1] + pairlen;
			}
		}
		if (ndx == hashp->cndx) {
			/*
			 * We just removed pair we were "pointing" to.
			 * By moving back the cndx we ensure subsequent
			 * hash_seq() calls won't skip over any entries.
			 */
			hashp->cndx -= 2;
		}
	}
	/*
	 * Finally adjust the page data.  OFFSET(bp) and bp[n + 1] are read
	 * while bp[0] still holds the old count; bp[n - 1] and bp[n] are the
	 * two slots that follow the index once bp[0] becomes n - 2 (the same
	 * positions putpair() fills after the last entry).  The freed index
	 * slots add 2 * sizeof(u_int16_t) to the free-space figure.
	 */
	bp[n] = OFFSET(bp) + pairlen;
	bp[n - 1] = bp[n + 1] + pairlen + 2 * sizeof(u_int16_t);
	bp[0] = n - 2;
	hashp->NKEYS--;

	bufp->flags |= BUF_MOD;
	return (0);
}
/*
 * Called when we encounter an overflow or big key/data page during split
 * handling.  This is special cased since we have to begin checking whether
 * the key/data pairs fit on their respective pages and because we may need
 * overflow pages for both the old and new pages.
 *
 * The first page might be a page with regular key/data pairs in which case
 * we have a regular overflow condition and just need to go on to the next
 * page or it might be a big key/data pair in which case we need to fix the
 * big key/data pair.
 *
 * Each pass of the outer loop handles one page of the old bucket's chain:
 * a big pair is delegated to __big_split(), an overflow marker makes the
 * next overflow page current, and then every regular pair on the current
 * page is re-hashed and appended to either the old or the new chain.
 *
 * Returns:
 *	 0 ==> success
 *	-1 ==> failure
 */
static int
ugly_split(HTAB *hashp,
    u_int32_t obucket,	/* Same as __split_page. */
    BUFHEAD *old_bufp,
    BUFHEAD *new_bufp,
    int copyto,		/* First byte on page which contains key/data values. */
    int moved)		/* Index entries already moved to the new page
			   (__split_page() adds 2 per pair moved). */
{
	BUFHEAD *bufp;	/* Buffer header for ino */
	u_int16_t *ino;		/* Page keys come off of */
	u_int16_t *np;		/* New page */
	u_int16_t *op;		/* Page keys go on to if they aren't moving */

	BUFHEAD *last_bfp;	/* Last buf header OVFL needing to be freed */
	DBT key, val;
	SPLIT_RETURN ret;
	u_int16_t n, off, ov_addr, scopyto;
	char *cino;		/* Character value of ino */

	bufp = old_bufp;
	ino = (u_int16_t *)old_bufp->page;
	np = (u_int16_t *)new_bufp->page;
	op = (u_int16_t *)old_bufp->page;
	last_bfp = NULL;
	scopyto = (u_int16_t)copyto;	/* ANSI */

	/*
	 * Start with n on the last pair's key slot so the loop is entered;
	 * afterwards n is wherever the inner for-loop stopped, and the loop
	 * continues for as long as that is short of ino[0].
	 */
	n = ino[0] - 1;
	while (n < ino[0]) {
		if (ino[2] < REAL_KEY && ino[2] != OVFLPAGE) {
			/* Big key/data pair: let __big_split() place it. */
			if (__big_split(hashp,
			    old_bufp, new_bufp, bufp, bufp->addr, obucket, &ret))
				return (-1);
			old_bufp = ret.oldp;
			if (!old_bufp)
				return (-1);
			op = (u_int16_t *)old_bufp->page;
			new_bufp = ret.newp;
			if (!new_bufp)
				return (-1);
			np = (u_int16_t *)new_bufp->page;
			bufp = ret.nextp;
			/* No page follows the big pair: nothing left to do. */
			if (!bufp)
				return (0);
			cino = (char *)bufp->page;
			ino = (u_int16_t *)cino;
			last_bfp = ret.nextp;
		} else if (ino[n + 1] == OVFLPAGE) {
			ov_addr = ino[n];
			/*
			 * Fix up the old page -- the extra 2 are the fields
			 * which contained the overflow information.
			 */
			ino[0] -= (moved + 2);
			FREESPACE(ino) =
			    scopyto - sizeof(u_int16_t) * (ino[0] + 3);
			OFFSET(ino) = scopyto;

			bufp = __get_buf(hashp, ov_addr, bufp, 0);
			if (!bufp)
				return (-1);

			/* Continue from the top of the overflow page. */
			ino = (u_int16_t *)bufp->page;
			n = 1;
			scopyto = hashp->BSIZE;
			moved = 0;

			/*
			 * The previously visited overflow page has been
			 * emptied by now; release it and remember this one.
			 */
			if (last_bfp)
				__free_ovflpage(hashp, last_bfp);
			last_bfp = bufp;
		}
		/* Move regular sized pairs if there are any */
		off = hashp->BSIZE;
		for (n = 1; (n < ino[0]) && (ino[n + 1] >= REAL_KEY); n += 2) {
			cino = (char *)ino;
			/*
			 * Key bytes run from ino[n] up to the previous pair's
			 * data offset (off); data bytes from ino[n + 1] up to
			 * ino[n].
			 */
			key.data = (u_char *)cino + ino[n];
			key.size = off - ino[n];
			val.data = (u_char *)cino + ino[n + 1];
			val.size = ino[n] - ino[n + 1];
			off = ino[n + 1];

			if (__call_hash(hashp, key.data, key.size) == obucket) {
				/* Keep on old page */
				if (PAIRFITS(op, (&key), (&val)))
					putpair((char *)op, &key, &val);
				else {
					old_bufp =
					    __add_ovflpage(hashp, old_bufp);
					if (!old_bufp)
						return (-1);
					op = (u_int16_t *)old_bufp->page;
					putpair((char *)op, &key, &val);
				}
				old_bufp->flags |= BUF_MOD;
			} else {
				/* Move to new page */
				if (PAIRFITS(np, (&key), (&val)))
					putpair((char *)np, &key, &val);
				else {
					new_bufp =
					    __add_ovflpage(hashp, new_bufp);
					if (!new_bufp)
						return (-1);
					np = (u_int16_t *)new_bufp->page;
					putpair((char *)np, &key, &val);
				}
				new_bufp->flags |= BUF_MOD;
			}
		}
	}
	/* Release the last overflow page that was walked, if any. */
	if (last_bfp)
		__free_ovflpage(hashp, last_bfp);
	return (0);
}
squeeze key on this page */ if (bp[2] >= REAL_KEY && FREESPACE(bp) >= PAIRSIZE(key, val)) { squeeze_key(bp, key, val); goto stats; } else { bufp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0); if (!bufp) return (-1); bp = (u_int16_t *)bufp->page; } } if (PAIRFITS(bp, key, val)) putpair(bufp->page, key, val); else { do_expand = 1; bufp = __add_ovflpage(hashp, bufp); if (!bufp) return (-1); sop = (u_int16_t *)bufp->page; if (PAIRFITS(sop, key, val)) putpair((char *)sop, key, val); else if (__big_insert(hashp, bufp, key, val)) return (-1); } stats: bufp->flags |= BUF_MOD; /* * If the average number of keys per bucket exceeds the fill factor, * expand the table. */ hashp->NKEYS++; if (do_expand || (hashp->NKEYS / (hashp->MAX_BUCKET + 1) > hashp->FFACTOR)) return (__expand_table(hashp)); return (0); } /* * * Returns: * pointer on success * NULL on error */ BUFHEAD * __add_ovflpage(HTAB *hashp, BUFHEAD *bufp) { u_int16_t *sp, ndx, ovfl_num; #ifdef DEBUG1 int tmp1, tmp2; #endif sp = (u_int16_t *)bufp->page; /* Check if we are dynamically determining the fill factor */ if (hashp->FFACTOR == DEF_FFACTOR) { hashp->FFACTOR = sp[0] >> 1; if (hashp->FFACTOR < MIN_FFACTOR) hashp->FFACTOR = MIN_FFACTOR; } bufp->flags |= BUF_MOD; ovfl_num = overflow_page(hashp); #ifdef DEBUG1 tmp1 = bufp->addr; tmp2 = bufp->ovfl ? bufp->ovfl->addr : 0; #endif if (!ovfl_num || !(bufp->ovfl = __get_buf(hashp, ovfl_num, bufp, 1))) return (NULL); bufp->ovfl->flags |= BUF_MOD; #ifdef DEBUG1 (void)fprintf(stderr, "ADDOVFLPAGE: %d->ovfl was %d is now %d\n", tmp1, tmp2, bufp->ovfl->addr); #endif ndx = sp[0]; /* * Since a pair is allocated on a page only if there's room to add * an overflow page, we know that the OVFL information will fit on * the page. 
*/ sp[ndx + 4] = OFFSET(sp); sp[ndx + 3] = FREESPACE(sp) - OVFLSIZE; sp[ndx + 1] = ovfl_num; sp[ndx + 2] = OVFLPAGE; sp[0] = ndx + 2; #ifdef HASH_STATISTICS hash_overflows++; #endif return (bufp->ovfl); } /* * Returns: * 0 indicates SUCCESS * -1 indicates FAILURE */ int __get_page(HTAB *hashp, char *p, u_int32_t bucket, int is_bucket, int is_disk, int is_bitmap) { int fd, page, size, rsize; u_int16_t *bp; fd = hashp->fp; size = hashp->BSIZE; if ((fd == -1) || !is_disk) { PAGE_INIT(p); return (0); } if (is_bucket) page = BUCKET_TO_PAGE(bucket); else page = OADDR_TO_PAGE(bucket); if ((rsize = pread(fd, p, size, (off_t)page << hashp->BSHIFT)) == -1) return (-1); bp = (u_int16_t *)p; if (!rsize) bp[0] = 0; /* We hit the EOF, so initialize a new page */ else if (rsize != size) { errno = EFTYPE; return (-1); } if (!is_bitmap && !bp[0]) { PAGE_INIT(p); } else if (hashp->LORDER != BYTE_ORDER) { int i, max; if (is_bitmap) { max = hashp->BSIZE >> 2; /* divide by 4 */ for (i = 0; i < max; i++) M_32_SWAP(((int *)p)[i]); } else { M_16_SWAP(bp[0]); max = bp[0] + 2; for (i = 1; i <= max; i++) M_16_SWAP(bp[i]); } } return (0); } /* * Write page p to disk * * Returns: * 0 ==> OK * -1 ==>failure */ int __put_page(HTAB *hashp, char *p, u_int32_t bucket, int is_bucket, int is_bitmap) { int fd, page, size, wsize; size = hashp->BSIZE; if ((hashp->fp == -1) && open_temp(hashp)) return (-1); fd = hashp->fp; if (hashp->LORDER != BYTE_ORDER) { int i, max; if (is_bitmap) { max = hashp->BSIZE >> 2; /* divide by 4 */ for (i = 0; i < max; i++) M_32_SWAP(((int *)p)[i]); } else { max = ((u_int16_t *)p)[0] + 2; for (i = 0; i <= max; i++) M_16_SWAP(((u_int16_t *)p)[i]); } } if (is_bucket) page = BUCKET_TO_PAGE(bucket); else page = OADDR_TO_PAGE(bucket); if ((wsize = pwrite(fd, p, size, (off_t)page << hashp->BSHIFT)) == -1) /* Errno is set */ return (-1); if (wsize != size) { errno = EFTYPE; return (-1); } return (0); } #define BYTE_MASK ((1 << INT_BYTE_SHIFT) -1) /* * Initialize a new bitmap page. 
/*
 * Allocate and initialize an in-memory bitmap page and register it in slot
 * ndx of the table: pnum is recorded in hashp->BITMAPS[ndx] and the buffer
 * in hashp->mapp[ndx].
 *
 * The words covering the first nbits bits are zeroed and the remainder of
 * the page is filled with 0xFF; the last zeroed word is then overwritten
 * with ALL_SET << (nbits & BYTE_MASK), and bit 0 is set.
 *
 * Returns:
 *	0 on success
 *	1 if the page buffer cannot be allocated
 */
int
__ibitmap(HTAB *hashp, int pnum, int nbits, int ndx)
{
	u_int32_t *map;
	int nwords, nbytes;

	map = (u_int32_t *)malloc(hashp->BSIZE);
	if (map == NULL)
		return (1);
	hashp->nmaps++;

	/* Whole words needed for nbits, and the same amount in bytes. */
	nwords = ((nbits - 1) >> INT_BYTE_SHIFT) + 1;
	nbytes = nwords << INT_TO_BYTE;

	(void)memset((char *)map, 0, nbytes);
	(void)memset((char *)map + nbytes, 0xFF, hashp->BSIZE - nbytes);
	map[nwords - 1] = ALL_SET << (nbits & BYTE_MASK);
	SETBIT(map, 0);

	hashp->mapp[ndx] = map;
	hashp->BITMAPS[ndx] = (u_int16_t)pnum;
	return (0);
}

/*
 * Return the position of the lowest clear bit in map, scanning upward from
 * bit 0, or BITS_PER_MAP when none of the scanned bits is clear.
 */
static u_int32_t
first_free(u_int32_t map)
{
	u_int32_t pos, probe;

	for (pos = 0, probe = 0x1; pos < BITS_PER_MAP; pos++, probe <<= 1) {
		if ((map & probe) == 0)
			break;
	}
	return (pos);
}
hashp->SPARES[splitnum - 1] : 0); #define OVMSG "HASH: Out of overflow pages. Increase page size\n" if (offset > SPLITMASK) { if (++splitnum >= NCACHED) { (void)_write(STDERR_FILENO, OVMSG, sizeof(OVMSG) - 1); errno = EFBIG; return (0); } hashp->OVFL_POINT = splitnum; hashp->SPARES[splitnum] = hashp->SPARES[splitnum-1]; hashp->SPARES[splitnum-1]--; offset = 1; } /* Check if we need to allocate a new bitmap page */ if (free_bit == (hashp->BSIZE << BYTE_SHIFT) - 1) { free_page++; if (free_page >= NCACHED) { (void)_write(STDERR_FILENO, OVMSG, sizeof(OVMSG) - 1); errno = EFBIG; return (0); } /* * This is tricky. The 1 indicates that you want the new page * allocated with 1 clear bit. Actually, you are going to * allocate 2 pages from this map. The first is going to be * the map page, the second is the overflow page we were * looking for. The init_bitmap routine automatically, sets * the first bit of itself to indicate that the bitmap itself * is in use. We would explicitly set the second bit, but * don't have to if we tell init_bitmap not to leave it clear * in the first place. */ if (__ibitmap(hashp, (int)OADDR_OF(splitnum, offset), 1, free_page)) return (0); hashp->SPARES[splitnum]++; #ifdef DEBUG2 free_bit = 2; #endif offset++; if (offset > SPLITMASK) { if (++splitnum >= NCACHED) { (void)_write(STDERR_FILENO, OVMSG, sizeof(OVMSG) - 1); errno = EFBIG; return (0); } hashp->OVFL_POINT = splitnum; hashp->SPARES[splitnum] = hashp->SPARES[splitnum-1]; hashp->SPARES[splitnum-1]--; offset = 0; } } else { /* * Free_bit addresses the last used bit. Bump it to address * the first available bit. 
*/ free_bit++; SETBIT(freep, free_bit); } /* Calculate address of the new overflow page */ addr = OADDR_OF(splitnum, offset); #ifdef DEBUG2 (void)fprintf(stderr, "OVERFLOW_PAGE: ADDR: %d BIT: %d PAGE %d\n", addr, free_bit, free_page); #endif return (addr); found: bit = bit + first_free(freep[j]); SETBIT(freep, bit); #ifdef DEBUG2 tmp1 = bit; tmp2 = i; #endif /* * Bits are addressed starting with 0, but overflow pages are addressed * beginning at 1. Bit is a bit addressnumber, so we need to increment * it to convert it to a page number. */ bit = 1 + bit + (i * (hashp->BSIZE << BYTE_SHIFT)); if (bit >= hashp->LAST_FREED) hashp->LAST_FREED = bit - 1; /* Calculate the split number for this page */ for (i = 0; (i < splitnum) && (bit > hashp->SPARES[i]); i++); offset = (i ? bit - hashp->SPARES[i - 1] : bit); if (offset >= SPLITMASK) { (void)_write(STDERR_FILENO, OVMSG, sizeof(OVMSG) - 1); errno = EFBIG; return (0); /* Out of overflow pages */ } addr = OADDR_OF(i, offset); #ifdef DEBUG2 (void)fprintf(stderr, "OVERFLOW_PAGE: ADDR: %d BIT: %d PAGE %d\n", addr, tmp1, tmp2); #endif /* Allocate and return the overflow page */ return (addr); } /* * Mark this overflow page as free. */ void __free_ovflpage(HTAB *hashp, BUFHEAD *obufp) { u_int16_t addr; u_int32_t *freep; int bit_address, free_page, free_bit; u_int16_t ndx; addr = obufp->addr; #ifdef DEBUG1 (void)fprintf(stderr, "Freeing %d\n", addr); #endif ndx = (((u_int16_t)addr) >> SPLITSHIFT); bit_address = (ndx ? hashp->SPARES[ndx - 1] : 0) + (addr & SPLITMASK) - 1; if (bit_address < hashp->LAST_FREED) hashp->LAST_FREED = bit_address; free_page = (bit_address >> (hashp->BSHIFT + BYTE_SHIFT)); free_bit = bit_address & ((hashp->BSIZE << BYTE_SHIFT) - 1); if (!(freep = hashp->mapp[free_page])) freep = fetch_bitmap(hashp, free_page); #ifdef DEBUG /* * This had better never happen. It means we tried to read a bitmap * that has already had overflow pages allocated off it, and we * failed to read it from the file. 
*/ if (!freep) assert(0); #endif CLRBIT(freep, free_bit); #ifdef DEBUG2 (void)fprintf(stderr, "FREE_OVFLPAGE: ADDR: %d BIT: %d PAGE %d\n", obufp->addr, free_bit, free_page); #endif __reclaim_buf(hashp, obufp); } /* * Returns: * 0 success * -1 failure */ static int open_temp(HTAB *hashp) { sigset_t set, oset; int len; char *envtmp = NULL; char path[MAXPATHLEN]; if (issetugid() == 0) envtmp = getenv("TMPDIR"); len = snprintf(path, sizeof(path), "%s/_hash.XXXXXX", envtmp ? envtmp : "/tmp"); if (len < 0 || len >= (int)sizeof(path)) { errno = ENAMETOOLONG; return (-1); } /* Block signals; make sure file goes away at process exit. */ (void)sigfillset(&set); (void)__libc_sigprocmask(SIG_BLOCK, &set, &oset); if ((hashp->fp = mkostemp(path, O_CLOEXEC)) != -1) (void)unlink(path); (void)__libc_sigprocmask(SIG_SETMASK, &oset, (sigset_t *)NULL); return (hashp->fp != -1 ? 0 : -1); } /* * We have to know that the key will fit, but the last entry on the page is * an overflow pair, so we need to shift things. 
*/ static void squeeze_key(u_int16_t *sp, const DBT *key, const DBT *val) { char *p; u_int16_t free_space, n, off, pageno; p = (char *)sp; n = sp[0]; free_space = FREESPACE(sp); off = OFFSET(sp); pageno = sp[n - 1]; off -= key->size; sp[n - 1] = off; memmove(p + off, key->data, key->size); off -= val->size; sp[n] = off; memmove(p + off, val->data, val->size); sp[0] = n + 2; sp[n + 1] = pageno; sp[n + 2] = OVFLPAGE; FREESPACE(sp) = free_space - PAIRSIZE(key, val); OFFSET(sp) = off; } static u_int32_t * fetch_bitmap(HTAB *hashp, int ndx) { if (ndx >= hashp->nmaps) return (NULL); if ((hashp->mapp[ndx] = (u_int32_t *)malloc(hashp->BSIZE)) == NULL) return (NULL); if (__get_page(hashp, (char *)hashp->mapp[ndx], hashp->BITMAPS[ndx], 0, 1, 1)) { free(hashp->mapp[ndx]); return (NULL); } return (hashp->mapp[ndx]); } #ifdef DEBUG4 int print_chain(int addr) { BUFHEAD *bufp; short *bp, oaddr; (void)fprintf(stderr, "%d ", addr); bufp = __get_buf(hashp, addr, NULL, 0); bp = (short *)bufp->page; while (bp[0] && ((bp[bp[0]] == OVFLPAGE) || ((bp[0] > 2) && bp[2] < REAL_KEY))) { oaddr = bp[bp[0] - 1]; (void)fprintf(stderr, "%d ", (int)oaddr); bufp = __get_buf(hashp, (int)oaddr, bufp, 0); bp = (short *)bufp->page; } (void)fprintf(stderr, "\n"); } #endif Index: head/lib/libc/net/ip6opt.c =================================================================== --- head/lib/libc/net/ip6opt.c (revision 298322) +++ head/lib/libc/net/ip6opt.c (revision 298323) @@ -1,601 +1,600 @@ /* $KAME: ip6opt.c,v 1.13 2003/06/06 10:08:20 suz Exp $ */ /* * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include static int ip6optlen(u_int8_t *opt, u_int8_t *lim); static void inet6_insert_padopt(u_char *p, int len); #ifndef IPV6_2292HOPOPTS #define IPV6_2292HOPOPTS 22 #endif #ifndef IPV6_2292DSTOPTS #define IPV6_2292DSTOPTS 23 #endif #define is_ipv6_hopopts(x) \ ((x) == IPV6_HOPOPTS || (x) == IPV6_2292HOPOPTS) #define is_ipv6_dstopts(x) \ ((x) == IPV6_DSTOPTS || (x) == IPV6_2292DSTOPTS) /* * This function returns the number of bytes required to hold an option * when it is stored as ancillary data, including the cmsghdr structure * at the beginning, and any padding at the end (to make its size a * multiple of 8 bytes). 
/*
 * Return the number of bytes needed to carry an option as ancillary data:
 * nbytes of option plus the two-byte next-header/length prefix, rounded up
 * to a multiple of 8 and then sized with CMSG_SPACE() so the cmsghdr and
 * its trailing padding are included.
 */
int
inet6_option_space(int nbytes)
{
	int padded;

	/* +2 for the nxt-hdr and length fields, +7/&~7 rounds up to 8 */
	padded = (nbytes + 2 + 7) & ~7;
	return(CMSG_SPACE(padded));
}

/*
 * Initialize the ancillary data object at bp for Hop-by-Hop or Destination
 * options and hand it back through *cmsgp.  Called once per object.
 *
 * type must be one of IPV6_HOPOPTS, IPV6_2292HOPOPTS, IPV6_DSTOPTS or
 * IPV6_2292DSTOPTS.  The header is set up with level IPPROTO_IPV6, the
 * given type, and a length of CMSG_LEN(0) (no option data yet).
 *
 * Returns 0 on success, or -1 (leaving bp and *cmsgp untouched) when type
 * is not one of the accepted values.
 */
int
inet6_option_init(void *bp, struct cmsghdr **cmsgp, int type)
{
	struct cmsghdr *hdr;
	int is_hbh, is_dst;

	/* argument validation */
	is_hbh = (type == IPV6_HOPOPTS || type == IPV6_2292HOPOPTS);
	is_dst = (type == IPV6_DSTOPTS || type == IPV6_2292DSTOPTS);
	if (!is_hbh && !is_dst)
		return(-1);

	hdr = (struct cmsghdr *)bp;
	hdr->cmsg_len = CMSG_LEN(0);
	hdr->cmsg_level = IPPROTO_IPV6;
	hdr->cmsg_type = type;

	*cmsgp = hdr;
	return(0);
}
*/ off = bp - (u_char *)eh; - padlen = (((off % multx) + (multx - 1)) & ~(multx - 1)) - - (off % multx); + padlen = roundup2(off % multx, multx) - (off % multx); padlen += plusy; padlen %= multx; /* keep the pad as short as possible */ /* insert padding */ inet6_insert_padopt(bp, padlen); cmsg->cmsg_len += padlen; bp += padlen; /* copy the option */ if (typep[0] == IP6OPT_PAD1) optlen = 1; else optlen = typep[1] + 2; memcpy(bp, typep, optlen); bp += optlen; cmsg->cmsg_len += optlen; /* calculate pad length after the option and insert the padding */ off = bp - (u_char *)eh; padlen = ((off + 7) & ~7) - off; inet6_insert_padopt(bp, padlen); bp += padlen; cmsg->cmsg_len += padlen; /* update the length field of the ip6 option header */ eh->ip6e_len = ((bp - (u_char *)eh) >> 3) - 1; return(0); } /* * This function appends a Hop-by-Hop option or a Destination option * into an ancillary data object that has been initialized by * inet6_option_init(). This function returns a pointer to the 8-bit * option type field that starts the option on success, or NULL on an * error. * The difference between this function and inet6_option_append() is * that the latter copies the contents of a previously built option into * the ancillary data object while the current function returns a * pointer to the space in the data object where the option's TLV must * then be built by the caller. * */ u_int8_t * inet6_option_alloc(struct cmsghdr *cmsg, int datalen, int multx, int plusy) { int padlen, off; u_int8_t *bp = (u_char *)cmsg + cmsg->cmsg_len; u_int8_t *retval; struct ip6_ext *eh = (struct ip6_ext *)CMSG_DATA(cmsg); /* argument validation */ if (multx != 1 && multx != 2 && multx != 4 && multx != 8) return(NULL); if (plusy < 0 || plusy > 7) return(NULL); /* * If this is the first option, allocate space for the * first 2 bytes(for next header and length fields) of * the option header. */ if (bp == (u_char *)eh) { bp += 2; cmsg->cmsg_len += 2; } /* calculate pad length before the option. 
*/ off = bp - (u_char *)eh; - padlen = (((off % multx) + (multx - 1)) & ~(multx - 1)) - + padlen = roundup2(off % multx, multx) - (off % multx); padlen += plusy; padlen %= multx; /* keep the pad as short as possible */ /* insert padding */ inet6_insert_padopt(bp, padlen); cmsg->cmsg_len += padlen; bp += padlen; /* keep space to store specified length of data */ retval = bp; bp += datalen; cmsg->cmsg_len += datalen; /* calculate pad length after the option and insert the padding */ off = bp - (u_char *)eh; padlen = ((off + 7) & ~7) - off; inet6_insert_padopt(bp, padlen); bp += padlen; cmsg->cmsg_len += padlen; /* update the length field of the ip6 option header */ eh->ip6e_len = ((bp - (u_char *)eh) >> 3) - 1; return(retval); } /* * This function processes the next Hop-by-Hop option or Destination * option in an ancillary data object. If another option remains to be * processed, the return value of the function is 0 and *tptrp points to * the 8-bit option type field (which is followed by the 8-bit option * data length, followed by the option data). If no more options remain * to be processed, the return value is -1 and *tptrp is NULL. If an * error occurs, the return value is -1 and *tptrp is not NULL. * (RFC 2292, 6.3.5) */ int inet6_option_next(const struct cmsghdr *cmsg, u_int8_t **tptrp) { struct ip6_ext *ip6e; int hdrlen, optlen; u_int8_t *lim; if (cmsg->cmsg_level != IPPROTO_IPV6 || (!is_ipv6_hopopts(cmsg->cmsg_type) && !is_ipv6_dstopts(cmsg->cmsg_type))) return(-1); /* message length validation */ if (cmsg->cmsg_len < CMSG_SPACE(sizeof(struct ip6_ext))) return(-1); ip6e = (struct ip6_ext *)CMSG_DATA(cmsg); hdrlen = (ip6e->ip6e_len + 1) << 3; if (cmsg->cmsg_len < CMSG_SPACE(hdrlen)) return(-1); /* * If the caller does not specify the starting point, * simply return the 1st option. * Otherwise, search the option list for the next option. 
*/ lim = (u_int8_t *)ip6e + hdrlen; if (*tptrp == NULL) *tptrp = (u_int8_t *)(ip6e + 1); else { if ((optlen = ip6optlen(*tptrp, lim)) == 0) return(-1); *tptrp = *tptrp + optlen; } if (*tptrp >= lim) { /* there is no option */ *tptrp = NULL; return(-1); } /* * Finally, checks if the next option is safely stored in the * cmsg data. */ if (ip6optlen(*tptrp, lim) == 0) return(-1); else return(0); } /* * This function is similar to the inet6_option_next() function, * except this function lets the caller specify the option type to be * searched for, instead of always returning the next option in the * ancillary data object. * Note: RFC 2292 says the type of tptrp is u_int8_t *, but we think * it's a typo. The variable should be type of u_int8_t **. */ int inet6_option_find(const struct cmsghdr *cmsg, u_int8_t **tptrp, int type) { struct ip6_ext *ip6e; int hdrlen, optlen; u_int8_t *optp, *lim; if (cmsg->cmsg_level != IPPROTO_IPV6 || (!is_ipv6_hopopts(cmsg->cmsg_type) && !is_ipv6_dstopts(cmsg->cmsg_type))) return(-1); /* message length validation */ if (cmsg->cmsg_len < CMSG_SPACE(sizeof(struct ip6_ext))) return(-1); ip6e = (struct ip6_ext *)CMSG_DATA(cmsg); hdrlen = (ip6e->ip6e_len + 1) << 3; if (cmsg->cmsg_len < CMSG_SPACE(hdrlen)) return(-1); /* * If the caller does not specify the starting point, * search from the beginning of the option list. * Otherwise, search from *the next option* of the specified point. */ lim = (u_int8_t *)ip6e + hdrlen; if (*tptrp == NULL) *tptrp = (u_int8_t *)(ip6e + 1); else { if ((optlen = ip6optlen(*tptrp, lim)) == 0) return(-1); *tptrp = *tptrp + optlen; } for (optp = *tptrp; optp < lim; optp += optlen) { if (*optp == type) { *tptrp = optp; return(0); } if ((optlen = ip6optlen(optp, lim)) == 0) return(-1); } /* search failed */ *tptrp = NULL; return(-1); } /* * Calculate the length of a given IPv6 option. Also checks * if the option is safely stored in user's buffer according to the * calculated length and the limitation of the buffer. 
/*
 * Calculate the length of a given IPv6 option, type and length bytes
 * included.  Returns 0 if the option is not stored completely inside
 * the buffer that ends at lim.  All bounds checks are done on the
 * number of bytes remaining, so no byte at or beyond lim is read and
 * no pointer past the end of the buffer is formed.
 */
static int
ip6optlen(u_int8_t *opt, u_int8_t *lim)
{
	int optlen;

	/* the type byte itself must be inside the buffer */
	if (opt >= lim)
		return(0);

	/* Pad1 is a lone type byte without a length field */
	if (*opt == IP6OPT_PAD1)
		return(1);

	/* is there enough space to store type and len? */
	if (lim - opt < 2)
		return(0);

	optlen = *(opt + 1) + 2;
	if (optlen <= lim - opt)
		return(optlen);

	return(0);
}
/*
 * Finish an options header (RFC 3542).
 *
 * offset is the length of the header built so far.  The return value
 * is that length rounded up to the next multiple of 8.  When extbuf is
 * not NULL the gap between offset and the rounded length is filled with
 * a Pad1 option (one byte) or a PadN option (two or more bytes), and -1
 * is returned if the rounded length exceeds extlen.
 *
 * A negative offset (e.g. the -1 returned by a failed earlier call) is
 * rejected with -1; it would otherwise produce a positive pad length and
 * a write in front of extbuf.
 */
int
inet6_opt_finish(void *extbuf, socklen_t extlen, int offset)
{
	int updatelen;

	if (offset < 0)
		return(-1);

	/* round up to a multiple of 8; 0 stays 0 */
	updatelen = offset > 0 ? (1 + ((offset - 1) | 7)) : 0;

	if (extbuf) {
		u_int8_t *padp;
		int padlen = updatelen - offset;

		if ((socklen_t)updatelen > extlen)
			return(-1);

		padp = (u_int8_t *)extbuf + offset;
		if (padlen == 1)
			*padp = IP6OPT_PAD1;
		else if (padlen > 0) {
			*padp++ = IP6OPT_PADN;
			*padp++ = (padlen - 2);	/* PadN data length */
			memset(padp, 0, padlen - 2);
		}
	}

	return(updatelen);
}
*/ while(optp < lim) { switch(*optp) { case IP6OPT_PAD1: optp++; break; case IP6OPT_PADN: if ((optlen = ip6optlen(optp, lim)) == 0) goto optend; optp += optlen; break; default: /* found */ if ((optlen = ip6optlen(optp, lim)) == 0) goto optend; *typep = *optp; *lenp = optlen - 2; *databufp = optp + 2; return(optp + optlen - (u_int8_t *)extbuf); } } optend: *databufp = NULL; /* for safety */ return(-1); } int inet6_opt_find(void *extbuf, socklen_t extlen, int offset, u_int8_t type, socklen_t *lenp, void **databufp) { u_int8_t *optp, *lim; int optlen; /* Validate extlen. XXX: is the variable really necessary?? */ if (extlen == 0 || (extlen % 8)) return(-1); lim = (u_int8_t *)extbuf + extlen; /* * If this is the first time this function called for this options * header, simply return the 1st option. * Otherwise, search the option list for the next option. */ if (offset == 0) { optp = (u_int8_t *)((struct ip6_hbh *)extbuf + 1); } else optp = (u_int8_t *)extbuf + offset; /* Find the specified option */ while(optp < lim) { if ((optlen = ip6optlen(optp, lim)) == 0) goto optend; if (*optp == type) { /* found */ *lenp = optlen - 2; *databufp = optp + 2; return(optp + optlen - (u_int8_t *)extbuf); } optp += optlen; } optend: *databufp = NULL; /* for safety */ return(-1); } int inet6_opt_get_val(void *databuf, int offset, void *val, socklen_t vallen) { /* we can't assume alignment here */ memcpy(val, (u_int8_t *)databuf + offset, vallen); return(offset + vallen); } Index: head/lib/libc/net/name6.c =================================================================== --- head/lib/libc/net/name6.c (revision 298322) +++ head/lib/libc/net/name6.c (revision 298323) @@ -1,1112 +1,1113 @@ /* $KAME: name6.c,v 1.25 2000/06/26 16:44:40 itojun Exp $ */ /* * Copyright (C) 1995, 1996, 1997, 1998, and 1999 WIDE Project. * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * ++Copyright++ 1985, 1988, 1993 * - * Copyright (c) 1985, 1988, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * Portions Copyright (c) 1993 by Digital Equipment Corporation. * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies, and that * the name of Digital Equipment Corporation not be used in advertising or * publicity pertaining to distribution of the document or software without * specific, written prior permission. * * THE SOFTWARE IS PROVIDED "AS IS" AND DIGITAL EQUIPMENT CORP. DISCLAIMS ALL * WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL DIGITAL EQUIPMENT * CORPORATION BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS * SOFTWARE. * - * --Copyright-- */ /* * Atsushi Onoe */ #include __FBSDID("$FreeBSD$"); #include "namespace.h" #include #include #include #include #include #ifdef INET6 #include #include #include #include /* XXX */ #endif #include #include #include #include #include #include #include #include #include #include #include #include "un-namespace.h" #include "netdb_private.h" #include "res_private.h" #ifndef MAXALIASES #define MAXALIASES 10 #endif #ifndef MAXADDRS #define MAXADDRS 20 #endif #ifndef MAXDNAME #define MAXDNAME 1025 #endif #ifdef INET6 #define ADDRLEN(af) ((af) == AF_INET6 ? sizeof(struct in6_addr) : \ sizeof(struct in_addr)) #else #define ADDRLEN(af) sizeof(struct in_addr) #endif #define MAPADDR(ab, ina) \ do { \ memcpy(&(ab)->map_inaddr, ina, sizeof(struct in_addr)); \ memset((ab)->map_zero, 0, sizeof((ab)->map_zero)); \ memset((ab)->map_one, 0xff, sizeof((ab)->map_one)); \ } while (0) #define MAPADDRENABLED(flags) \ (((flags) & AI_V4MAPPED) || \ (((flags) & AI_V4MAPPED_CFG))) union inx_addr { struct in_addr in_addr; #ifdef INET6 struct in6_addr in6_addr; #endif struct { u_char mau_zero[10]; u_char mau_one[2]; struct in_addr mau_inaddr; } map_addr_un; #define map_zero map_addr_un.mau_zero #define map_one map_addr_un.mau_one #define map_inaddr map_addr_un.mau_inaddr }; struct policyqueue { TAILQ_ENTRY(policyqueue) pc_entry; #ifdef INET6 struct in6_addrpolicy pc_policy; #endif }; TAILQ_HEAD(policyhead, policyqueue); #define AIO_SRCFLAG_DEPRECATED 0x1 struct hp_order { union { struct sockaddr_storage aiou_ss; struct sockaddr aiou_sa; } aio_src_un; #define aio_srcsa aio_src_un.aiou_sa u_int32_t aio_srcflag; int aio_srcscope; int 
aio_dstscope; struct policyqueue *aio_srcpolicy; struct policyqueue *aio_dstpolicy; union { struct sockaddr_storage aiou_ss; struct sockaddr aiou_sa; } aio_un; #define aio_sa aio_un.aiou_sa int aio_matchlen; char *aio_h_addr; }; static struct hostent *_hpcopy(struct hostent *, int *); static struct hostent *_hpaddr(int, const char *, void *, int *); #ifdef INET6 static struct hostent *_hpmerge(struct hostent *, struct hostent *, int *); static struct hostent *_hpmapv6(struct hostent *, int *); #endif static struct hostent *_hpsort(struct hostent *, res_state); #ifdef INET6 static struct hostent *_hpreorder(struct hostent *); static int get_addrselectpolicy(struct policyhead *); static void free_addrselectpolicy(struct policyhead *); static struct policyqueue *match_addrselectpolicy(struct sockaddr *, struct policyhead *); static void set_source(struct hp_order *, struct policyhead *); static int matchlen(struct sockaddr *, struct sockaddr *); static int comp_dst(const void *, const void *); static int gai_addr2scopetype(struct sockaddr *); #endif /* * Functions defined in RFC2553 * getipnodebyname, getipnodebyaddr, freehostent */ struct hostent * getipnodebyname(const char *name, int af, int flags, int *errp) { struct hostent *hp; union inx_addr addrbuf; res_state statp; u_long options; switch (af) { case AF_INET: #ifdef INET6 case AF_INET6: #endif break; default: *errp = NO_RECOVERY; return NULL; } if (flags & AI_ADDRCONFIG) { int s; if ((s = _socket(af, SOCK_DGRAM | SOCK_CLOEXEC, 0)) < 0) return NULL; /* * TODO: * Note that implementation dependent test for address * configuration should be done everytime called * (or apropriate interval), * because addresses will be dynamically assigned or deleted. 
*/ _close(s); } #ifdef INET6 /* special case for literal address */ if (inet_pton(AF_INET6, name, &addrbuf) == 1) { if (af != AF_INET6) { *errp = HOST_NOT_FOUND; return NULL; } return _hpaddr(af, name, &addrbuf, errp); } #endif if (inet_aton(name, (struct in_addr *)&addrbuf) == 1) { if (af != AF_INET) { if (MAPADDRENABLED(flags)) { MAPADDR(&addrbuf, &addrbuf.in_addr); } else { *errp = HOST_NOT_FOUND; return NULL; } } return _hpaddr(af, name, &addrbuf, errp); } statp = __res_state(); if ((statp->options & RES_INIT) == 0) { if (res_ninit(statp) < 0) { *errp = NETDB_INTERNAL; return NULL; } } options = statp->options; statp->options &= ~RES_USE_INET6; hp = gethostbyname2(name, af); hp = _hpcopy(hp, errp); #ifdef INET6 if (af == AF_INET6) hp = _hpreorder(hp); if (af == AF_INET6 && ((flags & AI_ALL) || hp == NULL) && MAPADDRENABLED(flags)) { struct hostent *hp2 = gethostbyname2(name, AF_INET); if (hp == NULL) if (hp2 == NULL) *errp = statp->res_h_errno; else hp = _hpmapv6(hp2, errp); else { if (hp2 && strcmp(hp->h_name, hp2->h_name) == 0) { struct hostent *hpb = hp; hp = _hpmerge(hpb, hp2, errp); freehostent(hpb); } } } #endif if (hp == NULL) *errp = statp->res_h_errno; statp->options = options; return _hpsort(hp, statp); } struct hostent * getipnodebyaddr(const void *src, size_t len, int af, int *errp) { struct hostent *hp; res_state statp; u_long options; #ifdef INET6 struct in6_addr addrbuf; #else struct in_addr addrbuf; #endif switch (af) { case AF_INET: if (len != sizeof(struct in_addr)) { *errp = NO_RECOVERY; return NULL; } - if ((long)src & ~(sizeof(struct in_addr) - 1)) { + if (rounddown2((long)src, sizeof(struct in_addr))) { memcpy(&addrbuf, src, len); src = &addrbuf; } if (((struct in_addr *)src)->s_addr == 0) return NULL; break; #ifdef INET6 case AF_INET6: if (len != sizeof(struct in6_addr)) { *errp = NO_RECOVERY; return NULL; } - if ((long)src & ~(sizeof(struct in6_addr) / 2 - 1)) { /*XXX*/ + if (rounddown2((long)src, sizeof(struct in6_addr) / 2)) { + /* XXX 
*/ memcpy(&addrbuf, src, len); src = &addrbuf; } if (IN6_IS_ADDR_UNSPECIFIED((struct in6_addr *)src)) return NULL; if (IN6_IS_ADDR_V4MAPPED((struct in6_addr *)src) || IN6_IS_ADDR_V4COMPAT((struct in6_addr *)src)) { src = (char *)src + (sizeof(struct in6_addr) - sizeof(struct in_addr)); af = AF_INET; len = sizeof(struct in_addr); } break; #endif default: *errp = NO_RECOVERY; return NULL; } statp = __res_state(); if ((statp->options & RES_INIT) == 0) { if (res_ninit(statp) < 0) { RES_SET_H_ERRNO(statp, NETDB_INTERNAL); return NULL; } } options = statp->options; statp->options &= ~RES_USE_INET6; hp = gethostbyaddr(src, len, af); if (hp == NULL) *errp = statp->res_h_errno; statp->options = options; return (_hpcopy(hp, errp)); } void freehostent(struct hostent *ptr) { free(ptr); } /* * Private utility functions */ /* * _hpcopy: allocate and copy hostent structure */ static struct hostent * _hpcopy(struct hostent *hp, int *errp) { struct hostent *nhp; char *cp, **pp; int size, addrsize; int nalias = 0, naddr = 0; int al_off; int i; if (hp == NULL) return hp; /* count size to be allocated */ size = sizeof(struct hostent); if (hp->h_name != NULL) size += strlen(hp->h_name) + 1; if ((pp = hp->h_aliases) != NULL) { for (i = 0; *pp != NULL; i++, pp++) { if (**pp != '\0') { size += strlen(*pp) + 1; nalias++; } } } /* adjust alignment */ size = ALIGN(size); al_off = size; size += sizeof(char *) * (nalias + 1); addrsize = ALIGN(hp->h_length); if ((pp = hp->h_addr_list) != NULL) { while (*pp++ != NULL) naddr++; } size += addrsize * naddr; size += sizeof(char *) * (naddr + 1); /* copy */ if ((nhp = (struct hostent *)malloc(size)) == NULL) { *errp = TRY_AGAIN; return NULL; } cp = (char *)&nhp[1]; if (hp->h_name != NULL) { nhp->h_name = cp; strcpy(cp, hp->h_name); cp += strlen(cp) + 1; } else nhp->h_name = NULL; nhp->h_aliases = (char **)((char *)nhp + al_off); if ((pp = hp->h_aliases) != NULL) { for (i = 0; *pp != NULL; pp++) { if (**pp != '\0') { nhp->h_aliases[i++] = cp; 
strcpy(cp, *pp); cp += strlen(cp) + 1; } } } nhp->h_aliases[nalias] = NULL; cp = (char *)&nhp->h_aliases[nalias + 1]; nhp->h_addrtype = hp->h_addrtype; nhp->h_length = hp->h_length; nhp->h_addr_list = (char **)cp; if ((pp = hp->h_addr_list) != NULL) { cp = (char *)&nhp->h_addr_list[naddr + 1]; for (i = 0; *pp != NULL; pp++) { nhp->h_addr_list[i++] = cp; memcpy(cp, *pp, hp->h_length); cp += addrsize; } } nhp->h_addr_list[naddr] = NULL; return nhp; } /* * _hpaddr: construct hostent structure with one address */ static struct hostent * _hpaddr(int af, const char *name, void *addr, int *errp) { struct hostent *hp, hpbuf; char *addrs[2]; hp = &hpbuf; hp->h_name = (char *)name; hp->h_aliases = NULL; hp->h_addrtype = af; hp->h_length = ADDRLEN(af); hp->h_addr_list = addrs; addrs[0] = (char *)addr; addrs[1] = NULL; return (_hpcopy(hp, errp)); } #ifdef INET6 /* * _hpmerge: merge 2 hostent structure, arguments will be freed */ static struct hostent * _hpmerge(struct hostent *hp1, struct hostent *hp2, int *errp) { int i, j; int naddr, nalias; char **pp; struct hostent *hp, hpbuf; char *aliases[MAXALIASES + 1], *addrs[MAXADDRS + 1]; union inx_addr addrbuf[MAXADDRS]; if (hp1 == NULL) return _hpcopy(hp2, errp); if (hp2 == NULL) return _hpcopy(hp1, errp); #define HP(i) (i == 1 ? hp1 : hp2) hp = &hpbuf; hp->h_name = (hp1->h_name != NULL ? 
hp1->h_name : hp2->h_name); hp->h_aliases = aliases; nalias = 0; for (i = 1; i <= 2; i++) { if ((pp = HP(i)->h_aliases) == NULL) continue; for (; nalias < MAXALIASES && *pp != NULL; pp++) { /* check duplicates */ for (j = 0; j < nalias; j++) if (strcasecmp(*pp, aliases[j]) == 0) break; if (j == nalias) aliases[nalias++] = *pp; } } aliases[nalias] = NULL; if (hp1->h_length != hp2->h_length) { hp->h_addrtype = AF_INET6; hp->h_length = sizeof(struct in6_addr); } else { hp->h_addrtype = hp1->h_addrtype; hp->h_length = hp1->h_length; } hp->h_addr_list = addrs; naddr = 0; for (i = 1; i <= 2; i++) { if ((pp = HP(i)->h_addr_list) == NULL) continue; if (HP(i)->h_length == hp->h_length) { while (naddr < MAXADDRS && *pp != NULL) addrs[naddr++] = *pp++; } else { /* copy IPv4 addr as mapped IPv6 addr */ while (naddr < MAXADDRS && *pp != NULL) { MAPADDR(&addrbuf[naddr], *pp++); addrs[naddr] = (char *)&addrbuf[naddr]; naddr++; } } } addrs[naddr] = NULL; return (_hpcopy(hp, errp)); } #endif /* * _hpmapv6: convert IPv4 hostent into IPv4-mapped IPv6 addresses */ #ifdef INET6 static struct hostent * _hpmapv6(struct hostent *hp, int *errp) { struct hostent hp6; if (hp == NULL) return NULL; if (hp->h_addrtype == AF_INET6) return _hpcopy(hp, errp); memset(&hp6, 0, sizeof(struct hostent)); hp6.h_addrtype = AF_INET6; hp6.h_length = sizeof(struct in6_addr); return _hpmerge(&hp6, hp, errp); } #endif /* * _hpsort: sort address by sortlist */ static struct hostent * _hpsort(struct hostent *hp, res_state statp) { int i, j, n; u_char *ap, *sp, *mp, **pp; char t; char order[MAXADDRS]; int nsort = statp->nsort; if (hp == NULL || hp->h_addr_list[1] == NULL || nsort == 0) return hp; for (i = 0; (ap = (u_char *)hp->h_addr_list[i]); i++) { for (j = 0; j < nsort; j++) { #ifdef INET6 if (statp->_u._ext.ext->sort_list[j].af != hp->h_addrtype) continue; sp = (u_char *)&statp->_u._ext.ext->sort_list[j].addr; mp = (u_char *)&statp->_u._ext.ext->sort_list[j].mask; #else sp = (u_char 
*)&statp->sort_list[j].addr; mp = (u_char *)&statp->sort_list[j].mask; #endif for (n = 0; n < hp->h_length; n++) { if ((ap[n] & mp[n]) != sp[n]) break; } if (n == hp->h_length) break; } order[i] = j; } n = i; pp = (u_char **)hp->h_addr_list; for (i = 0; i < n - 1; i++) { for (j = i + 1; j < n; j++) { if (order[i] > order[j]) { ap = pp[i]; pp[i] = pp[j]; pp[j] = ap; t = order[i]; order[i] = order[j]; order[j] = t; } } } return hp; } #ifdef INET6 /* * _hpreorder: sort address by default address selection */ static struct hostent * _hpreorder(struct hostent *hp) { struct hp_order *aio; int i, n; char *ap; struct sockaddr *sa; struct policyhead policyhead; if (hp == NULL) return hp; switch (hp->h_addrtype) { case AF_INET: #ifdef INET6 case AF_INET6: #endif break; default: free_addrselectpolicy(&policyhead); return hp; } /* count the number of addrinfo elements for sorting. */ for (n = 0; hp->h_addr_list[n] != NULL; n++) ; /* * If the number is small enough, we can skip the reordering process. */ if (n <= 1) return hp; /* allocate a temporary array for sort and initialization of it. */ if ((aio = malloc(sizeof(*aio) * n)) == NULL) return hp; /* give up reordering */ memset(aio, 0, sizeof(*aio) * n); /* retrieve address selection policy from the kernel */ TAILQ_INIT(&policyhead); if (!get_addrselectpolicy(&policyhead)) { /* no policy is installed into kernel, we don't sort. 
/*
 * get_addrselectpolicy: read the IPv6 address selection policy table
 * with sysctl() and append one policyqueue entry per table element to
 * `head'.
 *
 * Returns 0 if the table cannot be read (or INET6 is not compiled in)
 * and 1 otherwise.  If allocating an entry fails, the entries queued so
 * far are released again and 1 is still returned.
 */
static int
get_addrselectpolicy(struct policyhead *head)
{
#ifdef INET6
	int mib[] = { CTL_NET, PF_INET6, IPPROTO_IPV6, IPV6CTL_ADDRCTLPOLICY };
	struct in6_addrpolicy *cur, *end;
	struct policyqueue *node;
	size_t len;
	char *tbl;

	/* first call only asks for the size of the table */
	if (sysctl(mib, nitems(mib), NULL, &len, NULL, 0) < 0)
		return (0);

	tbl = malloc(len);
	if (tbl == NULL)
		return (0);

	if (sysctl(mib, nitems(mib), tbl, &len, NULL, 0) < 0) {
		free(tbl);
		return (0);
	}

	cur = (struct in6_addrpolicy *)tbl;
	end = (struct in6_addrpolicy *)(tbl + len);
	/* only complete elements are consumed */
	while (cur + 1 <= end) {
		node = malloc(sizeof(*node));
		if (node == NULL) {
			free_addrselectpolicy(head); /* make the list empty */
			break;
		}
		node->pc_policy = *cur;
		TAILQ_INSERT_TAIL(head, node, pc_entry);
		cur++;
	}

	free(tbl);
	return (1);
#else
	return (0);
#endif
}
TAILQ_NEXT(ent, pc_entry); TAILQ_REMOVE(head, ent, pc_entry); free(ent); } } static struct policyqueue * match_addrselectpolicy(struct sockaddr *addr, struct policyhead *head) { #ifdef INET6 struct policyqueue *ent, *bestent = NULL; struct in6_addrpolicy *pol; int matchlen, bestmatchlen = -1; u_char *mp, *ep, *k, *p, m; struct sockaddr_in6 key; switch(addr->sa_family) { case AF_INET6: key = *(struct sockaddr_in6 *)addr; break; case AF_INET: /* convert the address into IPv4-mapped IPv6 address. */ memset(&key, 0, sizeof(key)); key.sin6_family = AF_INET6; key.sin6_len = sizeof(key); _map_v4v6_address( (char *)&((struct sockaddr_in *)addr)->sin_addr, (char *)&key.sin6_addr); break; default: return(NULL); } for (ent = TAILQ_FIRST(head); ent; ent = TAILQ_NEXT(ent, pc_entry)) { pol = &ent->pc_policy; matchlen = 0; mp = (u_char *)&pol->addrmask.sin6_addr; ep = mp + 16; /* XXX: scope field? */ k = (u_char *)&key.sin6_addr; p = (u_char *)&pol->addr.sin6_addr; for (; mp < ep && *mp; mp++, k++, p++) { m = *mp; if ((*k & m) != *p) goto next; /* not match */ if (m == 0xff) /* short cut for a typical case */ matchlen += 8; else { while (m >= 0x80) { matchlen++; m <<= 1; } } } /* matched. check if this is better than the current best. 
/*
 * matchlen: number of leading bits that the addresses in src and dst
 * have in common.
 *
 * The family is taken from src; dst is read as the same family.
 * Returns 0 for families other than AF_INET (and AF_INET6 when INET6
 * is defined).
 */
static int
matchlen(struct sockaddr *src, struct sockaddr *dst)
{
	int match = 0;
	unsigned char *s, *d;
	unsigned char *lim, r;
	int addrlen;

	switch (src->sa_family) {
#ifdef INET6
	case AF_INET6:
		s = (unsigned char *)&((struct sockaddr_in6 *)src)->sin6_addr;
		d = (unsigned char *)&((struct sockaddr_in6 *)dst)->sin6_addr;
		addrlen = sizeof(struct in6_addr);
		lim = s + addrlen;
		break;
#endif
	case AF_INET:
		s = (unsigned char *)&((struct sockaddr_in *)src)->sin_addr;
		d = (unsigned char *)&((struct sockaddr_in *)dst)->sin_addr;
		addrlen = sizeof(struct in_addr);
		lim = s + addrlen;
		break;
	default:
		return(0);
	}

	while (s < lim) {
		if ((r = (*d++ ^ *s++)) != 0) {
			/*
			 * First differing byte: count its leading zero
			 * bits.  r is non-zero, so the loop terminates.
			 * (The former test "r < addrlen * 8" only gave
			 * the right answer for 16-byte addresses.)
			 */
			while ((r & 0x80) == 0) {
				match++;
				r <<= 1;
			}
			break;
		}
		match += 8;
	}
	return(match);
}
*/ #ifdef INET6 if (dst1->aio_srcpolicy && dst1->aio_dstpolicy && dst1->aio_srcpolicy->pc_policy.label == dst1->aio_dstpolicy->pc_policy.label && (dst2->aio_srcpolicy == NULL || dst2->aio_dstpolicy == NULL || dst2->aio_srcpolicy->pc_policy.label != dst2->aio_dstpolicy->pc_policy.label)) { return(-1); } if (dst2->aio_srcpolicy && dst2->aio_dstpolicy && dst2->aio_srcpolicy->pc_policy.label == dst2->aio_dstpolicy->pc_policy.label && (dst1->aio_srcpolicy == NULL || dst1->aio_dstpolicy == NULL || dst1->aio_srcpolicy->pc_policy.label != dst1->aio_dstpolicy->pc_policy.label)) { return(1); } #endif /* Rule 6: Prefer higher precedence. */ #ifdef INET6 if (dst1->aio_dstpolicy && (dst2->aio_dstpolicy == NULL || dst1->aio_dstpolicy->pc_policy.preced > dst2->aio_dstpolicy->pc_policy.preced)) { return(-1); } if (dst2->aio_dstpolicy && (dst1->aio_dstpolicy == NULL || dst2->aio_dstpolicy->pc_policy.preced > dst1->aio_dstpolicy->pc_policy.preced)) { return(1); } #endif /* Rule 7: Prefer native transport. */ /* XXX: not implemented yet */ /* Rule 8: Prefer smaller scope. */ if (dst1->aio_dstscope >= 0 && dst1->aio_dstscope < dst2->aio_dstscope) { return(-1); } if (dst2->aio_dstscope >= 0 && dst2->aio_dstscope < dst1->aio_dstscope) { return(1); } /* * Rule 9: Use longest matching prefix. * We compare the match length in a same AF only. */ if (dst1->aio_sa.sa_family == dst2->aio_sa.sa_family) { if (dst1->aio_matchlen > dst2->aio_matchlen) { return(-1); } if (dst1->aio_matchlen < dst2->aio_matchlen) { return(1); } } /* Rule 10: Otherwise, leave the order unchanged. */ return(-1); } /* * Copy from scope.c. * XXX: we should standardize the functions and link them as standard * library. 
/*
 * gai_addr2scopetype: map a socket address to a scope value, using the
 * values defined for IPv6 multicast scopes (1 node-local, 2 link-local,
 * 5 site-local, 14 global).
 *
 * Returns -1 and sets errno to EAFNOSUPPORT for an address family that
 * is not handled.
 */
static int
gai_addr2scopetype(struct sockaddr *sa)
{
#ifdef INET6
	struct sockaddr_in6 *sa6;
#endif
	struct sockaddr_in *sa4;
	const unsigned char *oct;

	switch(sa->sa_family) {
#ifdef INET6
	case AF_INET6:
		sa6 = (struct sockaddr_in6 *)sa;
		if (IN6_IS_ADDR_MULTICAST(&sa6->sin6_addr)) {
			/*
			 * Just use the scope field of the multicast
			 * address: the low nibble of the second byte
			 * (ff<flags><scope>::).  The third byte, which
			 * was read before, is part of the group ID.
			 */
			return(sa6->sin6_addr.s6_addr[1] & 0x0f);
		}
		/*
		 * Unicast addresses: map scope type to corresponding scope
		 * value defined for multicast addresses.
		 * XXX: hardcoded scope type values are bad...
		 */
		if (IN6_IS_ADDR_LOOPBACK(&sa6->sin6_addr))
			return(1); /* node local scope */
		if (IN6_IS_ADDR_LINKLOCAL(&sa6->sin6_addr))
			return(2); /* link-local scope */
		if (IN6_IS_ADDR_SITELOCAL(&sa6->sin6_addr))
			return(5); /* site-local scope */
		return(14);	/* global scope */
#endif
	case AF_INET:
		/*
		 * IPv4 pseudo scoping according to RFC 3484.
		 */
		sa4 = (struct sockaddr_in *)sa;
		oct = (const unsigned char *)&sa4->sin_addr;
		/* IPv4 autoconfiguration addresses have link-local scope. */
		if (oct[0] == 169 && oct[1] == 254)
			return(2);
		/* Private addresses have site-local scope. */
		if (oct[0] == 10 ||
		    (oct[0] == 172 && (oct[1] & 0xf0) == 16) ||
		    (oct[0] == 192 && oct[1] == 168))
			return(14);	/* XXX: It should be 5 unless NAT */
		/* Loopback addresses have link-local scope. */
		if (oct[0] == 127)
			return(2);
		return(14);
	default:
		errno = EAFNOSUPPORT; /* is this a good error? */
		return(-1);
	}
}