diff --git a/usr.bin/locate/bigram/locate.bigram.c b/usr.bin/locate/bigram/locate.bigram.c index d15f8ce170e8..a654448680a3 100644 --- a/usr.bin/locate/bigram/locate.bigram.c +++ b/usr.bin/locate/bigram/locate.bigram.c @@ -1,115 +1,114 @@ /* * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1995-2022 Wolfram Schneider * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * James A. Woods. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*
  * $FreeBSD$
  */

 #if 0
 #ifndef lint
 static char copyright[] =
 "@(#) Copyright (c) 1989, 1993\n\
 The Regents of the University of California. All rights reserved.\n";
 #endif /* not lint */

 #ifndef lint
 static char sccsid[] = "@(#)locate.bigram.c 8.1 (Berkeley) 6/6/93";
 #endif /* not lint */
 #endif

 /*
  *  bigram < sorted_file_names | sort -nr |
  *	awk 'NR <= 128 { printf $2 }' > bigrams
  *
  * List bigrams for 'updatedb' script.
  * Use 'code' to encode a file using this output.
  */

 /*
  * NOTE(review): the header names after the bare #include directives
  * are missing in this copy of the patch.
  */
 #include
 #include
 #include
 #include
-#include /* for MAXPATHLEN */
 #include "locate.h"

 /* Line buffers for the current and the previous input line. */
-u_char buf1[MAXPATHLEN] = " ";
-u_char buf2[MAXPATHLEN];
+u_char buf1[LOCATE_PATH_MAX] = " ";
+u_char buf2[LOCATE_PATH_MAX];

 /* bigram[a][b] counts how often byte a was followed by byte b. */
 unsigned long bigram[UCHAR_MAX + 1][UCHAR_MAX + 1];

 /*
  * Count bigrams in the lines read from stdin.
  *
  * For every input line the prefix shared with the previous line is
  * skipped; the rest is consumed two bytes at a time and each pair
  * increments bigram[first][second].  At end of input every non-zero
  * counter whose two bytes lie in ASCII_MIN..ASCII_MAX is printed as
  * "<count> <first><second>".  Exits with status 0 after the output,
  * or through err() if one of the two Capsicum helper calls fails.
  */
 int
 main(void)
 {
 	u_char *cp;
 	u_char *oldpath = buf1, *path = buf2;
 	u_int i, j;

 	/* both helper calls must succeed before any input is read */
 	if (caph_limit_stdio() < 0 || caph_enter() < 0)
 		err(1, "capsicum");

 	/* a line longer than the buffer is returned by fgets() in pieces */
 	while (fgets(path, sizeof(buf2), stdin) != NULL) {

 		/*
 		 * We don't need to remove the newline character '\n'.
 		 * '\n' is less than ASCII_MIN and will be ignored
 		 * later at output.
 		 */

 		/* skip longest common prefix */
 		for (cp = path; *cp == *oldpath; cp++, oldpath++)
 			if (*cp == '\0')
 				break;

 		/* pairs do not overlap; a single trailing byte is not counted */
 		while (*cp != '\0' && *(cp + 1) != '\0') {
 			bigram[(u_char)*cp][(u_char)*(cp + 1)]++;
 			cp += 2;
 		}

 		/* swap pointers */
 		if (path == buf1) {
 			path = buf2;
 			oldpath = buf1;
 		} else {
 			path = buf1;
 			oldpath = buf2;
 		}
 	}

 	/* output, boundary check */
 	for (i = ASCII_MIN; i <= ASCII_MAX; i++)
 		for (j = ASCII_MIN; j <= ASCII_MAX; j++)
 			if (bigram[i][j] != 0)
 				printf("%lu %c%c\n", bigram[i][j], i, j);

 	exit(0);
 }
diff --git a/usr.bin/locate/code/locate.code.c b/usr.bin/locate/code/locate.code.c
index 9da78d55e622..5263d9ee8fb0 100644
--- a/usr.bin/locate/code/locate.code.c
+++ b/usr.bin/locate/code/locate.code.c
@@ -1,281 +1,281 @@
 /*
  * SPDX-License-Identifier: BSD-3-Clause
  *
- * Copyright (c) 1995 Wolfram Schneider . Berlin.
+ * Copyright (c) 1995-2022 Wolfram Schneider * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * James A. Woods. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #if 0 #ifndef lint static char copyright[] = "@(#) Copyright (c) 1989, 1993\n\ The Regents of the University of California. 
All rights reserved.\n";
 #endif /* not lint */

 #ifndef lint
 static char sccsid[] = "@(#)locate.code.c 8.1 (Berkeley) 6/6/93";
 #endif /* not lint */
 #endif

 /*
  * PURPOSE:	sorted list compressor (works with a modified 'find'
  *		to encode/decode a filename database)
  *
  * USAGE:	bigram < list > bigrams
  *		process bigrams (see updatedb) > common_bigrams
  *		code common_bigrams < list > squozen_list
  *
  * METHOD:	Uses 'front compression' (see ";login:", Volume 8, Number 1
  *		February/March 1983, p. 8). Output format is, per line, an
  *		offset differential count byte followed by a partially bigram-
  *		encoded ascii residue. A bigram is a two-character sequence,
  *		the first 128 most common of which are encoded in one byte.
  *
  * EXAMPLE:	For simple front compression with no bigram encoding,
  *		if the input is...		then the output is...
  *
  *		/usr/src			 0 /usr/src
  *		/usr/src/cmd/aardvark.c		 8 /cmd/aardvark.c
  *		/usr/src/cmd/armadillo.c	14 armadillo.c
  *		/usr/tmp/zoo			 5 tmp/zoo
  *
  *	The codes are:
  *
  *	0-28	likeliest differential counts + offset to make nonnegative
  *	30	switch code for out-of-range count to follow in next word
  *	31	an 8-bit char follows
  *	128-255 bigram codes (128 most common, as determined by 'updatedb')
  *	32-127  single character (printable) ascii residue (ie, literal)
  *
  * The locate database stores any character except newline ('\n')
  * and NUL ('\0'). The 8-bit character support doesn't waste extra
  * space unless file names contain characters less than 32
  * or greater than 127.
  *
  *
  * SEE ALSO:	updatedb.sh, ../bigram/locate.bigram.c
  *
  * AUTHOR:	James A.
Woods, Informatics General Corp.,
  *		NASA Ames Research Center, 10/82
  *		8-bit file name characters:
  *		Wolfram Schneider, Berlin September 1996
  */

 /*
  * NOTE(review): the header names after the bare #include directives
  * are missing in this copy of the patch.
  */
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include "locate.h"

 #define	BGBUFSIZE	(NBG * 2)	/* size of bigram buffer */

 /* Line buffers for the current and the previous input line. */
-u_char buf1[MAXPATHLEN] = " ";
-u_char buf2[MAXPATHLEN];
+u_char buf1[LOCATE_PATH_MAX] = " ";
+u_char buf2[LOCATE_PATH_MAX];
 /* bigram list read from the file given on the command line, NUL padded */
 u_char bigrams[BGBUFSIZE + 1] = { 0 };

 #define LOOKUP 1 /* use a lookup array instead of a function, 3x faster */

 #ifdef LOOKUP
 /*
  * BGINDEX(x) yields the table entry for the byte pair x[0], x[1];
  * (bg_t)-1 means the pair is not a known bigram.  The argument is not
  * parenthesised in the expansion, so pass a plain pointer name.
  */
 #define	BGINDEX(x) (big[(u_char)*x][(u_char)*(x + 1)])
 typedef short bg_t;
 bg_t big[UCHAR_MAX + 1][UCHAR_MAX + 1];
 #else
 #define	BGINDEX(x) bgindex(x)
 typedef int bg_t;
 int	bgindex(char *);
 #endif /* LOOKUP */

 void	usage(void);

 /*
  * Encode the path list read from stdin and write it to stdout.
  *
  * The single operand names the bigram file.  Its first line (at most
  * BGBUFSIZE bytes) is read into bigrams[] and BGBUFSIZE bytes of that
  * buffer are written to stdout first.  Every following non-empty input
  * line is then written as the difference between its shared-prefix
  * length and that of the previous line, plus OFFSET: one byte if the
  * value lies in 0..2*OFFSET, otherwise SWITCH followed by an int
  * written with putw().  The rest of the line follows: a byte pair found
  * in the bigram table becomes one byte with PARITY set, a byte outside
  * ASCII_MIN..ASCII_MAX is preceded by UMLAUT, any other byte is copied.
  *
  * Exits with 0 on success; usage errors and I/O errors terminate the
  * process with status 1.
  */
 int
 main(int argc, char *argv[])
 {
 	u_char *cp, *oldpath, *path;
 	int ch, code, count, diffcount, oldcount;
 	u_int i, j;
 	FILE *fp;

 	/* the option string is empty, so every option ends in usage() */
 	while ((ch = getopt(argc, argv, "")) != -1)
 		switch(ch) {
 		default:
 			usage();
 		}
 	argc -= optind;
 	argv += optind;

 	if (argc != 1)
 		usage();

 	if ((fp = fopen(argv[0], "r")) == NULL)
 		err(1, "%s", argv[0]);

 	/* First copy bigram array to stdout. */
 	if (fgets(bigrams, BGBUFSIZE + 1, fp) == NULL) {
 		if (!feof(fp) || ferror(fp))
 			err(1, "get bigram array");
 	}

 	/* always BGBUFSIZE bytes, however many fgets() actually stored */
 	if (fwrite(bigrams, 1, BGBUFSIZE, stdout) != BGBUFSIZE)
 		err(1, "stdout");
 	(void)fclose(fp);

 #ifdef LOOKUP
 	/* init lookup table */
 	for (i = 0; i < UCHAR_MAX + 1; i++)
 		for (j = 0; j < UCHAR_MAX + 1; j++)
 			big[i][j] = (bg_t)-1;

 	/* the stored value is the byte offset of the pair inside bigrams[] */
 	for (cp = bigrams, i = 0; *cp != '\0'; i += 2, cp += 2)
 		big[(u_char)*cp][(u_char)*(cp + 1)] = (bg_t)i;

 #endif /* LOOKUP */

 	oldpath = buf1;
 	path = buf2;
 	oldcount = 0;

 	while (fgets(path, sizeof(buf2), stdin) != NULL) {

 		/* skip empty lines */
 		if (*path == '\n')
 			continue;

 		/* remove newline */
 		for (cp = path; *cp != '\0'; cp++) {
 #ifndef LOCATE_CHAR30
 			/* old locate implementations core'd for char 30 */
 			if (*cp == SWITCH)
 				*cp = '?';
 			else
 #endif /* !LOCATE_CHAR30 */

 			/* chop newline */
 			if (*cp == '\n')
 				*cp = '\0';
 		}

 		/* Skip longest common prefix. */
 		for (cp = path; *cp == *oldpath; cp++, oldpath++)
 			if (*cp == '\0')
 				break;

 		count = cp - path;
 		diffcount = count - oldcount + OFFSET;
 		oldcount = count;
 		/* values outside 0..2*OFFSET are written as SWITCH + int */
 		if (diffcount < 0 || diffcount > 2 * OFFSET) {
 			if (putchar(SWITCH) == EOF ||
 			    putw(diffcount, stdout) == EOF)
 				err(1, "stdout");
 		} else
 			if (putchar(diffcount) == EOF)
 				err(1, "stdout");

 		while (*cp != '\0') {
 			/* print *two* characters */

 			if ((code = BGINDEX(cp)) != (bg_t)-1) {
 				/*
 				 * print *one* as bigram
 				 * Found, so mark byte with
 				 * parity bit.
 				 */
 				if (putchar((code / 2) | PARITY) == EOF)
 					err(1, "stdout");
 				cp += 2;
 			}

 			else {
 				for (i = 0; i < 2; i++) {
 					if (*cp == '\0')
 						break;

 					/* print umlauts in file names */
 					if (*cp < ASCII_MIN ||
 					    *cp > ASCII_MAX) {
 						if (putchar(UMLAUT) == EOF ||
 						    putchar(*cp++) == EOF)
 							err(1, "stdout");
 					}

 					else {
 						/* normal character */
 						if(putchar(*cp++) == EOF)
 							err(1, "stdout");
 					}
 				}

 			}
 		}

 		if (path == buf1) {		/* swap pointers */
 			path = buf2;
 			oldpath = buf1;
 		} else {
 			path = buf1;
 			oldpath = buf2;
 		}
 	}
 	/* Non-zero status if there were errors */
 	if (fflush(stdout) != 0 || ferror(stdout))
 		errx(1, "stdout");

 	exit(0);
 }

 #ifndef LOOKUP
 /*
  * Linear search for the pair bg[0], bg[1] in bigrams[].
  *
  * NOTE(review): this function is only compiled when LOOKUP is not
  * defined, and LOOKUP is defined unconditionally above.  It compares
  * char values against NULL and assigns the u_char array bigrams to a
  * char pointer -- check both before enabling this path.
  */
 int
 bgindex(char *bg)			/* Return location of bg in bigrams or -1. */
 {
 	char bg0, bg1, *p;

 	bg0 = bg[0];
 	bg1 = bg[1];
 	for (p = bigrams; *p != NULL; p++)
 		if (*p++ == bg0 && *p == bg1)
 			break;
 	return (*p == NULL ? -1 : (--p - bigrams));
 }
 #endif /* !LOOKUP */

 /* Print the usage line to stderr and exit with status 1. */
 void
 usage(void)
 {
 	(void)fprintf(stderr,
 	    "usage: locate.code common_bigrams < list > squozen_list\n");
 	exit(1);
 }
diff --git a/usr.bin/locate/locate/fastfind.c b/usr.bin/locate/locate/fastfind.c
index 9a3324e20e44..8cc9af924531 100644
--- a/usr.bin/locate/locate/fastfind.c
+++ b/usr.bin/locate/locate/fastfind.c
@@ -1,354 +1,354 @@
 /*
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1995-2022 Wolfram Schneider
  * Copyright (c) 1989, 1993
  *	The Regents of the University of California. All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * James A. Woods.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3.
Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */

 #ifndef _LOCATE_STATISTIC_
 #define _LOCATE_STATISTIC_

 /*
  * Scan a whole locate database and print statistics about it.
  *
  * The first 2 * NBG bytes read from fp are validated as the bigram
  * table with check_bigram_char().  The remaining stream is walked entry
  * by entry; nothing is stored in path[], the array only serves as the
  * base address for the "p - path" length arithmetic.
  *
  * If a prefix count leaves the range 0..LOCATE_PATH_MAX-1, a warning is
  * printed, the scan stops, the figures collected so far are still
  * printed and the process then exits with status 1.
  */
 void
 statistic (fp, path_fcodes)
 	FILE *fp;		/* open database */
 	char *path_fcodes;	/* for error message */
 {
 	long lines, chars, size, size_nbg, big, zwerg, umlaut;
 	u_char *p, *s;
 	int c;
 	int count, longest_path;
 	int error = 0;
-	u_char bigram1[NBG], bigram2[NBG], path[MAXPATHLEN];
+	u_char bigram1[NBG], bigram2[NBG], path[LOCATE_PATH_MAX];

 	for (c = 0, p = bigram1, s = bigram2; c < NBG; c++) {
 		p[c] = check_bigram_char(getc(fp));
 		s[c] = check_bigram_char(getc(fp));
 	}

 	lines = chars = big = zwerg = umlaut = longest_path = 0;
 	size = NBG + NBG;

 	for (c = getc(fp), count = 0; c != EOF; size++) {
 		if (c == SWITCH) {
 			/* zwerg counts the entries that carry an int-sized count */
 			count += getwf(fp) - OFFSET;
 			size += sizeof(int);
 			zwerg++;
 		} else
 			count += c - OFFSET;

-		if (count < 0 || count >= MAXPATHLEN) {
+		if (count < 0 || count >= LOCATE_PATH_MAX) {
 			/* stop on error and display the statistics anyway */
-			warnx("corrupted database: %s", path_fcodes);
+			warnx("corrupted database: %s %d", path_fcodes, count);
 			error = 1;
 			break;
 		}

 		/* bytes <= SWITCH (and EOF) end the entry; c is reused above */
 		for (p = path + count; (c = getc(fp)) > SWITCH; size++)
 			if (c < PARITY) {
 				if (c == UMLAUT) {
 					c = getc(fp);
 					size++;
 					umlaut++;
 				}
 				p++;
 			} else {
 				/* bigram char */
 				big++;
 				p += 2;
 			}

 		p++;
 		lines++;
 		chars += (p - path);
 		if ((p - path) > longest_path)
 			longest_path = p - path;
 	}

 	/* size without bigram db */
 	size_nbg = size - (2 * NBG);

 	(void)printf("\nDatabase: %s\n", path_fcodes);
 	(void)printf("Compression: Front: %2.2f%%, ",
 	    chars > 0 ? (size_nbg + big) / (chars / (float)100) : 0);
 	(void)printf("Bigram: %2.2f%%, ",
 	    big > 0 ? (size_nbg - big) / (size_nbg / (float)100) : 0);
 	/* incl. bigram db overhead */
 	(void)printf("Total: %2.2f%%\n",
 	    chars > 0 ? size / (chars / (float)100) : 0);
 	(void)printf("Filenames: %ld, ", lines);
 	(void)printf("Characters: %ld, ", chars);
 	(void)printf("Database size: %ld\n", size);
 	(void)printf("Bigram characters: %ld, ", big);
 	(void)printf("Integers: %ld, ", zwerg);
 	(void)printf("8-Bit characters: %ld\n", umlaut);
 	printf("Longest path: %d\n", longest_path > 0 ? longest_path - 1 : 0);

 	/* non zero exit on corrupt database */
 	if (error)
 		exit(error);
 }
 #endif /* _LOCATE_STATISTIC_ */

 extern	char	separator;

 /*
  * Search one locate database for pathpart.
  *
  * This single body is compiled under one of four names, selected by
  * FF_MMAP and FF_ICASE: fastfind, fastfind_icase, fastfind_mmap or
  * fastfind_mmap_icase.  With FF_MMAP the database is read through
  * paddr/len, otherwise through fp; with FF_ICASE characters are
  * compared through TOLOWER() and fnmatch() is given FNM_CASEFOLD.
  *
  * Each database entry is decoded into path[] on top of the previous
  * entry.  While decoding, the position of the last byte equal to cc --
  * the final character of the literal returned by patprep() -- is
  * remembered.  Only entries that contain such a byte in their new part,
  * or that follow a matching entry, are compared backwards against the
  * literal and, if the pattern contains metacharacters, passed on to
  * fnmatch().
  *
  * A match is printed followed by separator, or only counted when
  * f_silent is set; with f_limit set the process exits through errx()
  * once counter exceeds f_limit.  f_silent, f_limit and counter are
  * declared outside the lines shown here.  A prefix count or decoded
  * length outside the path buffer ends the process with a
  * "corrupted database" message.
  */
 void
 #ifdef FF_MMAP

 #ifdef FF_ICASE
 fastfind_mmap_icase
 #else
 fastfind_mmap
 #endif /* FF_ICASE */
 (pathpart, paddr, len, database)
 	char *pathpart;		/* search string */
 	caddr_t paddr;		/* mmap pointer */
 	off_t len;		/* length of database */
 	char *database;		/* for error message */

 #else /* MMAP */

 #ifdef FF_ICASE
 fastfind_icase
 #else
 fastfind
 #endif /* FF_ICASE */

 (fp, pathpart, database)
 	FILE *fp;		/* open database */
 	char *pathpart;		/* search string */
 	char *database;		/* for error message */
 #endif /* MMAP */

 {
 	u_char *p, *s, *patend, *q, *foundchar;
 	int c, cc;
 	int count, found, globflag;
 	u_char *cutoff;
-	u_char bigram1[NBG], bigram2[NBG], path[MAXPATHLEN + 2];
+	u_char bigram1[NBG], bigram2[NBG], path[LOCATE_PATH_MAX + 2];

 #ifdef FF_ICASE
 	/* use a lookup table for case insensitive search */
 	u_char table[UCHAR_MAX + 1];

 	tolower_word(pathpart);
 #endif /* FF_ICASE*/

 	/* init bigram table */
 #ifdef FF_MMAP
 	for (c = 0, p = bigram1, s = bigram2; c < NBG; c++, len-= 2) {
 		p[c] = check_bigram_char(*paddr++);
 		s[c] = check_bigram_char(*paddr++);
 	}
 #else
 	for (c = 0, p = bigram1, s = bigram2; c < NBG; c++) {
 		p[c] = check_bigram_char(getc(fp));
 		s[c] = check_bigram_char(getc(fp));
 	}
 #endif /* FF_MMAP */

 	/* find optimal (last) char for searching */
 	for (p = pathpart; *p != '\0'; p++)
 		if (strchr(LOCATE_REG, *p) != NULL)
 			break;

 	/* globflag: the pattern contains at least one LOCATE_REG character */
 	if (*p == '\0')
 		globflag = 0;
 	else
 		globflag = 1;

 	p = pathpart;
 	patend = patprep(p);
 	cc = *patend;

 #ifdef FF_ICASE
 	/* set patend char to true */
 	for (c = 0; c < UCHAR_MAX + 1; c++)
 		table[c] = 0;

 	table[TOLOWER(*patend)] = 1;
 	table[toupper(*patend)] = 1;
 #endif /* FF_ICASE */

 	/* main loop */
 	found = count = 0;
 	foundchar = 0;

 #ifdef FF_MMAP
 	c = (u_char)*paddr++; len--;
 	for (; len > 0; ) {
 #else
 	c = getc(fp);
 	for (; c != EOF; ) {
 #endif /* FF_MMAP */

 		/* go forward or backward */
 		if (c == SWITCH) { /* big step, an integer */
 #ifdef FF_MMAP
 			if (len < sizeof(int))
 				errx(1, "corrupted database: %s", database);

 			count += getwm(paddr) - OFFSET;
 			len -= INTSIZE; paddr += INTSIZE;
 #else
 			count += getwf(fp) - OFFSET;
 #endif /* FF_MMAP */
 		} else {	   /* slow step, =< 14 chars */
 			count += c - OFFSET;
 		}

-		if (count < 0 || count >= MAXPATHLEN)
+		if (count < 0 || count >= LOCATE_PATH_MAX)
 			errx(1, "corrupted database: %s %d", database, count);

 		/* overlay old path */
 		p = path + count;
 		foundchar = p - 1;

 #ifdef FF_MMAP
 		for (; len > 0;) {
 			c = (u_char)*paddr++;
 			len--;
 #else
 		for (;;) {
 			c = getc(fp);
 #endif /* FF_MMAP */
 			/*
 			 * == UMLAUT: 8 bit char followed
 			 * <= SWITCH: offset
 			 * >= PARITY: bigram
 			 * rest: single ascii char
 			 *
 			 * offset < SWITCH < UMLAUT < ascii < PARITY < bigram
 			 */
 			if (c < PARITY) {
 				if (c <= UMLAUT) {
 					if (c == UMLAUT) {
 						/*
 						 * NOTE(review): the mmap variant
 						 * reads the escaped byte without
 						 * re-checking len -- confirm a
 						 * trailing UMLAUT cannot occur.
 						 */
 #ifdef FF_MMAP
 						c = (u_char)*paddr++;
 						len--;
 #else
 						c = getc(fp);
 #endif /* FF_MMAP */

 					} else
 						break; /* SWITCH */
 				}
 #ifdef FF_ICASE
 				if (table[c])
 #else
 				if (c == cc)
 #endif /* FF_ICASE */
 					foundchar = p;
 				*p++ = c;
 			}
 			else {
 				/* bigrams are parity-marked */
 				TO7BIT(c);

 #ifndef FF_ICASE
 				if (bigram1[c] == cc ||
 				    bigram2[c] == cc)
 #else

 				if (table[bigram1[c]] ||
 				    table[bigram2[c]])
 #endif /* FF_ICASE */
 					foundchar = p + 1;

 				*p++ = bigram1[c];
 				*p++ = bigram2[c];
 			}

 			/*
 			 * path[] has two spare bytes, so the store above may
 			 * run at most two bytes past this limit before the
 			 * check fires.
 			 * NOTE(review): "p - path" is a pointer difference
 			 * printed with %ld in the new line below.
 			 */
-			if (p - path >= MAXPATHLEN)
-				errx(1, "corrupted database: %s", database);
+			if (p - path >= LOCATE_PATH_MAX)
+				errx(1, "corrupted database: %s %ld", database, p - path);

 		}

 		if (found) {			/* previous line matched */
 			cutoff = path;
 			*p-- = '\0';
 			foundchar = p;
 		} else if (foundchar >= path + count) { /* a char matched */
 			*p-- = '\0';
 			cutoff = path + count;
 		} else				/* nothing to do */
 			continue;

 		found = 0;
 		/*
 		 * NOTE(review): with s == path and a literal longer than one
 		 * character, q below starts at path - 1 and is dereferenced
 		 * -- confirm this read is acceptable.
 		 */
 		for (s = foundchar; s >= cutoff; s--) {
 			if (*s == cc
 #ifdef FF_ICASE
 			    || TOLOWER(*s) == cc
 #endif /* FF_ICASE */
 			    ) {	/* fast first char check */
 				for (p = patend - 1, q = s - 1; *p != '\0';
 				     p--, q--)
 					if (*q != *p
 #ifdef FF_ICASE
 					    && TOLOWER(*q) != *p
 #endif /* FF_ICASE */
 					    )
 						break;
 				if (*p == '\0') {   /* fast match success */
 					found = 1;
 					if (!globflag ||
 #ifndef FF_ICASE
 					    !fnmatch(pathpart, path, 0))
 #else
 					    !fnmatch(pathpart, path,
 						     FNM_CASEFOLD))
 #endif /* !FF_ICASE */
 					{
 						if (f_silent)
 							counter++;
 						else if (f_limit) {
 							counter++;
 							if (f_limit >= counter)
 								(void)printf("%s%c",path,separator);
 							else
 								errx(0, "[show only %ld lines]", counter - 1);
 						} else
 							(void)printf("%s%c",path,separator);
 					}
 					break;
 				}
 			}
 		}
 	}
 }
diff --git a/usr.bin/locate/locate/locate.h b/usr.bin/locate/locate/locate.h
index 6393160d55b4..18187ca00d84 100644
--- a/usr.bin/locate/locate/locate.h
+++ b/usr.bin/locate/locate/locate.h
@@ -1,70 +1,76 @@
 /*
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1995 Wolfram Schneider . Berlin.
  * Copyright (c) 1989, 1993
  *	The Regents of the University of California. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1.
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*
  *	@(#)locate.h	8.1 (Berkeley) 6/6/93
  * $FreeBSD$
  */

 /* Symbolic constants shared by locate.c and code.c */

 #define	NBG		128		/* number of bigrams considered */
 #define	OFFSET		14		/* abs value of max likely diff */
 #define	PARITY		0200		/* parity bit */
 #define	SWITCH		30		/* switch code */
 #define	UMLAUT		31		/* an 8-bit char follows */

 /* 0-28	likeliest differential counts + offset to make nonnegative */
 #define	LDC_MIN		0
 #define	LDC_MAX		28

 /* 128-255 bigram codes (128 most common, as determined by 'updatedb') */
 #define	BIGRAM_MIN	(UCHAR_MAX - SCHAR_MAX)
 #define	BIGRAM_MAX	UCHAR_MAX

 /* 32-127  single character (printable) ascii residue (ie, literal) */
 #define	ASCII_MIN	32
 #define	ASCII_MAX	SCHAR_MAX

 /*
  * TO7BIT(x) clears everything above SCHAR_MAX in x and stores the
  * result back into x.  The argument is used twice and is not
  * parenthesised, so pass a plain variable.
  */
 /* #define	TO7BIT(x)	(x = ( ((u_char)x) & SCHAR_MAX )) */
 #define	TO7BIT(x)	(x = x & SCHAR_MAX )

 /*
  * NOTE(review): the "define TOLOWER" line in the first branch has no
  * leading '#'.  That branch is only selected when UCHAR_MAX >= 4096;
  * otherwise TOLOWER() indexes the external table myctype[].
  */
 #if UCHAR_MAX >= 4096
 define TOLOWER(ch)	tolower(ch)
 #else

 extern u_char myctype[UCHAR_MAX + 1];
 #define	TOLOWER(ch)	(myctype[ch])
 #endif

 #define	INTSIZE (sizeof(int))

 #define	LOCATE_REG "*?[]\\"	/* fnmatch(3) meta characters */
+
+/* max. path length for locate. Should be at least 1024 (PATH_MAX), but can be longer */
+#ifndef LOCATE_PATH_MAX
+#define LOCATE_PATH_MAX (1*1024)
+#endif
+
diff --git a/usr.bin/locate/locate/util.c b/usr.bin/locate/locate/util.c
index 8482ec998f03..1d15f83b6826 100644
--- a/usr.bin/locate/locate/util.c
+++ b/usr.bin/locate/locate/util.c
@@ -1,271 +1,270 @@
 /*
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1995-2022 Wolfram Schneider
  * Copyright (c) 1989, 1993
  *	The Regents of the University of California. All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * James A. Woods.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2.
Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 /*
  * NOTE(review): the header names after the bare #include directives
  * are missing in this copy of the patch.
  */
-
+#include
 #include
 #include
 #include
-#include
 #include
 #include

 #include "locate.h"

 char	**colon(char **, char*, char*);
 char	*patprep(char *);
 u_char	*tolower_word(u_char *);
 int	getwm(caddr_t);
 int	getwf(FILE *);
 int	check_bigram_char(int);

 /*
  * Validate bigram chars. If the test fails, the database is corrupt
  * or is obviously not a locate database.
  *
  * Returns ch unchanged when it is legal; otherwise the process is
  * terminated through errx().
  */
 int
 check_bigram_char(ch)
 	int ch;
 {
 	/* legal bigram: 0, ASCII_MIN ... ASCII_MAX */
 	if (ch == 0 ||
 	    (ch >= ASCII_MIN && ch <= ASCII_MAX))
 		return(ch);

 	errx(1,
 		"locate database header corrupt, bigram char outside 0, %d-%d: %d",
 		ASCII_MIN, ASCII_MAX, ch);
 	exit(1);
 }

 /* split a colon separated string into a char vector
  *
  * "bla:foo" -> {"bla", "foo"}
  * "bla:"    -> {"bla", dot}
  * ":"       -> {dot}
  * "bla"     -> {"bla"}
  * ""        -> do nothing
  *
  * The new elements are appended, in the order they appear in path,
  * after the elements already stored in dbv; the vector stays NULL
  * terminated.  dbv may be NULL, in which case an empty vector is
  * allocated first.  Non-empty components are copied into malloc'ed
  * strings, an empty component is represented by the dot pointer
  * itself.  Returns the (possibly moved) vector; allocation failures
  * end the process through err().
  */
 char **
 colon(char **dbv, char *path, char *dot)
 {
 	int vlen, slen;
 	char *c, *ch, *p;
 	char **pv;

 	if (dbv == NULL) {
 		if ((dbv = malloc(sizeof(char *))) == NULL)
 			err(1, "malloc");
 		*dbv = NULL;
 	}

 	/* empty string */
 	if (*path == '\0') {
 		warnx("empty database name, ignored");
 		return(dbv);
 	}

 	/* length of string vector */
 	for(vlen = 0, pv = dbv; *pv != NULL; pv++, vlen++);

 	/* c marks the start of the current component, ch scans ahead */
 	for (ch = c = path; ; ch++) {
 		/*
 		 * A component ends at ':' or at the final NUL; the NUL that
 		 * directly follows a lone ":" adds no second element.
 		 */
 		if (*ch == ':' ||
 		    (!*ch && !(*(ch - 1) == ':' && ch == 1+ path))) {
 			/* single colon -> dot */
 			if (ch == c)
 				p = dot;
 			else {
 				/* a string */
 				slen = ch - c;
 				if ((p = malloc(sizeof(char) * (slen + 1)))
 				    == NULL)
 					err(1, "malloc");
 				bcopy(c, p, slen);
 				*(p + slen) = '\0';
 			}
 			/* increase dbv with element p */
 			if ((dbv = realloc(dbv, sizeof(char *) * (vlen + 2)))
 			    == NULL)
 				err(1, "realloc");
 			*(dbv + vlen) = p;
 			*(dbv + ++vlen) = NULL;
 			c = ch + 1;
 		}
 		if (*ch == '\0')
 			break;
 	}
 	return (dbv);
 }

 /*
  * extract last glob-free subpattern in name for fast pre-match; prepend
  * '\0' for backwards match; return end of new pattern
  *
  * The subpattern is stored in the static buffer globfree, so the
  * result is overwritten by the next call.
  */
 static char globfree[100];

 char *
 patprep(name)
 	char *name;
 {
 	char *endmark, *p, *subp;

 	subp = globfree;
 	*subp++ = '\0';   /* set first element to '\0' */
 	p = name + strlen(name) - 1;

 	/* skip trailing metacharacters */
 	for (; p >= name; p--)
 		if (strchr(LOCATE_REG, *p) == NULL)
 			break;

 	/*
 	 * check if maybe we are in a character class
 	 *
 	 * 'foo.[ch]'
 	 *        |----< p
 	 */
 	if (p >= name &&
 	    (strchr(p, '[') != NULL || strchr(p, ']') != NULL)) {
 		for (p = name; *p != '\0'; p++)
 			if (*p == ']' || *p == '[')
 				break;
 		p--;

 		/*
 		 * cannot find a non-meta character, give up
 		 * '*\*[a-z]'
 		 *    |-------< p
 		 */
 		if (p >= name && strchr(LOCATE_REG, *p) != NULL)
 			p = name - 1;
 	}

 	if (p < name)
 		/* only meta chars: "???", force '/' search */
 		*subp++ = '/';

 	else {
 		for (endmark = p; p >= name; p--)
 			if (strchr(LOCATE_REG, *p) != NULL)
 				break;
 		/*
 		 * NOTE(review): this loop lets subp advance up to
 		 * globfree + sizeof(globfree); the terminating store
 		 * after the block would then land one byte past the
 		 * array for a long literal -- confirm the bound.
 		 */
 		for (++p;
 		    (p <= endmark) && subp < (globfree + sizeof(globfree));)
 			*subp++ = *p++;
 	}
 	*subp = '\0';
 	return(--subp);
 }

 /* Lower-case word in place with TOLOWER() and return it. */
 u_char *
 tolower_word(word)
 	u_char *word;
 {
 	u_char *p;

 	for(p = word; *p != '\0'; p++)
 		*p = TOLOWER(*p);

 	return(word);
 }

 /*
  * Read integer from mmap pointer.
  * Essentially a simple ``return *(int *)p'' but avoids sigbus
  * for integer alignment (SunOS 4.x, 5.x).
  *
  * Convert network byte order to host byte order if necessary.
  * So we can read a locate database on FreeBSD/i386 (little endian)
  * which was built on SunOS/sparc (big endian).
  *
  * The value is first taken as stored; only if it is not strictly
  * between i_min and i_max is the ntohl() result tried.  If that is
  * out of range as well, the process ends through errx().
  */
 int
 getwm(p)
 	caddr_t p;
 {
 	union {
 		char buf[INTSIZE];
 		int i;
 	} u;
 	int i, hi;

 	/* the integer is stored by an offset of 14 (!!!) */
-	int i_max = MAXPATHLEN + OFFSET;
-	int i_min = -(MAXPATHLEN - OFFSET);
+	int i_max = LOCATE_PATH_MAX + OFFSET;
+	int i_min = -(LOCATE_PATH_MAX - OFFSET);

 	/* copy byte by byte, p does not have to be int aligned */
 	for (i = 0; i < (int)INTSIZE; i++)
 		u.buf[i] = *p++;

 	i = u.i;

 	if (i >= i_max || i <= i_min) {
 		hi = ntohl(i);
 		if (hi >= i_max || hi <= i_min)
 			errx(1, "integer out of range: %d < %d < %d",
 			    i_min, abs(i) < abs(hi) ? i : hi, i_max);
 		return(hi);
 	}
 	return(i);
 }

 /*
  * Read integer from stream.
  *
  * Convert network byte order to host byte order if necessary.
  * So we can read on FreeBSD/i386 (little endian) a locate database
  * which was built on SunOS/sparc (big endian).
  *
  * Same range check and ntohl() fallback as getwm(), with the word
  * obtained from getw(fp).
  */
 int
 getwf(fp)
 	FILE *fp;
 {
 	int word, hword;
-	int i_max = MAXPATHLEN + OFFSET;
-	int i_min = -(MAXPATHLEN - OFFSET);
+	int i_max = LOCATE_PATH_MAX + OFFSET;
+	int i_min = -(LOCATE_PATH_MAX - OFFSET);

 	word = getw(fp);

 	if (word >= i_max || word <= i_min) {
 		hword = ntohl(word);
 		if (hword >= i_max || hword <= i_min)
 			errx(1, "integer out of range: %d < %d < %d",
 			    i_min, abs(word) < abs(hword) ? word : hword, i_max);
 		return(hword);
 	}
 	return(word);
 }