diff --git a/usr.bin/locate/locate/Makefile b/usr.bin/locate/locate/Makefile --- a/usr.bin/locate/locate/Makefile +++ b/usr.bin/locate/locate/Makefile @@ -3,7 +3,7 @@ CONFS= locate.rc PROG= locate -SRCS= util.c locate.c +SRCS= util.c statistic.c locate.c CFLAGS+= -I${.CURDIR} -DMMAP SCRIPTS=updatedb.sh mklocatedb.sh concatdb.sh MAN= locate.1 locate.updatedb.8 diff --git a/usr.bin/locate/locate/fastfind.c b/usr.bin/locate/locate/fastfind.c --- a/usr.bin/locate/locate/fastfind.c +++ b/usr.bin/locate/locate/fastfind.c @@ -36,87 +36,6 @@ */ -#ifndef _LOCATE_STATISTIC_ -#define _LOCATE_STATISTIC_ - -void -statistic (fp, path_fcodes) - FILE *fp; /* open database */ - char *path_fcodes; /* for error message */ -{ - long lines, chars, size, size_nbg, big, zwerg, umlaut; - u_char *p, *s; - int c; - int count, longest_path; - int error = 0; - u_char bigram1[NBG], bigram2[NBG], path[LOCATE_PATH_MAX]; - - for (c = 0, p = bigram1, s = bigram2; c < NBG; c++) { - p[c] = check_bigram_char(getc(fp)); - s[c] = check_bigram_char(getc(fp)); - } - - lines = chars = big = zwerg = umlaut = longest_path = 0; - size = NBG + NBG; - - for (c = getc(fp), count = 0; c != EOF; size++) { - if (c == SWITCH) { - count += getwf(fp) - OFFSET; - size += sizeof(int); - zwerg++; - } else - count += c - OFFSET; - - if (count < 0 || count >= LOCATE_PATH_MAX) { - /* stop on error and display the statstics anyway */ - warnx("corrupted database: %s %d", path_fcodes, count); - error = 1; - break; - } - - for (p = path + count; (c = getc(fp)) > SWITCH; size++) - if (c < PARITY) { - if (c == UMLAUT) { - c = getc(fp); - size++; - umlaut++; - } - p++; - } else { - /* bigram char */ - big++; - p += 2; - } - - p++; - lines++; - chars += (p - path); - if ((p - path) > longest_path) - longest_path = p - path; - } - - /* size without bigram db */ - size_nbg = size - (2 * NBG); - - (void)printf("\nDatabase: %s\n", path_fcodes); - (void)printf("Compression: Front: %2.2f%%, ", chars > 0 ? (size_nbg + big) / (chars / (float)100) : 0); - (void)printf("Bigram: %2.2f%%, ", big > 0 ? (size_nbg - big) / (size_nbg / (float)100) : 0); - /* incl. bigram db overhead */ - (void)printf("Total: %2.2f%%\n", chars > 0 ? size / (chars / (float)100) : 0); - (void)printf("Filenames: %ld, ", lines); - (void)printf("Characters: %ld, ", chars); - (void)printf("Database size: %ld\n", size); - (void)printf("Bigram characters: %ld, ", big); - (void)printf("Integers: %ld, ", zwerg); - (void)printf("8-Bit characters: %ld\n", umlaut); - printf("Longest path: %d\n", longest_path > 0 ? longest_path - 1 : 0); - - /* non zero exit on corrupt database */ - if (error) - exit(error); -} -#endif /* _LOCATE_STATISTIC_ */ - extern char separator; void diff --git a/usr.bin/locate/locate/locate.c b/usr.bin/locate/locate/locate.c --- a/usr.bin/locate/locate/locate.c +++ b/usr.bin/locate/locate/locate.c @@ -85,6 +85,7 @@ #include #include #include +#include #ifdef MMAP # include @@ -109,7 +110,7 @@ u_char myctype[UCHAR_MAX + 1]; void usage(void); -void statistic(FILE *, char *); +void statistic(FILE *, char *, int); void fastfind(FILE *, char *, char *); void fastfind_icase(FILE *, char *, char *); void fastfind_mmap(char *, caddr_t, off_t, char *); @@ -231,8 +232,8 @@ /* can only read stdin once */ if (f_stdin) { fp = stdin; - if (*(s+1) != NULL) { - warnx("read database from stdin, use only `%s' as pattern", *s); + if (!f_statistic && *(s+1) != NULL) { + warnx("read database from stdin, use only first argument `%s' as pattern", *s); *(s+1) = NULL; } } @@ -249,7 +250,7 @@ /* count only chars or lines */ if (f_statistic) { - statistic(fp, db); + statistic(fp, db, f_statistic++); (void)fclose(fp); return; } @@ -330,8 +331,6 @@ /* load fastfind functions */ - -/* statistic */ /* fastfind_mmap, fastfind_mmap_icase */ #ifdef MMAP #undef FF_MMAP diff --git a/usr.bin/locate/locate/statistic.c b/usr.bin/locate/locate/statistic.c new file mode 100644 --- /dev/null +++ b/usr.bin/locate/locate/statistic.c @@ -0,0 +1,202 @@ +/* + * SPDX-License-Identifier: BSD-3-Clause + * + * Copyright (c) 1995-2022 Wolfram Schneider + * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * James A. Woods. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#include +#include +#include +#include +#include +#include +#include +#include "locate.h" + +extern int getwf(FILE *); +extern int check_bigram_char(int); + +/* display last update time in human readable form */ +char * +last_update (char *db_path) +{ + char *last_update; + time_t db_time; + char *unknown = "unknown"; + struct stat sb; + int len; + + if (strcmp(db_path, "-") == 0) + return unknown; + + /* should not happens */ + if (stat(db_path, &sb) == -1) { + warnx("stat: %s", db_path); + return unknown; + } + + db_time = sb.st_mtime; + last_update = ctime(&db_time); + + /* remove newline */ + len = strlen(last_update); + if (len <= 0) + return unknown; + *(last_update + len - 1) = '\0'; + + return last_update; +} + +/* display last update time in human readable form */ +float +days_since_last_update (char *db_path) +{ + long long diff_time = 0; + struct stat sb; + + if (strcmp(db_path, "-") == 0) + return 0; + + /* should not happens */ + if (stat(db_path, &sb) == -1) { + warnx("stat: %s", db_path); + return 0; + } + + diff_time = difftime(time(NULL), (time_t)sb.st_mtime); + if (diff_time < 0) { + warnx("time stamp is in future"); + return 0; + } + + return diff_time; +} + + +void +statistic (fp, db_path, db_number) + FILE *fp; /* open database */ + char *db_path; /* for error message */ + int db_number; /* 1..N */ +{ + long filenames, total_bytes, size, size_nbg, bigrams, zwerg, umlaut; + int c; + int count, longest_path; + int error = 0; + u_char *p, *s; + u_char bigram1[NBG], bigram2[NBG], path[LOCATE_PATH_MAX]; + + for (c = 0, p = bigram1, s = bigram2; c < NBG; c++) { + p[c] = check_bigram_char(getc(fp)); + s[c] = check_bigram_char(getc(fp)); + } + + filenames = total_bytes = bigrams = zwerg = umlaut = longest_path = 0; + size = NBG + NBG; + + for (c = getc(fp), count = 0; c != EOF; size++) { + if (c == SWITCH) { + count += getwf(fp) - OFFSET; + size += sizeof(int); + zwerg++; + } else + count += c - OFFSET; + + if (count < 0 || count >= LOCATE_PATH_MAX) { + /* stop on error and display the statstics anyway */ + warnx("corrupted database: %s %d", db_path, count); + error = 1; + break; + } + + for (p = path + count; (c = getc(fp)) > SWITCH; size++) + if (c < PARITY) { + if (c == UMLAUT) { + c = getc(fp); + size++; + umlaut++; + } + p++; + } else { + /* bigram char */ + bigrams++; + p += 2; + } + + p++; + filenames++; + total_bytes += (p - path); + if ((p - path) > longest_path) + longest_path = p - path; + } + + if (!feof(fp) || ferror(fp)) { + error = 1; + warnx("stdin"); + } + + /* size without bigram db */ + size_nbg = size - (2 * NBG); + + /* more than one database statistic */ + if (db_number > 1) + printf("\n"); + + printf("Database: %s\n", db_path); + printf("Last database update: %s (%3.1f days ago)\n", + last_update(db_path), days_since_last_update(db_path)/86400); + printf("Database size: %ld bytes\n", size); + printf("Filenames: %ld\n", filenames); + printf("Longest pathname: %d bytes\n", longest_path > 0 ? longest_path - 1 : 0); + + printf("Compression factor: "); + /* 0 .. N usually a factor 5-8 */ + printf("front: %2.1fx, ", total_bytes > 0 ? (float)total_bytes / (size_nbg + bigrams) : 0); + + /* 0, 1..2, usually a factor 1.3 */ + printf("bigrams: %2.1fx, ", bigrams > 0 ? (float)(size_nbg + bigrams) / size_nbg : 0); + + /* incl. bigram db overhead, usually a factor 8-12 */ + printf("total: %2.1fx\n", total_bytes > 0 ? (float)total_bytes / size : 0); + + printf("Filenames size: %ld bytes\n", total_bytes); + printf("Data types: "); + printf("int: %ld, ", zwerg); + printf("bigrams: %ld, ", bigrams); + printf("nonprintable ASCII: %ld\n", umlaut); + + /* non zero exit on corrupt database */ + if (error) + exit(error); +}