Index: usr.bin/locate/locate/statistic.c =================================================================== --- /dev/null +++ usr.bin/locate/locate/statistic.c @@ -0,0 +1,201 @@ +/* + * SPDX-License-Identifier: BSD-3-Clause + * + * Copyright (c) 1995-2022 Wolfram Schneider + * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * James A. Woods. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#include +#include +#include +#include +#include +#include +#include +#include "locate.h" + +extern int getwf(FILE *); +extern int check_bigram_char(int); + +/* display last update time in human readable form */ +char * +last_update (char *db_path) +{ + char *last_update; + time_t db_time; + char *unknown = "unknown"; + struct stat sb; + int len; + + if (strcmp(db_path, "-") == 0) + return unknown; + + /* should not happens */ + if (stat(db_path, &sb) == -1) { + warnx("stat: %s", db_path); + return unknown; + } + + db_time = sb.st_mtime; + last_update = ctime(&db_time); + + /* remove newline */ + len = strlen(last_update); + if (len <= 0) + return unknown; + *(last_update + len - 1) = '\0'; + + return last_update; +} + +/* display last update time in human readable form */ +float +days_since_last_update (char *db_path) +{ + long long diff_time = 0; + struct stat sb; + + if (strcmp(db_path, "-") == 0) + return 0; + + /* should not happens */ + if (stat(db_path, &sb) == -1) { + warnx("stat: %s", db_path); + return 0; + } + + diff_time = difftime(time(NULL), (time_t)sb.st_mtime); + if (diff_time < 0) { + warnx("time stamp is in future"); + return 0; + } + + return diff_time; +} + + +void +statistic (fp, db_path, db_number) + FILE *fp; /* open database */ + char *db_path; /* for error message */ + int db_number; /* 1..N */ +{ + long filenames, total_bytes, size, size_nbg, bigrams, zwerg, umlaut; + int c; + int count, longest_path; + int error = 0; + u_char *p, *s; + u_char bigram1[NBG], bigram2[NBG], path[LOCATE_PATH_MAX]; + + for (c = 0, p = bigram1, s = bigram2; c < NBG; c++) { + p[c] = check_bigram_char(getc(fp)); + s[c] = check_bigram_char(getc(fp)); + } + + filenames = total_bytes = bigrams = zwerg = umlaut = longest_path = 0; + size = NBG + NBG; + + for (c = getc(fp), count = 0; c != EOF; size++) { + if (c == SWITCH) { + count += getwf(fp) - OFFSET; + size += sizeof(int); + zwerg++; + } else + count += c - OFFSET; + + if (count < 0 || count >= LOCATE_PATH_MAX) { + /* stop on error and display the statstics anyway */ + warnx("corrupted database: %s %d", db_path, count); + error = 1; + break; + } + + for (p = path + count; (c = getc(fp)) > SWITCH; size++) + if (c < PARITY) { + if (c == UMLAUT) { + c = getc(fp); + size++; + umlaut++; + } + p++; + } else { + /* bigram char */ + bigrams++; + p += 2; + } + + p++; + filenames++; + total_bytes += (p - path); + if ((p - path) > longest_path) + longest_path = p - path; + } + + if (!feof(fp) || ferror(fp)) { + error = 1; + warnx("stdin"); + } + + /* size without bigram db */ + size_nbg = size - (2 * NBG); + + /* more than one database statistic */ + if (db_number > 1) + printf("\n"); + + printf("Database: %s\n", db_path); + printf("Last database update: %s (%3.1f days ago)\n", last_update(db_path), days_since_last_update(db_path)/86400); + printf("Database size: %ld bytes\n", size); + printf("Filenames: %ld\n", filenames); + printf("Longest pathname: %d bytes\n", longest_path > 0 ? longest_path - 1 : 0); + + printf("Compression factor: "); + /* 0 .. N usually a factor 5-8 */ + printf("front: %2.1fx, ", total_bytes > 0 ? (float)total_bytes / (size_nbg + bigrams) : 0); + + /* 0, 1..2, usually a factor 1.3 */ + printf("bigrams: %2.1fx, ", bigrams > 0 ? (float)(size_nbg + bigrams) / size_nbg : 0); + + /* incl. bigram db overhead, usually a factor 8-12 */ + printf("total: %2.1fx\n", total_bytes > 0 ? (float)total_bytes / size : 0); + + printf("Filenames size: %ld bytes\n", total_bytes); + printf("Data types: "); + printf("int: %ld, ", zwerg); + printf("bigrams: %ld, ", bigrams); + printf("nonprintable ASCII: %ld\n", umlaut); + + /* non zero exit on corrupt database */ + if (error) + exit(error); +}