Switch diff(1)'s file scanning from stdio to mmap(2).

Both inputs are mapped read-only once in diffreg(); files_differ(),
asciifile(), prepare()/readhash() and check()/skipline() then walk the
mappings instead of calling getc(3)/fread(3).  output() keeps using the
FILE handles.

Review fixes folded into this revision:
 * mmap(2) failure is detected with MAP_FAILED, not NULL.
 * Zero-length inputs are not mapped (mmap(2) fails with EINVAL for a
   zero length); files_differ() and asciifile() handle them explicitly.
 * buf1/buf2 are uint8_t *, matching every consumer's parameter type.
 * check() and readhash() keep their stdio-era control flow on top of
   bufgetc()/bufungetc(); the hand-converted pointer loops could read
   past the mapping and miscounted offsets after a mismatch.
 * skipline() no longer stores NULL through *buf or returns 0 for an
   unterminated last line; it returns length + 1 as the getc(3) loop did.
 * check() tests d (not c) against EOF before skipping the rest of the
   second file's line.
 * The comment on files_differ() no longer promises a -1 return.

NOTE(review): this copy was rebuilt from a whitespace-collapsed paste.
Indentation and blank context lines are reconstructed and should be
confirmed against the tree.  The header names on the #include context
lines were lost, so that hunk is given without context; <sys/mman.h> is
assumed to be the added header because it declares mmap(2).

Index: usr.bin/diff/diffreg.c
===================================================================
--- usr.bin/diff/diffreg.c
+++ usr.bin/diff/diffreg.c
@@ -73,0 +74 @@
+#include <sys/mman.h>
@@ -183,12 +184,12 @@
 #define MIN_PAD		1
 static FILE	*opentemp(const char *);
 static void	 output(char *, FILE *, char *, FILE *, int);
-static void	 check(FILE *, FILE *, int);
+static void	 check(const uint8_t *, const uint8_t *, size_t, size_t, int);
 static void	 range(int, int, const char *);
 static void	 uni_range(int, int);
 static void	 dump_context_vec(FILE *, FILE *, int);
 static void	 dump_unified_vec(FILE *, FILE *, int);
-static void	 prepare(int, FILE *, size_t, int);
+static void	 prepare(int, const uint8_t *, size_t, int);
 static void	 prune(void);
 static void	 equiv(struct line *, int, struct line *, int, int *);
 static void	 unravel(int);
@@ -199,15 +200,15 @@
 static void	 print_space(int, int, int);
 static bool	 ignoreline_pattern(char *);
 static bool	 ignoreline(char *, bool);
-static int	 asciifile(FILE *);
+static int	 asciifile(const uint8_t *, size_t);
 static int	 fetch(long *, int, int, FILE *, int, int, int);
 static int	 newcand(int, int, int);
 static int	 search(int *, int, int);
-static int	 skipline(FILE *);
+static int	 skipline(const uint8_t **, const uint8_t *);
 static int	 isqrt(int);
 static int	 stone(int *, int, int *, int *, int);
-static int	 readhash(FILE *, int);
-static int	 files_differ(FILE *, FILE *, int);
+static int	 readhash(const uint8_t **, const uint8_t *, int);
+static int	 files_differ(const uint8_t *, const uint8_t *, int);
 static char	*match_function(const long *, int, FILE *);
 static char	*preadline(int, size_t, off_t);
 
@@ -256,11 +257,13 @@
 diffreg(char *file1, char *file2, int flags, int capsicum)
 {
 	FILE *f1, *f2;
+	uint8_t *buf1, *buf2;
 	int i, rval;
 	struct pr *pr = NULL;
 	cap_rights_t rights_ro;
 
 	f1 = f2 = NULL;
+	buf1 = buf2 = NULL;
 	rval = D_SAME;
 	anychange = 0;
 	lastline = 0;
@@ -341,7 +344,8 @@
 		pr = start_pr(file1, file2);
 
 	if (capsicum) {
-		cap_rights_init(&rights_ro, CAP_READ, CAP_FSTAT, CAP_SEEK);
+		cap_rights_init(&rights_ro, CAP_READ, CAP_FSTAT, CAP_SEEK,
+		    CAP_MMAP);
 		if (caph_rights_limit(fileno(f1), &rights_ro) < 0)
 			err(2, "unable to limit rights on: %s", file1);
 		if (caph_rights_limit(fileno(f2), &rights_ro) < 0)
@@ -361,7 +365,24 @@
 			err(2, "unable to enter capability mode");
 	}
 
-	switch (files_differ(f1, f2, flags)) {
+	/*
+	 * mmap(2) rejects zero-length mappings, so empty inputs stay
+	 * unmapped (NULL); the consumers below are bounded by st_size.
+	 */
+	if (stb1.st_size > 0) {
+		buf1 = mmap(NULL, stb1.st_size, PROT_READ, MAP_SHARED,
+		    fileno(f1), 0);
+		if (buf1 == MAP_FAILED)
+			err(2, "unable to mmap");
+	}
+	if (stb2.st_size > 0) {
+		buf2 = mmap(NULL, stb2.st_size, PROT_READ, MAP_SHARED,
+		    fileno(f2), 0);
+		if (buf2 == MAP_FAILED)
+			err(2, "unable to mmap");
+	}
+
+	switch (files_differ(buf1, buf2, flags)) {
 	case 0:
 		goto closem;
 	case 1:
@@ -381,13 +402,14 @@
 		goto closem;
 	}
 	if ((flags & D_FORCEASCII) == 0 &&
-	    (!asciifile(f1) || !asciifile(f2))) {
+	    (!asciifile(buf1, stb1.st_size) ||
+	    !asciifile(buf2, stb2.st_size))) {
 		rval = D_BINARY;
 		status |= 1;
 		goto closem;
 	}
-	prepare(0, f1, stb1.st_size, flags);
-	prepare(1, f2, stb2.st_size, flags);
+	prepare(0, buf1, stb1.st_size, flags);
+	prepare(1, buf2, stb2.st_size, flags);
 
 	prune();
 	sort(sfile[0], slen[0]);
@@ -416,7 +438,7 @@
 
 	ixold = xreallocarray(ixold, len[0] + 2, sizeof(*ixold));
 	ixnew = xreallocarray(ixnew, len[1] + 2, sizeof(*ixnew));
-	check(f1, f2, flags);
+	check(buf1, buf2, stb1.st_size, stb2.st_size, flags);
 	output(file1, f1, file2, f2, flags);
 
 closem:
@@ -427,6 +449,10 @@
 		if (rval == D_SAME)
 			rval = D_DIFFER;
 	}
+	if (buf1 != NULL)
+		munmap(buf1, stb1.st_size);
+	if (buf2 != NULL)
+		munmap(buf2, stb2.st_size);
 	if (f1 != NULL)
 		fclose(f1);
 	if (f2 != NULL)
@@ -438,29 +464,18 @@
 /*
  * Check to see if the given files differ.
- * Returns 0 if they are the same, 1 if different, and -1 on error.
- * XXX - could use code from cmp(1) [faster]
+ * Returns 0 if they are the same and 1 if different.
  */
 static int
-files_differ(FILE *f1, FILE *f2, int flags)
+files_differ(const uint8_t *buf1, const uint8_t *buf2, int flags)
 {
-	char buf1[BUFSIZ], buf2[BUFSIZ];
-	size_t i, j;
 
 	if ((flags & (D_EMPTY1|D_EMPTY2)) || stb1.st_size != stb2.st_size ||
 	    (stb1.st_mode & S_IFMT) != (stb2.st_mode & S_IFMT))
 		return (1);
-	for (;;) {
-		i = fread(buf1, 1, sizeof(buf1), f1);
-		j = fread(buf2, 1, sizeof(buf2), f2);
-		if ((!i && ferror(f1)) || (!j && ferror(f2)))
-			return (-1);
-		if (i != j)
-			return (1);
-		if (i == 0)
-			return (0);
-		if (memcmp(buf1, buf2, i) != 0)
-			return (1);
-	}
+	/* Sizes are equal here; two empty (unmapped) files are identical. */
+	if (stb1.st_size == 0)
+		return (0);
+	return (memcmp(buf1, buf2, stb1.st_size) != 0);
 }
 
 static FILE *
@@ -512,20 +527,20 @@
 }
 
 static void
-prepare(int i, FILE *fd, size_t filesize, int flags)
+prepare(int i, const uint8_t *buf, size_t filesize, int flags)
 {
 	struct line *p;
 	int h;
 	size_t sz, j;
-
-	rewind(fd);
+	const uint8_t *walk = buf;
+	const uint8_t *end = buf + filesize;
 
 	sz = MIN(filesize, SIZE_MAX) / 25;
 	if (sz < 100)
 		sz = 100;
 
 	p = xcalloc(sz + 3, sizeof(*p));
-	for (j = 0; (h = readhash(fd, flags));) {
+	for (j = 0; (h = readhash(&walk, end, flags));) {
 		if (j == sz) {
 			sz = sz * 3 / 2;
 			p = xreallocarray(p, sz + 3, sizeof(*p));
@@ -705,6 +720,25 @@
 		J[q->x + pref] = q->y + pref;
 }
 
+/*
+ * getc(3)/ungetc(3) stand-ins for walking a mapped file.  bufgetc()
+ * returns the byte at *bufp and advances past it, or EOF (without
+ * advancing) once *bufp has reached end; bufungetc() steps back over
+ * the byte just read and, like ungetc(3), ignores EOF.
+ */
+static inline int
+bufgetc(const uint8_t **bufp, const uint8_t *end)
+{
+	return (*bufp == end ? EOF : *(*bufp)++);
+}
+
+static inline void
+bufungetc(int c, const uint8_t **bufp)
+{
+	if (c != EOF)
+		(*bufp)--;
+}
+
 /*
  * Check does double duty:
  *  1. ferret out any fortuitous correspondences due
@@ -712,107 +746,110 @@
  *  2. collect random access indexes to the two files
  */
 static void
-check(FILE *f1, FILE *f2, int flags)
+check(const uint8_t *buf1, const uint8_t *buf2, size_t sz1, size_t sz2,
+    int flags)
 {
 	int i, j, jackpot, c, d;
 	long ctold, ctnew;
+	const uint8_t *end1 = buf1 + sz1;
+	const uint8_t *end2 = buf2 + sz2;
 
-	rewind(f1);
-	rewind(f2);
 	j = 1;
 	ixold[0] = ixnew[0] = 0;
 	jackpot = 0;
 	ctold = ctnew = 0;
 	for (i = 1; i <= len[0]; i++) {
 		if (J[i] == 0) {
-			ixold[i] = ctold += skipline(f1);
+			ixold[i] = ctold += skipline(&buf1, end1);
 			continue;
 		}
 		while (j < J[i]) {
-			ixnew[j] = ctnew += skipline(f2);
+			ixnew[j] = ctnew += skipline(&buf2, end2);
 			j++;
 		}
 		if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS|D_IGNORECASE|D_STRIPCR)) {
 			for (;;) {
-				c = getc(f1);
-				d = getc(f2);
+				c = bufgetc(&buf1, end1);
+				d = bufgetc(&buf2, end2);
 				/*
 				 * GNU diff ignores a missing newline
 				 * in one file for -b or -w.
 				 */
 				if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) {
 					if (c == EOF && d == '\n') {
 						ctnew++;
 						break;
 					} else if (c == '\n' && d == EOF) {
 						ctold++;
 						break;
 					}
 				}
 				ctold++;
 				ctnew++;
 				if (flags & D_STRIPCR && (c == '\r' || d == '\r')) {
 					if (c == '\r') {
-						if ((c = getc(f1)) == '\n') {
+						if ((c = bufgetc(&buf1, end1)) == '\n') {
 							ctold++;
 						} else {
-							ungetc(c, f1);
+							bufungetc(c, &buf1);
 						}
 					}
 					if (d == '\r') {
-						if ((d = getc(f2)) == '\n') {
+						if ((d = bufgetc(&buf2, end2)) == '\n') {
 							ctnew++;
 						} else {
-							ungetc(d, f2);
+							bufungetc(d, &buf2);
 						}
 					}
 					break;
 				}
 				if ((flags & D_FOLDBLANKS) && isspace(c) &&
 				    isspace(d)) {
 					do {
 						if (c == '\n')
 							break;
 						ctold++;
-					} while (isspace(c = getc(f1)));
+					} while (isspace(c = bufgetc(&buf1, end1)));
 					do {
 						if (d == '\n')
 							break;
 						ctnew++;
-					} while (isspace(d = getc(f2)));
+					} while (isspace(d = bufgetc(&buf2, end2)));
 				} else if ((flags & D_IGNOREBLANKS)) {
 					while (isspace(c) && c != '\n') {
-						c = getc(f1);
+						c = bufgetc(&buf1, end1);
 						ctold++;
 					}
 					while (isspace(d) && d != '\n') {
-						d = getc(f2);
+						d = bufgetc(&buf2, end2);
 						ctnew++;
 					}
 				}
 				if (chrtran(c) != chrtran(d)) {
 					jackpot++;
 					J[i] = 0;
 					if (c != '\n' && c != EOF)
-						ctold += skipline(f1);
-					if (d != '\n' && c != EOF)
-						ctnew += skipline(f2);
+						ctold += skipline(&buf1, end1);
+					if (d != '\n' && d != EOF)
+						ctnew += skipline(&buf2, end2);
 					break;
 				}
 				if (c == '\n' || c == EOF)
 					break;
 			}
 		} else {
 			for (;;) {
 				ctold++;
 				ctnew++;
-				if ((c = getc(f1)) != (d = getc(f2))) {
+				c = bufgetc(&buf1, end1);
+				d = bufgetc(&buf2, end2);
+				if (c != d) {
 					/* jackpot++; */
 					J[i] = 0;
 					if (c != '\n' && c != EOF)
-						ctold += skipline(f1);
-					if (d != '\n' && c != EOF)
-						ctnew += skipline(f2);
+						ctold += skipline(&buf1, end1);
+					if (d != '\n' && d != EOF)
+						ctnew += skipline(&buf2, end2);
 					break;
 				}
 				if (c == '\n' || c == EOF)
@@ -824,7 +861,7 @@
 		j++;
 	}
 	for (; j <= len[1]; j++) {
-		ixnew[j] = ctnew += skipline(f2);
+		ixnew[j] = ctnew += skipline(&buf2, end2);
 	}
 	/*
 	 * if (jackpot)
@@ -879,12 +916,24 @@
 }
 
 static int
-skipline(FILE *f)
+skipline(const uint8_t **buf, const uint8_t *end)
 {
-	int i, c;
+	const uint8_t *start = *buf;
+	const uint8_t *nl;
+	int i;
 
-	for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++)
-		continue;
+	nl = (start != end) ? memchr(start, '\n', end - start) : NULL;
+	if (nl != NULL) {
+		*buf = nl + 1;
+		i = (int)(*buf - start);
+	} else {
+		/*
+		 * Unterminated last line: consume it and count the EOF
+		 * "read" as one character, as the getc(3) loop used to.
+		 */
+		*buf = end;
+		i = (int)(end - start) + 1;
+	}
 	return (i);
 }
 
@@ -1342,21 +1391,22 @@
  * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578.
  */
 static int
-readhash(FILE *f, int flags)
+readhash(const uint8_t **buf, const uint8_t *end, int flags)
 {
 	int i, t, space;
 	int sum;
 
 	sum = 1;
 	space = 0;
 	for (i = 0;;) {
-		switch (t = getc(f)) {
+		switch (t = bufgetc(buf, end)) {
 		case '\r':
 			if (flags & D_STRIPCR) {
-				t = getc(f);
-				if (t == '\n')
+				/* Swallow the LF of a CRLF pair. */
+				if (*buf != end && **buf == '\n') {
+					(*buf)++;
 					break;
-				ungetc(t, f);
+				}
 			}
 			/* FALLTHROUGH */
 		case '\t':
@@ -1393,17 +1443,12 @@
 }
 
 static int
-asciifile(FILE *f)
+asciifile(const uint8_t *buf, size_t filesize)
 {
-	unsigned char buf[BUFSIZ];
-	size_t cnt;
-
-	if (f == NULL)
-		return (1);
 
-	rewind(f);
-	cnt = fread(buf, 1, sizeof(buf), f);
-	return (memchr(buf, '\0', cnt) == NULL);
+	/* An empty (unmapped) file contains no NUL byte. */
+	if (filesize == 0)
+		return (1);
+	return (memchr(buf, '\0', MIN(BUFSIZ, filesize)) == NULL);
 }
 
 #define begins_with(s, pre) (strncmp(s, pre, sizeof(pre)-1) == 0)