Index: etc/mtree/BSD.tests.dist
===================================================================
--- etc/mtree/BSD.tests.dist
+++ etc/mtree/BSD.tests.dist
@@ -1072,6 +1072,8 @@
         ..
         vmstat
         ..
+        wc
+        ..
         xargs
         ..
         xinstall
Index: usr.bin/cmp/Makefile
===================================================================
--- usr.bin/cmp/Makefile
+++ usr.bin/cmp/Makefile
@@ -3,10 +3,14 @@
 
 .include <src.opts.mk>
 
+.PATH: ${SRCTOP}/usr.bin/wc
+
 PROG=	cmp
-SRCS=	cmp.c link.c misc.c regular.c special.c
+SRCS=	cmp.c link.c misc.c regular.c special.c countnl.c
 
 HAS_TESTS=
 SUBDIR.${MK_TESTS}+= tests
 
+CFLAGS+= -I${SRCTOP}/usr.bin/wc
+
 .include <bsd.prog.mk>
Index: usr.bin/cmp/extern.h
===================================================================
--- usr.bin/cmp/extern.h
+++ usr.bin/cmp/extern.h
@@ -41,6 +41,9 @@
 void	c_link(const char *, off_t, const char *, off_t);
 void	c_regular(int, const char *, off_t, off_t, int, const char *, off_t, off_t);
 void	c_special(int, const char *, off_t, int, const char *, off_t);
+int	c_chunk(const char *, const char *,
+	    const u_char *restrict, const u_char *restrict, size_t,
+	    off_t *, off_t *, int *);
 void	diffmsg(const char *, const char *, off_t, off_t);
 void	eofmsg(const char *);
 
Index: usr.bin/cmp/misc.c
===================================================================
--- usr.bin/cmp/misc.c
+++ usr.bin/cmp/misc.c
@@ -39,12 +39,15 @@
 __FBSDID("$FreeBSD$");
 
+#include <sys/param.h>
 #include <sys/types.h>
 
 #include <err.h>
 #include <stdio.h>
 #include <stdlib.h>
+#include <string.h>
 
 #include "extern.h"
+#include "countnl.h"
 
 void
 eofmsg(const char *file)
@@ -62,3 +65,84 @@
 	    file1, file2, (long long)byte, (long long)line);
 	exit(DIFF_EXIT);
 }
+
+/*
+ * Report one differing byte pair.  With -x or -l a line is printed
+ * and the caller carries on; otherwise diffmsg() is called, which
+ * exits.
+ */
+static void
+founddiff(const char *file1, const char *file2,
+	  u_char ch1, u_char ch2,
+	  off_t byte, off_t line) {
+	if (xflag) {
+		(void)printf("%08llx %02x %02x\n",
+			     (long long)byte - 1, ch1, ch2);
+	} else if (lflag) {
+		(void)printf("%6lld %3o %3o\n",
+			     (long long)byte, ch1, ch2);
+	} else {
+		diffmsg(file1, file2, byte, line);
+		/* NOTREACHED */
+	}
+}
+
+/*
+ * Compare chunk_size bytes at p1 and p2.  *obyte and *oline are the
+ * byte and line numbers of p1[0] (callers start both at 1) and are
+ * advanced past the chunk.  *tryfast carries the memcmp() fast-path
+ * hint from one call to the next.  Returns non-zero if any byte in
+ * the chunk differed.
+ */
+int
+c_chunk(const char *file1, const char *file2,
+	const u_char *restrict p1, const u_char *restrict p2,
+	size_t chunk_size, off_t *obyte, off_t *oline, int *tryfast) {
+	size_t cbyte, cline;
+	u_char ch1, ch2;
+	int dfound, anydiff = 0;
+	size_t piece_size, piece_len;
+	/* If the chunk is too large, process it in smaller pieces. */
+	for (piece_size = chunk_size >= PAGE_SIZE * 2 ? PAGE_SIZE : chunk_size;
+	     chunk_size;
+	     chunk_size -= piece_len, *obyte += cbyte, *oline += cline) {
+		piece_len = MIN(chunk_size, piece_size);
+		if (*tryfast) {
+			/* The memcmp() is all that is needed in
+			   silent mode, otherwise it is done
+			   opportunistically to skip over pieces that
+			   have no differences. */
+			if ((dfound = memcmp(p1, p2, piece_len) != 0)) {
+				if (sflag)
+					exit(DIFF_EXIT);
+			} else {
+				cbyte = piece_len;
+				if (!sflag && !lflag) {
+					cline = count_newlines(p1, piece_len);
+				} else
+					cline = 0;
+				p1 += piece_len;
+				p2 += piece_len;
+				continue;
+			}
+		} else
+			dfound = 0;
+		for (cline = 0, cbyte = 0; cbyte != piece_len; cbyte++) {
+			if ((ch1 = *p1++) != (ch2 = *p2++)) {
+				dfound = 1;
+				/* NOTE: founddiff() exits if only one
+				 * difference needs to be reported */
+				founddiff(file1, file2, ch1, ch2,
+					  *obyte + cbyte, *oline + cline);
+			}
+			if (ch1 == '\n')
+				++cline;
+		}
+		/* Avoid the (not necessarily so fast) fast path for
+		   the next piece if we might be in a region with a lot of
+		   differences. */
+		*tryfast = !dfound;
+		/* Remember a difference in any piece, not just the last. */
+		anydiff |= dfound;
+	}
+	return anydiff;
+}
Index: usr.bin/cmp/regular.c
===================================================================
--- usr.bin/cmp/regular.c
+++ usr.bin/cmp/regular.c
@@ -64,11 +64,12 @@
 {
 	struct sigaction act, oact;
 	cap_rights_t rights;
-	u_char ch, *p1, *p2, *m1, *m2, *e1, *e2;
+	u_char *p1, *p2, *m1, *m2, *e1, *e2;
 	off_t byte, length, line;
+	size_t len;
+	int dfound, tryfast;
 	off_t pagemask, off1, off2;
 	size_t pagesize;
-	int dfound;
 
 	if (skip1 > len1)
 		eofmsg(file1);
@@ -112,28 +113,19 @@
 		err(ERR_EXIT, "sigaction()");
 
 	dfound = 0;
+	tryfast = 1;
 	e1 = m1 + MMAP_CHUNK;
 	e2 = m2 + MMAP_CHUNK;
 	p1 = m1 + (skip1 - off1);
 	p2 = m2 + (skip2 - off2);
+	byte = line = 1;
 
-	for (byte = line = 1; length--; ++byte) {
-		if ((ch = *p1) != *p2) {
-			if (xflag) {
-				dfound = 1;
-				(void)printf("%08llx %02x %02x\n",
-				    (long long)byte - 1, ch, *p2);
-			} else if (lflag) {
-				dfound = 1;
-				(void)printf("%6lld %3o %3o\n",
-				    (long long)byte, ch, *p2);
-			} else
-				diffmsg(file1, file2, byte, line);
-				/* NOTREACHED */
-		}
-		if (ch == '\n')
-			++line;
-		if (++p1 == e1) {
+	while (length) {
+		len = MIN(MIN(e1 - p1, e2 - p2), length);
+		if (c_chunk(file1, file2, p1, p2, len, &byte, &line, &tryfast))
+			dfound = 1;
+		length -= len;
+		if ((p1 += len) == e1 && length) {
 			off1 += MMAP_CHUNK;
 			if ((p1 = m1 = remmap(m1, fd1, off1)) == NULL) {
 				munmap(m2, MMAP_CHUNK);
@@ -141,7 +133,7 @@
 			}
 			e1 = m1 + MMAP_CHUNK;
 		}
-		if (++p2 == e2) {
+		if ((p2 += len) == e2 && length) {
 			off2 += MMAP_CHUNK;
 			if ((p2 = m2 = remmap(m2, fd2, off2)) == NULL) {
 				munmap(m1, MMAP_CHUNK);
Index: usr.bin/cmp/special.c
===================================================================
--- usr.bin/cmp/special.c
+++ usr.bin/cmp/special.c
@@ -38,23 +38,72 @@
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
+#include <sys/param.h>
 #include <sys/types.h>
 
 #include <capsicum_helpers.h>
 #include <err.h>
 #include <stdio.h>
 #include <stdlib.h>
+#include <unistd.h>
 
 #include "extern.h"
 
+/*
+ * Read and discard skip bytes from fd.  Returns 1 if end of file is
+ * reached first, 0 otherwise; a read error is fatal.
+ */
+static int
+skip_helper(int fd, const char *file, off_t skip) {
+	char buf[MAXBSIZE];
+	ssize_t r;
+	size_t n;
+	while (skip) {
+		n = MIN(skip, (off_t)(sizeof buf));
+		r = read(fd, buf, n);
+		if (r <= 0) {
+			if (r < 0)
+				err(ERR_EXIT, "%s", file);
+			return 1;
+		}
+		skip -= r;
+	}
+	return 0;
+}
+
+/*
+ * Refill buf from fd once the window [*p, *e) has been consumed.
+ * *eof is updated only when a refill is attempted: set when read()
+ * returns no data, cleared otherwise.  A read error is fatal.
+ */
+static void
+read_helper(int fd, const char *file,
+	    u_char *buf, size_t size,
+	    u_char **p, u_char **e, int *eof)
+{
+	ssize_t r;
+	if (*p == *e) {
+		*p = *e = buf;
+		r = read(fd, *p, size);
+		if ((*eof = r <= 0)) {
+			if (r < 0)
+				err(ERR_EXIT, "%s", file);
+		} else
+			*e += r;
+	}
+}
+
 void
 c_special(int fd1, const char *file1, off_t skip1,
     int fd2, const char *file2, off_t skip2)
 {
-	int ch1, ch2;
 	off_t byte, line;
-	FILE *fp1, *fp2;
-	int dfound;
+	int dfound, tryfast;
+	u_char b1[MAXBSIZE], b2[MAXBSIZE];
+	u_char *p1, *p2, *e1, *e2;
+	size_t len;
+	/* Cleared up front: the eof label is reachable before both are set. */
+	int eof1 = 0, eof2 = 0;
 
 	if (caph_limit_stream(fd1, CAPH_READ) < 0)
 		err(ERR_EXIT, "caph_limit_stream(%s)", file1);
@@ -63,54 +112,35 @@
 	if (caph_enter() < 0)
 		err(ERR_EXIT, "unable to enter capability mode");
 
-	if ((fp1 = fdopen(fd1, "r")) == NULL)
-		err(ERR_EXIT, "%s", file1);
-	if ((fp2 = fdopen(fd2, "r")) == NULL)
-		err(ERR_EXIT, "%s", file2);
-
 	dfound = 0;
-	while (skip1--)
-		if (getc(fp1) == EOF)
-			goto eof;
-	while (skip2--)
-		if (getc(fp2) == EOF)
-			goto eof;
+	if ((eof1 = skip_helper(fd1, file1, skip1)))
+		goto eof;
+	if ((eof2 = skip_helper(fd2, file2, skip2)))
+		goto eof;
 
-	for (byte = line = 1;; ++byte) {
-		ch1 = getc(fp1);
-		ch2 = getc(fp2);
-		if (ch1 == EOF || ch2 == EOF)
+	p1 = e1 = b1;
+	p2 = e2 = b2;
+	byte = line = 1;
+	tryfast = 1;
+
+	for (;;) {
+		read_helper(fd1, file1, b1, sizeof b1, &p1, &e1, &eof1);
+		read_helper(fd2, file2, b2, sizeof b2, &p2, &e2, &eof2);
+		if (eof1 || eof2)
 			break;
-		if (ch1 != ch2) {
-			if (xflag) {
-				dfound = 1;
-				(void)printf("%08llx %02x %02x\n",
-				    (long long)byte - 1, ch1, ch2);
-			} else if (lflag) {
-				dfound = 1;
-				(void)printf("%6lld %3o %3o\n",
-				    (long long)byte, ch1, ch2);
-			} else {
-				diffmsg(file1, file2, byte, line);
-				/* NOTREACHED */
-			}
-		}
-		if (ch1 == '\n')
-			++line;
+		len = MIN(e1 - p1, e2 - p2);
+		if (c_chunk(file1, file2, p1, p2, len, &byte, &line, &tryfast))
+			dfound = 1;
+		p1 += len;
+		p2 += len;
 	}
 
-eof:	if (ferror(fp1))
-		err(ERR_EXIT, "%s", file1);
-	if (ferror(fp2))
-		err(ERR_EXIT, "%s", file2);
-	if (feof(fp1)) {
-		if (!feof(fp2))
+eof:	if (eof1) {
+		if (!eof2)
 			eofmsg(file1);
 	} else
-		if (feof(fp2))
+		if (eof2)
 			eofmsg(file2);
-	fclose(fp2);
-	fclose(fp1);
 	if (dfound)
 		exit(DIFF_EXIT);
 }
Index: usr.bin/wc/Makefile
===================================================================
--- usr.bin/wc/Makefile
+++ usr.bin/wc/Makefile
@@ -12,4 +12,10 @@
 CFLAGS+=-DWITH_CASPER
 .endif
 
+SRCS=	wc.c countnl.c
+
+.if ${MK_TESTS} != "no"
+SUBDIR+=	tests
+.endif
+
 .include <bsd.prog.mk>
Index: usr.bin/wc/countnl.h
===================================================================
--- usr.bin/wc/countnl.h
+++ usr.bin/wc/countnl.h
@@ -0,0 +1,9 @@
+#ifndef COUNTNL_H
+#define COUNTNL_H
+
+#include <sys/types.h>
+
+/* Count the '\n' bytes in a buffer of the given length. */
+size_t count_newlines(const u_char *restrict, size_t);
+
+#endif /* !COUNTNL_H */
Index: usr.bin/wc/countnl.c
===================================================================
--- usr.bin/wc/countnl.c
+++ usr.bin/wc/countnl.c
@@ -0,0 +1,53 @@
+#include <sys/types.h>
+#include <limits.h>
+#include <stdint.h>
+#include <string.h>
+
+#include "countnl.h"
+
+/*
+ * Return the number of '\n' bytes in the l bytes starting at p.
+ * Buffers longer than ten words are scanned a word at a time when
+ * bytes are 8 bits wide; everything else is counted byte by byte.
+ */
+size_t
+count_newlines(const u_char *restrict p, size_t l) {
+	size_t n = 0;
+#if CHAR_BIT == 8 /* VROOM VROOM */
+	/* Using long long makes this slower than the naive algorithm
+	 * on some CPUs.  Let's assume that long is the largest
+	 * integer type that the CPU can work with efficiently. */
+	typedef unsigned long word;
+	word wv;
+	size_t wl;
+	/* Only do it if we've got at least a few words to process. */
+	if (l > sizeof (word) * 10) {
+		/* Align to word boundary. */
+		for (; ((uintptr_t)p & (sizeof(word) - 1)) && l; l--)
+			n += *p++ == '\n';
+		/* Process one word at a time. */
+		wl = l / sizeof (word);
+		l -= wl * sizeof (word);
+		for (; wl--; p += sizeof (word)) {
+			/* Load through memcpy(): reading a u_char
+			 * buffer through a word pointer would break
+			 * the aliasing rules. */
+			memcpy(&wv, p, sizeof wv);
+			/* This is from the "Bit Twiddling Hacks" page,
+			 * "Determine if a word has a byte less than n" using
+			 * one of their tricks to turn newline bytes into zero
+			 * bytes and then counting for bytes less than one.
+			 * It just works.
+			 * https://graphics.stanford.edu/~seander/bithacks.html
+			 */
+			wv ^= ~(word)0/255 * (word)'\n';
+			n += ((~(word)0/255*(127+(1)) - ((wv)&~(word)0/255*127))
+			      & ~(wv) & ~(word)0/255*128) / 128 % 255;
+		}
+		/* Leftovers handled below. */
+	}
+#endif
+	while (l--)
+		n += *p++ == '\n';
+	return n;
+}
Index: usr.bin/wc/tests/Makefile
===================================================================
--- usr.bin/wc/tests/Makefile
+++ usr.bin/wc/tests/Makefile
@@ -0,0 +1,9 @@
+# $FreeBSD$
+
+PACKAGE=	tests
+
+ATF_TESTS_SH+=	linecnt
+
+${PACKAGE}FILES+=	linecnt.in
+
+.include <bsd.test.mk>
Index: usr.bin/wc/tests/linecnt.in
===================================================================
--- usr.bin/wc/tests/linecnt.in
+++ usr.bin/wc/tests/linecnt.in
@@ -0,0 +1,27 @@
+abcdefgh
+abcde
+abcdefghijklmnopqrs
+abcdefghijklmno
+abcdefghijklmnopqrstuvwx
+abcdefghijklmnop
+ab
+abcdefghi
+abc
+abcdefghijklm
+abcdefghijklmnopqrst
+abcdefghijklmnopqrstu
+abcdefghijkl
+a
+abcdef
+abcdefghijklmnopqrstuvw
+
+abcdefghijklmnopqrstuv
+abcdefghijklmnopq
+abcdefghijk
+abcdefghijklmnopqr
+abcdefghijklmnopqrstuvwxyz
+abcdefghijklmnopqrstuvwxy
+abcd
+abcdefghij
+abcdefghijklmn
+abcdefg
Index: usr.bin/wc/tests/linecnt.sh
===================================================================
--- usr.bin/wc/tests/linecnt.sh
+++ usr.bin/wc/tests/linecnt.sh
@@ -0,0 +1,17 @@
+
+atf_test_case linecnt
+linecnt_head() {
+	atf_set "descr" "Test wc(1)'s line counting fast path"
+}
+linecnt_body() {
+	atf_check -o match:'^ *0$' wc -l < /dev/null
+	atf_check -o match:'^ *0 +0$' wc -lL < /dev/null
+	atf_check -o match:'^ *27$' wc -l < "$(atf_get_srcdir)/linecnt.in"
+	atf_check -o match:'^ *27 +26$' wc -lL < "$(atf_get_srcdir)/linecnt.in"
+	true
+}
+
+atf_init_test_cases()
+{
+	atf_add_test_case linecnt
+}
Index: usr.bin/wc/wc.c
===================================================================
--- usr.bin/wc/wc.c
+++ usr.bin/wc/wc.c
@@ -66,6 +66,8 @@
 #include <libcasper.h>
 #include <casper/cap_fileargs.h>
 
+#include "countnl.h"
+
 static fileargs_t *fa;
 static uintmax_t tlinect, twordct, tcharct, tlongline;
 static int doline, doword, dochar, domulti, dolongline;
@@ -274,7 +276,7 @@
 			if (siginfo)
 				show_cnt(file, linect, wordct, charct, llct);
 			charct += len;
-			if (doline || dolongline) {
+			if (dolongline) {
 				for (p = buf; len--; ++p)
 					if (*p == '\n') {
 						if (tmpll > llct)
@@ -283,7 +285,8 @@
 						++linect;
 					} else
 						tmpll++;
-			}
+			} else if (doline)
+				linect += count_newlines(buf, len);
 		}
 		reset_siginfo();
 		if (doline)