NOTE(review): the header names on the "#include <...>" and ".include <...>"
lines were missing from the copy of this patch that was reviewed.  They are
restored here from the identifiers each file uses; confirm against the tree.

Index: etc/mtree/BSD.tests.dist
===================================================================
--- etc/mtree/BSD.tests.dist
+++ etc/mtree/BSD.tests.dist
@@ -1008,6 +1008,8 @@
         ..
         vmstat
         ..
+        wc
+        ..
         xargs
         ..
         xinstall
Index: usr.bin/cmp/Makefile
===================================================================
--- usr.bin/cmp/Makefile
+++ usr.bin/cmp/Makefile
@@ -3,10 +3,14 @@
 
 .include <src.opts.mk>
 
+.PATH: ${SRCTOP}/usr.bin/wc
+
 PROG=	cmp
-SRCS=	cmp.c link.c misc.c regular.c special.c
+SRCS=	cmp.c link.c misc.c regular.c special.c countnl.c
 
 HAS_TESTS=
 SUBDIR.${MK_TESTS}+= tests
 
+CFLAGS+= -I${SRCTOP}/usr.bin/wc
+
 .include <bsd.prog.mk>
Index: usr.bin/cmp/extern.h
===================================================================
--- usr.bin/cmp/extern.h
+++ usr.bin/cmp/extern.h
@@ -41,6 +41,9 @@
 void	c_link(const char *, off_t, const char *, off_t);
 void	c_regular(int, const char *, off_t, off_t, int, const char *, off_t, off_t);
 void	c_special(int, const char *, off_t, int, const char *, off_t);
+int	c_chunk(const char *, const char *,
+	    u_char *restrict, u_char *restrict, size_t,
+	    off_t *, off_t *, int *);
 void	diffmsg(const char *, const char *, off_t, off_t);
 void	eofmsg(const char *);
 
Index: usr.bin/cmp/misc.c
===================================================================
--- usr.bin/cmp/misc.c
+++ usr.bin/cmp/misc.c
@@ -39,12 +39,15 @@
 __FBSDID("$FreeBSD$");
 
 #include <sys/types.h>
+#include <sys/param.h>
 
 #include <err.h>
 #include <stdio.h>
 #include <stdlib.h>
+#include <string.h>
 
 #include "extern.h"
+#include "countnl.h"
 
 void
 eofmsg(const char *file)
@@ -62,3 +65,84 @@
 	    file1, file2, (long long)byte, (long long)line);
 	exit(DIFF_EXIT);
 }
+
+/*
+ * Report one differing byte pair: offset and bytes in hex if xflag is
+ * set, byte number and bytes in octal if lflag is set, otherwise through
+ * diffmsg(), which exits.
+ */
+static void
+founddiff(const char *file1, const char *file2,
+    u_char ch1, u_char ch2,
+    off_t byte, off_t line) {
+	if (xflag) {
+		(void)printf("%08llx %02x %02x\n",
+		    (long long)byte - 1, ch1, ch2);
+	} else if (lflag) {
+		(void)printf("%6lld %3o %3o\n",
+		    (long long)byte, ch1, ch2);
+	} else {
+		diffmsg(file1, file2, byte, line);
+		/* NOTREACHED */
+	}
+}
+
+/*
+ * Compare chunk_size bytes at p1 and p2.  *obyte and *oline are the byte
+ * and line number of the first byte of the chunk and are advanced past
+ * it.  Differences are reported through founddiff(); in silent mode the
+ * first one exits.  *tryfast carries the memcmp() fast-path hint from one
+ * call to the next.  Returns non-zero if any byte of the chunk differed.
+ */
+int
+c_chunk(const char *file1, const char *file2,
+    u_char *restrict p1, u_char *restrict p2, size_t chunk_size,
+    off_t *obyte, off_t *oline, int *tryfast) {
+	size_t cbyte, cline;
+	u_char ch1, ch2;
+	int dfound, anydiff;
+	size_t piece_size, piece_len;
+	anydiff = 0;
+	/* If the chunk is too large, process it in smaller pieces. */
+	for (piece_size = chunk_size >= PAGE_SIZE * 2 ? PAGE_SIZE : chunk_size;
+	     chunk_size;
+	     chunk_size -= piece_len, *obyte += cbyte, *oline += cline) {
+		piece_len = MIN(chunk_size, piece_size);
+		if (*tryfast) {
+			/* The memcmp() is all that is needed in
+			   silent mode, otherwise it is done
+			   opportunistically to skip over pieces that
+			   have no differences. */
+			if ((dfound = memcmp(p1, p2, piece_len) != 0)) {
+				if (sflag)
+					exit(DIFF_EXIT);
+			} else {
+				cbyte = piece_len;
+				if (!sflag && !lflag) {
+					cline = count_newlines(p1, piece_len);
+				} else
+					cline = 0;
+				p1 += piece_len;
+				p2 += piece_len;
+				continue;
+			}
+		} else
+			dfound = 0;
+		for (cline = 0, cbyte = 0; cbyte != piece_len; cbyte++) {
+			if ((ch1 = *p1++) != (ch2 = *p2++)) {
+				dfound = 1;
+				founddiff(file1, file2, ch1, ch2,
+				    *obyte + cbyte, *oline + cline);
+			}
+			if (ch1 == '\n')
+				++cline;
+		}
+		/* Avoid the (not necessarily so fast) fast path for
+		   the next piece if it seems like we might have to do
+		   a full compare anyway. */
+		*tryfast = !dfound;
+		/* dfound is per piece; keep a difference seen in any piece. */
+		anydiff |= dfound;
+	}
+	return anydiff;
+}
Index: usr.bin/cmp/regular.c
===================================================================
--- usr.bin/cmp/regular.c
+++ usr.bin/cmp/regular.c
@@ -61,9 +61,10 @@
 c_regular(int fd1, const char *file1, off_t skip1, off_t len1,
     int fd2, const char *file2, off_t skip2, off_t len2)
 {
-	u_char ch, *p1, *p2, *m1, *m2, *e1, *e2;
+	u_char *p1, *p2, *m1, *m2, *e1, *e2;
 	off_t byte, length, line;
-	int dfound;
+	size_t len;
+	int dfound, tryfast;
 	off_t pagemask, off1, off2;
 	size_t pagesize;
 	struct sigaction act, oact;
@@ -103,28 +104,20 @@
 	}
 
 	dfound = 0;
+	tryfast = 1;
 	e1 = m1 + MMAP_CHUNK;
 	e2 = m2 + MMAP_CHUNK;
 	p1 = m1 + (skip1 - off1);
 	p2 = m2 + (skip2 - off2);
-
-	for (byte = line = 1; length--; ++byte) {
-		if ((ch = *p1) != *p2) {
-			if (xflag) {
-				dfound = 1;
-				(void)printf("%08llx %02x %02x\n",
-				    (long long)byte - 1, ch, *p2);
-			} else if (lflag) {
-				dfound = 1;
-				(void)printf("%6lld %3o %3o\n",
-				    (long long)byte, ch, *p2);
-			} else
-				diffmsg(file1, file2, byte, line);
-				/* NOTREACHED */
-		}
-		if (ch == '\n')
-			++line;
-		if (++p1 == e1) {
+	byte = line = 1;
+
+	while (length) {
+		/* Stop at the end of whichever mapping runs out first. */
+		len = MIN(MIN(e1 - p1, e2 - p2), length);
+		if (c_chunk(file1, file2, p1, p2, len, &byte, &line, &tryfast))
+			dfound = 1;
+		length -= len;
+		if ((p1 += len) == e1 && length) {
 			off1 += MMAP_CHUNK;
 			if ((p1 = m1 = remmap(m1, fd1, off1)) == NULL) {
 				munmap(m2, MMAP_CHUNK);
@@ -132,7 +125,7 @@
 			}
 			e1 = m1 + MMAP_CHUNK;
 		}
-		if (++p2 == e2) {
+		if ((p2 += len) == e2 && length) {
 			off2 += MMAP_CHUNK;
 			if ((p2 = m2 = remmap(m2, fd2, off2)) == NULL) {
 				munmap(m1, MMAP_CHUNK);
Index: usr.bin/cmp/special.c
===================================================================
--- usr.bin/cmp/special.c
+++ usr.bin/cmp/special.c
@@ -38,71 +38,107 @@
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
+#include <sys/param.h>
 #include <sys/types.h>
 
 #include <err.h>
 #include <stdio.h>
 #include <stdlib.h>
+#include <unistd.h>
 
 #include "extern.h"
 
+/*
+ * Read and discard skip bytes from fd.  Returns 1 if end of file comes
+ * first and 0 otherwise; a read error is fatal.
+ */
+static int
+skip_helper(int fd, const char *file, off_t skip) {
+	char buf[MAXBSIZE];
+	ssize_t r;
+	size_t n;
+	while (skip) {
+		n = MIN(skip, (off_t)(sizeof buf));
+		r = read(fd, buf, n);
+		if (r <= 0) {
+			if (r < 0)
+				err(ERR_EXIT, "%s", file);
+			return 1;
+		}
+		/* read() may return fewer than n bytes; count what arrived. */
+		skip -= r;
+	}
+	return 0;
+}
+
+/*
+ * Refill buf with up to size bytes from fd and set *cur and *lim to the
+ * start and end of the data read.  Returns 1 at end of file and 0
+ * otherwise; a read error is fatal.
+ */
+static int
+read_helper(int fd, const char *file,
+    u_char *buf, size_t size,
+    u_char **cur, u_char **lim)
+{
+	ssize_t r;
+	*cur = *lim = buf;
+	r = read(fd, *cur, size);
+	if (r <= 0) {
+		if (r < 0)
+			err(ERR_EXIT, "%s", file);
+		return 1;
+	}
+	*lim += r;
+	return 0;
+}
+
 void
 c_special(int fd1, const char *file1, off_t skip1,
     int fd2, const char *file2, off_t skip2)
 {
-	int ch1, ch2;
 	off_t byte, line;
-	FILE *fp1, *fp2;
-	int dfound;
-
-	if ((fp1 = fdopen(fd1, "r")) == NULL)
-		err(ERR_EXIT, "%s", file1);
-	if ((fp2 = fdopen(fd2, "r")) == NULL)
-		err(ERR_EXIT, "%s", file2);
+	int dfound, tryfast;
+	u_char b1[MAXBSIZE], b2[MAXBSIZE];
+	u_char *p1, *p2, *e1, *e2;
+	size_t len;
+	int eof1, eof2;
 
 	dfound = 0;
+	/* eof2 is tested at "eof" even when only file1 has been read. */
+	eof1 = eof2 = 0;
-	while (skip1--)
-		if (getc(fp1) == EOF)
-			goto eof;
-	while (skip2--)
-		if (getc(fp2) == EOF)
-			goto eof;
 
-	for (byte = line = 1;; ++byte) {
-		ch1 = getc(fp1);
-		ch2 = getc(fp2);
-		if (ch1 == EOF || ch2 == EOF)
+	if ((eof1 = skip_helper(fd1, file1, skip1)))
+		goto eof;
+	if ((eof2 = skip_helper(fd2, file2, skip2)))
+		goto eof;
+
+	p1 = e1 = b1;
+	p2 = e2 = b2;
+	byte = line = 1;
+	tryfast = 1;
+
+	for (;;) {
+		if (p1 == e1)
+			eof1 = read_helper(fd1, file1, b1, sizeof b1, &p1, &e1);
+		if (p2 == e2)
+			eof2 = read_helper(fd2, file2, b2, sizeof b2, &p2, &e2);
+		if (eof1 || eof2)
 			break;
-		if (ch1 != ch2) {
-			if (xflag) {
-				dfound = 1;
-				(void)printf("%08llx %02x %02x\n",
-				    (long long)byte - 1, ch1, ch2);
-			} else if (lflag) {
-				dfound = 1;
-				(void)printf("%6lld %3o %3o\n",
-				    (long long)byte, ch1, ch2);
-			} else {
-				diffmsg(file1, file2, byte, line);
-				/* NOTREACHED */
-			}
-		}
-		if (ch1 == '\n')
-			++line;
+		/* Compare as much as both buffers currently hold. */
+		len = MIN(e1 - p1, e2 - p2);
+		if (c_chunk(file1, file2, p1, p2, len, &byte, &line, &tryfast))
+			dfound = 1;
+		p1 += len;
+		p2 += len;
 	}
 
-eof:	if (ferror(fp1))
-		err(ERR_EXIT, "%s", file1);
-	if (ferror(fp2))
-		err(ERR_EXIT, "%s", file2);
-	if (feof(fp1)) {
-		if (!feof(fp2))
+eof:	if (eof1) {
+		if (!eof2)
 			eofmsg(file1);
 	} else
-		if (feof(fp2))
+		if (eof2)
 			eofmsg(file2);
-	fclose(fp2);
-	fclose(fp1);
 	if (dfound)
 		exit(DIFF_EXIT);
 }
Index: usr.bin/cmp/tests/cmp_test2.sh
===================================================================
--- usr.bin/cmp/tests/cmp_test2.sh
+++ usr.bin/cmp/tests/cmp_test2.sh
@@ -29,12 +29,22 @@
 	atf_set "descr" "Test cmp(1)'s handling of non-regular files"
 }
 special_body() {
+	# NOTE: Cannot use pipelines for this because FreeBSD's sh(1)
+	# executes the last component of a pipeline in a subprocess
+	# (which makes atf_check not register errors since its state
+	# is kept in the main process).
 	echo 0123456789abcdef > a
 	echo 0123456789abcdeg > b
-	cat a | atf_check -s exit:0 cmp a -
-	cat a | atf_check -s exit:0 cmp - a
-	cat b | atf_check -s not-exit:0 cmp a -
-	cat b | atf_check -s not-exit:0 cmp - a
+	mkfifo a.fifo b.fifo
+	cat a > a.fifo &
+	atf_check -s exit:0 cmp a a.fifo
+	cat a > a.fifo &
+	atf_check -s exit:0 cmp a.fifo a
+	cat b > b.fifo &
+	atf_check -o not-empty -s not-exit:0 cmp a b.fifo
+	cat b > b.fifo &
+	atf_check -o not-empty -s not-exit:0 cmp b.fifo a
+	wait
 
 	true
 }
Index: usr.bin/wc/Makefile
===================================================================
--- usr.bin/wc/Makefile
+++ usr.bin/wc/Makefile
@@ -1,7 +1,15 @@
 #	@(#)Makefile	8.1 (Berkeley) 6/6/93
 # $FreeBSD$
 
+.include <src.opts.mk>
+
 PROG=	wc
 LIBADD=	xo
 
+SRCS=	wc.c countnl.c
+
+.if ${MK_TESTS} != "no"
+SUBDIR+= tests
+.endif
+
 .include <bsd.prog.mk>
Index: usr.bin/wc/countnl.h
===================================================================
--- /dev/null
+++ usr.bin/wc/countnl.h
@@ -0,0 +1,3 @@
+#include <sys/types.h>
+
+size_t count_newlines(u_char *restrict, size_t);
Index: usr.bin/wc/countnl.c
===================================================================
--- /dev/null
+++ usr.bin/wc/countnl.c
@@ -0,0 +1,47 @@
+#include <sys/types.h>
+#include <limits.h>
+#include <stdint.h>
+
+#include "countnl.h"
+
+size_t
+count_newlines(u_char *restrict p, size_t l) {
+	size_t n = 0;
+#if CHAR_BIT == 8 /* VROOM VROOM */
+	/* Using long long makes this slower than the naive algorithm
+	 * on some CPUs. Let's assume that long is the largest
+	 * integer type that the CPU can work with efficiently. */
+	typedef unsigned long word;
+	word *restrict wp;
+	size_t wl;
+	/* Only do it if we've got at least a few words to process. */
+	if (l > sizeof (word) * 4) {
+		/* Align to word boundary. */
+		for (; ((uintptr_t)p & (sizeof(word) - 1)) && l; l--)
+			n += *p++ == '\n';
+		/* Process one word at a time. */
+		wp = (void *)p;
+		wl = l / sizeof (word);
+		l -= wl * sizeof (word);
+		p += wl * sizeof (word);
+		while (wl--) {
+			/* "Count bytes less than n" with n = 1, from
+			 * the "Bit Twiddling Hacks" page.  The XOR
+			 * turns every newline byte into 0x00.  A byte
+			 * b then keeps its top bit in the expression
+			 * below only if b == 0: 0x80 - (b & 0x7f) has
+			 * it set iff the low 7 bits are clear, ~b iff
+			 * b < 0x80.  "/ 128" moves each flag to bit 0
+			 * and "% 255" adds the per-byte flags up. */
+			word wv = *wp++ ^ ~(word)0/255 * (word)'\n';
+			n += ((~(word)0/255*(127+(1)) - ((wv)&~(word)0/255*127))
+			      & ~(wv) & ~(word)0/255*128) / 128 % 255;
+		}
+		/* Leftovers handled below. */
+	}
+#endif
+	while (l--)
+		n += *p++ == '\n';
+	return n;
+}
+
NOTE(review): the header names on the ".include <...>" and "#include <...>"
lines below were missing from the copy of this patch that was reviewed and
are restored from what each file uses; confirm against the tree.

Index: usr.bin/wc/tests/Makefile
===================================================================
--- /dev/null
+++ usr.bin/wc/tests/Makefile
@@ -0,0 +1,9 @@
+# $FreeBSD$
+
+PACKAGE=	tests
+
+ATF_TESTS_SH+=	linecnt
+
+${PACKAGE}FILES+=	linecnt.in
+
+.include <bsd.test.mk>
Index: usr.bin/wc/tests/linecnt.in
===================================================================
--- /dev/null
+++ usr.bin/wc/tests/linecnt.in
@@ -0,0 +1,27 @@
+abcdefgh
+abcde
+abcdefghijklmnopqrs
+abcdefghijklmno
+abcdefghijklmnopqrstuvwx
+abcdefghijklmnop
+ab
+abcdefghi
+abc
+abcdefghijklm
+abcdefghijklmnopqrst
+abcdefghijklmnopqrstu
+abcdefghijkl
+a
+abcdef
+abcdefghijklmnopqrstuvw
+
+abcdefghijklmnopqrstuv
+abcdefghijklmnopq
+abcdefghijk
+abcdefghijklmnopqr
+abcdefghijklmnopqrstuvwxyz
+abcdefghijklmnopqrstuvwxy
+abcd
+abcdefghij
+abcdefghijklmn
+abcdefg
Index: usr.bin/wc/tests/linecnt.sh
===================================================================
--- /dev/null
+++ usr.bin/wc/tests/linecnt.sh
@@ -0,0 +1,17 @@
+
+atf_test_case linecnt
+linecnt_head() {
+	atf_set "descr" "Test wc(1)'s line counting fast path"
+}
+linecnt_body() {
+	atf_check -o match:'^ *0$' wc -l < /dev/null
+	atf_check -o match:'^ *0 +0$' wc -lL < /dev/null
+	atf_check -o match:'^ *27$' wc -l < "$(atf_get_srcdir)/linecnt.in"
+	atf_check -o match:'^ *27 +26$' wc -lL < "$(atf_get_srcdir)/linecnt.in"
+	true
+}
+
+atf_init_test_cases()
+{
+	atf_add_test_case linecnt
+}
Index: usr.bin/wc/wc.c
===================================================================
--- usr.bin/wc/wc.c
+++ usr.bin/wc/wc.c
@@ -61,6 +61,8 @@
 #include <wctype.h>
 #include <libxo/xo.h>
 
+#include "countnl.h"
+
 static uintmax_t tlinect, twordct, tcharct, tlongline;
 static int doline, doword, dochar, domulti, dolongline;
 static volatile sig_atomic_t siginfo;
@@ -218,7 +220,7 @@
 	 */
 	if (doline == 0 && dolongline == 0) {
 		if (fstat(fd, &sb)) {
-			xo_warn("%s: fstat", file);
+			xo_warn("%s: fstat", file != NULL ? file : "stdin");
 			(void)close(fd);
 			return (1);
 		}
@@ -239,14 +241,14 @@
 	 */
 	while ((len = read(fd, buf, MAXBSIZE))) {
 		if (len == -1) {
-			xo_warn("%s: read", file);
+			xo_warn("%s: read", file != NULL ? file : "stdin");
 			(void)close(fd);
 			return (1);
 		}
 		if (siginfo)
 			show_cnt(file, linect, wordct, charct, llct);
 		charct += len;
-		if (doline || dolongline) {
+		if (dolongline) {
 			for (p = buf; len--; ++p)
 				if (*p == '\n') {
 					if (tmpll > llct)
@@ -255,7 +257,8 @@
 					++linect;
 				} else
 					tmpll++;
-		}
+		} else if (doline)
+			linect += count_newlines(buf, len);
 	}
 	reset_siginfo();
 	if (doline)