Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F160187727
D16998.id67848.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
12 KB
Referenced Files
None
Subscribers
None
D16998.id67848.diff
View Options
Index: etc/mtree/BSD.tests.dist
===================================================================
--- etc/mtree/BSD.tests.dist
+++ etc/mtree/BSD.tests.dist
@@ -1072,6 +1072,8 @@
..
vmstat
..
+ wc
+ ..
xargs
..
xinstall
Index: usr.bin/cmp/Makefile
===================================================================
--- usr.bin/cmp/Makefile
+++ usr.bin/cmp/Makefile
@@ -3,10 +3,14 @@
.include <src.opts.mk>
+.PATH: ${SRCTOP}/usr.bin/wc
+
PROG= cmp
-SRCS= cmp.c link.c misc.c regular.c special.c
+SRCS= cmp.c link.c misc.c regular.c special.c countnl.c
HAS_TESTS=
SUBDIR.${MK_TESTS}+= tests
+CFLAGS+= -I${SRCTOP}/usr.bin/wc
+
.include <bsd.prog.mk>
Index: usr.bin/cmp/extern.h
===================================================================
--- usr.bin/cmp/extern.h
+++ usr.bin/cmp/extern.h
@@ -41,6 +41,9 @@
void c_link(const char *, off_t, const char *, off_t);
void c_regular(int, const char *, off_t, off_t, int, const char *, off_t, off_t);
void c_special(int, const char *, off_t, int, const char *, off_t);
+int c_chunk(const char *, const char *,
+ u_char *restrict, u_char *restrict, size_t,
+ off_t *, off_t *, int *);
void diffmsg(const char *, const char *, off_t, off_t);
void eofmsg(const char *);
Index: usr.bin/cmp/misc.c
===================================================================
--- usr.bin/cmp/misc.c
+++ usr.bin/cmp/misc.c
@@ -39,12 +39,15 @@
__FBSDID("$FreeBSD$");
#include <sys/types.h>
+#include <sys/param.h>
#include <err.h>
#include <stdio.h>
#include <stdlib.h>
+#include <string.h>
#include "extern.h"
+#include "countnl.h"
void
eofmsg(const char *file)
@@ -62,3 +65,71 @@
file1, file2, (long long)byte, (long long)line);
exit(DIFF_EXIT);
}
+
+/*
+ * Report a single differing byte pair in the format selected by the
+ * output flags: hexadecimal with a zero-based offset for xflag, octal
+ * with a one-based offset for lflag.  With neither flag set the report
+ * is delegated to diffmsg(), which exits, so only the first difference
+ * is ever printed in that mode.
+ */
+static void
+founddiff(const char *file1, const char *file2,
+    u_char ch1, u_char ch2,
+    off_t byte, off_t line)
+{
+	if (xflag) {
+		(void)printf("%08llx %02x %02x\n",
+		    (long long)byte - 1, ch1, ch2);
+		return;
+	}
+	if (lflag) {
+		(void)printf("%6lld %3o %3o\n",
+		    (long long)byte, ch1, ch2);
+		return;
+	}
+	diffmsg(file1, file2, byte, line);
+	/* NOTREACHED */
+}
+
+/*
+ * Compare chunk_size bytes at p1 and p2 and report differences via
+ * founddiff().
+ *
+ * oline and obyte point at the caller's running line and byte counters;
+ * they are advanced by the number of newlines and bytes consumed.  The
+ * order (line first, then byte) matches what the callers in regular.c
+ * and special.c pass: (&line, &byte).
+ *
+ * *tryfast is an in/out hint.  While it is non-zero each piece is first
+ * compared with memcmp(); it is cleared after a piece that contained a
+ * difference and set again after a byte-by-byte piece that did not.
+ *
+ * With sflag set the function exits with DIFF_EXIT as soon as memcmp()
+ * reports a mismatch.
+ *
+ * Returns non-zero if any piece of the chunk contained a difference.
+ */
+int
+c_chunk(const char *file1, const char *file2,
+    u_char *restrict p1, u_char *restrict p2, size_t chunk_size,
+    off_t *oline, off_t *obyte, int *tryfast)
+{
+	size_t cbyte, cline;
+	size_t piece_size, piece_len;
+	u_char ch1, ch2;
+	int dfound, pfound;
+
+	/* Accumulated over all pieces; also defined for an empty chunk. */
+	dfound = 0;
+
+	/* If the chunk is too large, process it in smaller pieces. */
+	piece_size = chunk_size >= PAGE_SIZE * 2 ? PAGE_SIZE : chunk_size;
+	for (; chunk_size != 0;
+	    chunk_size -= piece_len, *obyte += cbyte, *oline += cline) {
+		piece_len = MIN(chunk_size, piece_size);
+		pfound = 0;
+		if (*tryfast) {
+			/*
+			 * The memcmp() is all that is needed in silent
+			 * mode, otherwise it is done opportunistically to
+			 * skip over pieces that have no differences.
+			 */
+			if (memcmp(p1, p2, piece_len) == 0) {
+				cbyte = piece_len;
+				/* Line numbers are only reported by diffmsg(). */
+				if (!sflag && !lflag)
+					cline = count_newlines(p1, piece_len);
+				else
+					cline = 0;
+				p1 += piece_len;
+				p2 += piece_len;
+				continue;
+			}
+			if (sflag)
+				exit(DIFF_EXIT);
+			pfound = 1;
+		}
+		for (cline = 0, cbyte = 0; cbyte != piece_len; cbyte++) {
+			ch1 = *p1++;
+			ch2 = *p2++;
+			if (ch1 != ch2) {
+				pfound = 1;
+				/*
+				 * NOTE: founddiff() exits if only one
+				 * difference needs to be reported.
+				 */
+				founddiff(file1, file2, ch1, ch2,
+				    *obyte + cbyte, *oline + cline);
+			}
+			if (ch1 == '\n')
+				++cline;
+		}
+		/*
+		 * Avoid the (not necessarily so fast) fast path for the
+		 * next piece if we might be in a region with a lot of
+		 * differences.
+		 */
+		*tryfast = !pfound;
+		dfound |= pfound;
+	}
+	return (dfound);
+}
Index: usr.bin/cmp/regular.c
===================================================================
--- usr.bin/cmp/regular.c
+++ usr.bin/cmp/regular.c
@@ -64,11 +64,12 @@
{
struct sigaction act, oact;
cap_rights_t rights;
- u_char ch, *p1, *p2, *m1, *m2, *e1, *e2;
+ u_char *p1, *p2, *m1, *m2, *e1, *e2;
off_t byte, length, line;
+ size_t len;
+ int dfound, tryfast;
off_t pagemask, off1, off2;
size_t pagesize;
- int dfound;
if (skip1 > len1)
eofmsg(file1);
@@ -112,28 +113,19 @@
err(ERR_EXIT, "sigaction()");
dfound = 0;
+ tryfast = 1;
e1 = m1 + MMAP_CHUNK;
e2 = m2 + MMAP_CHUNK;
p1 = m1 + (skip1 - off1);
p2 = m2 + (skip2 - off2);
+ byte = line = 1;
- for (byte = line = 1; length--; ++byte) {
- if ((ch = *p1) != *p2) {
- if (xflag) {
- dfound = 1;
- (void)printf("%08llx %02x %02x\n",
- (long long)byte - 1, ch, *p2);
- } else if (lflag) {
- dfound = 1;
- (void)printf("%6lld %3o %3o\n",
- (long long)byte, ch, *p2);
- } else
- diffmsg(file1, file2, byte, line);
- /* NOTREACHED */
- }
- if (ch == '\n')
- ++line;
- if (++p1 == e1) {
+ while (length) {
+ len = MIN(MIN(e1 - p1, e2 - p2), length);
+ if (c_chunk(file1, file2, p1, p2, len, &line, &byte, &tryfast))
+ dfound = 1;
+ length -= len;
+ if ((p1 += len) == e1 && length) {
off1 += MMAP_CHUNK;
if ((p1 = m1 = remmap(m1, fd1, off1)) == NULL) {
munmap(m2, MMAP_CHUNK);
@@ -141,7 +133,7 @@
}
e1 = m1 + MMAP_CHUNK;
}
- if (++p2 == e2) {
+ if ((p2 += len) == e2 && length) {
off2 += MMAP_CHUNK;
if ((p2 = m2 = remmap(m2, fd2, off2)) == NULL) {
munmap(m1, MMAP_CHUNK);
Index: usr.bin/cmp/special.c
===================================================================
--- usr.bin/cmp/special.c
+++ usr.bin/cmp/special.c
@@ -38,23 +38,62 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
+#include <sys/param.h>
#include <sys/types.h>
#include <capsicum_helpers.h>
#include <err.h>
#include <stdlib.h>
#include <stdio.h>
+#include <unistd.h>
#include "extern.h"
+/*
+ * Discard the first `skip' bytes of fd by reading them into a scratch
+ * buffer.  A non-positive skip reads nothing.
+ *
+ * Returns 1 if end-of-file was reached before all bytes were consumed
+ * and 0 otherwise.  A read error terminates the program through err().
+ */
+static int
+skip_helper(int fd, const char *file, off_t skip)
+{
+	char buf[MAXBSIZE];
+	ssize_t r;		/* read(2) returns ssize_t, not int */
+	size_t n;
+
+	/*
+	 * Test for > 0 rather than != 0: a negative skip would otherwise
+	 * be converted to a huge size_t request length below.
+	 */
+	while (skip > 0) {
+		n = MIN(skip, (off_t)sizeof(buf));
+		r = read(fd, buf, n);
+		if (r < 0)
+			err(ERR_EXIT, "%s", file);
+		if (r == 0)
+			return (1);
+		skip -= r;
+	}
+	return (0);
+}
+
+/*
+ * Refill a read buffer once it has been fully consumed.
+ *
+ * *p is the current read position and *e the end of valid data inside
+ * buf.  Nothing happens (and *eof is left untouched) while *p != *e.
+ * Otherwise both are reset to buf, up to `size' bytes are read from fd,
+ * *e is advanced by the amount read and *eof is set to whether read()
+ * returned 0.  A read error terminates the program through err().
+ */
+static void
+read_helper(int fd, const char *file,
+    u_char *buf, size_t size,
+    u_char **p, u_char **e, int *eof)
+{
+	ssize_t r;		/* read(2) returns ssize_t, not int */
+
+	if (*p != *e)
+		return;
+	*p = *e = buf;
+	r = read(fd, buf, size);
+	if (r < 0)
+		err(ERR_EXIT, "%s", file);
+	*eof = (r == 0);
+	*e += r;
+}
+
void
c_special(int fd1, const char *file1, off_t skip1,
int fd2, const char *file2, off_t skip2)
{
- int ch1, ch2;
off_t byte, line;
- FILE *fp1, *fp2;
- int dfound;
+ int dfound, tryfast;
+ u_char b1[MAXBSIZE], b2[MAXBSIZE];
+ u_char *p1, *p2, *e1, *e2;
+ size_t len;
+ int eof1, eof2;
if (caph_limit_stream(fd1, CAPH_READ) < 0)
err(ERR_EXIT, "caph_limit_stream(%s)", file1);
@@ -63,54 +102,35 @@
if (caph_enter() < 0)
err(ERR_EXIT, "unable to enter capability mode");
- if ((fp1 = fdopen(fd1, "r")) == NULL)
- err(ERR_EXIT, "%s", file1);
- if ((fp2 = fdopen(fd2, "r")) == NULL)
- err(ERR_EXIT, "%s", file2);
-
dfound = 0;
- while (skip1--)
- if (getc(fp1) == EOF)
- goto eof;
- while (skip2--)
- if (getc(fp2) == EOF)
- goto eof;
+ if ((eof1 = skip_helper(fd1, file1, skip1)))
+ goto eof;
+ if ((eof2 = skip_helper(fd2, file2, skip2)))
+ goto eof;
- for (byte = line = 1;; ++byte) {
- ch1 = getc(fp1);
- ch2 = getc(fp2);
- if (ch1 == EOF || ch2 == EOF)
+ p1 = e1 = b1;
+ p2 = e2 = b2;
+ byte = line = 1;
+ tryfast = 1;
+
+ for (;;) {
+ read_helper(fd1, file1, b1, sizeof b1, &p1, &e1, &eof1);
+ read_helper(fd2, file2, b2, sizeof b2, &p2, &e2, &eof2);
+ if (eof1 || eof2)
break;
- if (ch1 != ch2) {
- if (xflag) {
- dfound = 1;
- (void)printf("%08llx %02x %02x\n",
- (long long)byte - 1, ch1, ch2);
- } else if (lflag) {
- dfound = 1;
- (void)printf("%6lld %3o %3o\n",
- (long long)byte, ch1, ch2);
- } else {
- diffmsg(file1, file2, byte, line);
- /* NOTREACHED */
- }
- }
- if (ch1 == '\n')
- ++line;
+ len = MIN(e1 - p1, e2 - p2);
+ if (c_chunk(file1, file2, p1, p2, len, &line, &byte, &tryfast))
+ dfound = 1;
+ p1 += len;
+ p2 += len;
}
-eof: if (ferror(fp1))
- err(ERR_EXIT, "%s", file1);
- if (ferror(fp2))
- err(ERR_EXIT, "%s", file2);
- if (feof(fp1)) {
- if (!feof(fp2))
+eof: if (eof1) {
+ if (!eof2)
eofmsg(file1);
} else
- if (feof(fp2))
+ if (eof2)
eofmsg(file2);
- fclose(fp2);
- fclose(fp1);
if (dfound)
exit(DIFF_EXIT);
}
Index: usr.bin/wc/Makefile
===================================================================
--- usr.bin/wc/Makefile
+++ usr.bin/wc/Makefile
@@ -12,4 +12,10 @@
CFLAGS+=-DWITH_CASPER
.endif
+SRCS= wc.c countnl.c
+
+.if ${MK_TESTS} != "no"
+SUBDIR+= tests
+.endif
+
.include <bsd.prog.mk>
Index: usr.bin/wc/countnl.h
===================================================================
--- usr.bin/wc/countnl.h
+++ usr.bin/wc/countnl.h
@@ -0,0 +1,3 @@
+#ifndef COUNTNL_H
+#define COUNTNL_H
+
+#include <sys/types.h>	/* u_char */
+
+#include <stdlib.h>	/* size_t */
+
+/* Return the number of '\n' bytes among the first len bytes at buf. */
+size_t count_newlines(u_char *restrict buf, size_t len);
+
+#endif /* !COUNTNL_H */
Index: usr.bin/wc/countnl.c
===================================================================
--- usr.bin/wc/countnl.c
+++ usr.bin/wc/countnl.c
@@ -0,0 +1,46 @@
+#include <sys/types.h>
+#include <sys/limits.h>
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "countnl.h"
+
+/*
+ * Count the '\n' bytes among the l bytes starting at p.
+ *
+ * When bytes are 8 bits wide and the buffer is longer than ten machine
+ * words, the bulk of it is scanned one `unsigned long' at a time; the
+ * unaligned head and the tail are counted byte by byte.
+ */
+size_t
+count_newlines(u_char *restrict p, size_t l)
+{
+	size_t n = 0;
+
+#if CHAR_BIT == 8 /* VROOM VROOM */
+	/*
+	 * Using long long makes this slower than the naive algorithm
+	 * on some CPUs.  Let's assume that long is the largest
+	 * integer type that the CPU can work with efficiently.
+	 */
+	typedef unsigned long word;
+	const word ones = ~(word)0 / 255;	/* 0x01 in every byte */
+	word wv;
+	size_t wl;
+
+	/* Only do it if we've got at least a few words to process. */
+	if (l > sizeof(word) * 10) {
+		/* Align to word boundary. */
+		for (; ((uintptr_t)p & (sizeof(word) - 1)) && l; l--)
+			n += *p++ == '\n';
+		/* Process one word at a time. */
+		for (wl = l / sizeof(word); wl != 0; wl--) {
+			/*
+			 * Load through memcpy() instead of dereferencing
+			 * a cast pointer: reading a u_char array through
+			 * an `unsigned long' lvalue violates the
+			 * effective-type (strict aliasing) rule.
+			 */
+			memcpy(&wv, p, sizeof(wv));
+			p += sizeof(wv);
+			/*
+			 * This is from the "Bit Twiddling Hacks" page,
+			 * "Determine if a word has a byte less than n":
+			 * XOR turns newline bytes into zero bytes, then
+			 * the bytes less than one are counted.
+			 * <https://graphics.stanford.edu/~seander/bithacks.html#HasLessInWord>
+			 */
+			wv ^= ones * (word)'\n';
+			n += ((ones * (127 + 1) - (wv & ones * 127))
+			    & ~wv & ones * 128) / 128 % 255;
+		}
+		/* Leftovers handled below. */
+		l %= sizeof(word);
+	}
+#endif
+	while (l--)
+		n += *p++ == '\n';
+	return (n);
+}
+
Index: usr.bin/wc/tests/Makefile
===================================================================
--- usr.bin/wc/tests/Makefile
+++ usr.bin/wc/tests/Makefile
@@ -0,0 +1,9 @@
+# $FreeBSD$
+
+PACKAGE= tests
+
+ATF_TESTS_SH+= linecnt
+
+${PACKAGE}FILES+= linecnt.in
+
+.include <bsd.test.mk>
Index: usr.bin/wc/tests/linecnt.in
===================================================================
--- usr.bin/wc/tests/linecnt.in
+++ usr.bin/wc/tests/linecnt.in
@@ -0,0 +1,27 @@
+abcdefgh
+abcde
+abcdefghijklmnopqrs
+abcdefghijklmno
+abcdefghijklmnopqrstuvwx
+abcdefghijklmnop
+ab
+abcdefghi
+abc
+abcdefghijklm
+abcdefghijklmnopqrst
+abcdefghijklmnopqrstu
+abcdefghijkl
+a
+abcdef
+abcdefghijklmnopqrstuvw
+
+abcdefghijklmnopqrstuv
+abcdefghijklmnopq
+abcdefghijk
+abcdefghijklmnopqr
+abcdefghijklmnopqrstuvwxyz
+abcdefghijklmnopqrstuvwxy
+abcd
+abcdefghij
+abcdefghijklmn
+abcdefg
Index: usr.bin/wc/tests/linecnt.sh
===================================================================
--- usr.bin/wc/tests/linecnt.sh
+++ usr.bin/wc/tests/linecnt.sh
@@ -0,0 +1,17 @@
+
+# Declare the "linecnt" test case; its head and body functions follow.
+atf_test_case linecnt
+linecnt_head() {
+ atf_set "descr" "Test wc(1)'s line counting fast path"
+}
+# Runs wc with -l and with -lL on empty input and on the linecnt.in
+# fixture, matching the printed counts against fixed regular expressions.
+linecnt_body() {
+ # Empty input: expect a line count of 0 (and a second field of 0 with -L).
+ atf_check -o match:'^ *0$' wc -l < /dev/null
+ atf_check -o match:'^ *0 +0$' wc -lL < /dev/null
+ # Fixture in the test source directory: expect 27 lines; with -L the
+ # second field is expected to be 26.
+ atf_check -o match:'^ *27$' wc -l < "$(atf_get_srcdir)/linecnt.in"
+ atf_check -o match:'^ *27 +26$' wc -lL < "$(atf_get_srcdir)/linecnt.in"
+ true
+}
+
+# Adds the linecnt test case to the test program.
+atf_init_test_cases()
+{
+ atf_add_test_case linecnt
+}
Index: usr.bin/wc/wc.c
===================================================================
--- usr.bin/wc/wc.c
+++ usr.bin/wc/wc.c
@@ -66,6 +66,8 @@
#include <libcasper.h>
#include <casper/cap_fileargs.h>
+#include "countnl.h"
+
static fileargs_t *fa;
static uintmax_t tlinect, twordct, tcharct, tlongline;
static int doline, doword, dochar, domulti, dolongline;
@@ -274,7 +276,7 @@
if (siginfo)
show_cnt(file, linect, wordct, charct, llct);
charct += len;
- if (doline || dolongline) {
+ if (dolongline) {
for (p = buf; len--; ++p)
if (*p == '\n') {
if (tmpll > llct)
@@ -283,7 +285,8 @@
++linect;
} else
tmpll++;
- }
+ } else if (doline)
+ linect += count_newlines(buf, len);
}
reset_siginfo();
if (doline)
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Tue, Jun 23, 12:34 AM (21 h, 21 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
34221460
Default Alt Text
D16998.id67848.diff (12 KB)
Attached To
Mode
D16998: making cmp faster, especially on special files (and speed up wc -l too while at it)
Attached
Detach File
Event Timeline
Log In to Comment