Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F100696941
D16998.id47625.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
10 KB
Referenced Files
None
Subscribers
None
D16998.id47625.diff
View Options
Index: usr.bin/cmp/Makefile
===================================================================
--- usr.bin/cmp/Makefile
+++ usr.bin/cmp/Makefile
@@ -3,11 +3,15 @@
.include <src.opts.mk>
+.PATH: ${SRCTOP}/usr.bin/wc
+
PROG= cmp
-SRCS= cmp.c link.c misc.c regular.c special.c
+SRCS= cmp.c link.c misc.c regular.c special.c countnl.c
.if ${MK_TESTS} != "no"
SUBDIR+= tests
.endif
+CFLAGS+= -I${SRCTOP}/usr.bin/wc
+
.include <bsd.prog.mk>
Index: usr.bin/cmp/extern.h
===================================================================
--- usr.bin/cmp/extern.h
+++ usr.bin/cmp/extern.h
@@ -39,6 +39,9 @@
void c_link(const char *, off_t, const char *, off_t);
void c_regular(int, const char *, off_t, off_t, int, const char *, off_t, off_t);
void c_special(int, const char *, off_t, int, const char *, off_t);
+int c_chunk(const char *, const char *,
+ u_char *restrict, u_char *restrict, size_t,
+ off_t *, off_t *, int *);
void diffmsg(const char *, const char *, off_t, off_t);
void eofmsg(const char *);
Index: usr.bin/cmp/misc.c
===================================================================
--- usr.bin/cmp/misc.c
+++ usr.bin/cmp/misc.c
@@ -37,12 +37,15 @@
__FBSDID("$FreeBSD$");
#include <sys/types.h>
+#include <sys/param.h>
#include <err.h>
#include <stdio.h>
#include <stdlib.h>
+#include <string.h>
#include "extern.h"
+#include "countnl.h"
void
eofmsg(const char *file)
@@ -60,3 +63,69 @@
file1, file2, (long long)byte, (long long)line);
exit(DIFF_EXIT);
}
+
+/* Print one differing byte pair in the xflag or lflag format; when neither
+ * flag is set, hand the position to diffmsg() instead. */
+static void
+founddiff(const char *file1, const char *file2, u_char ch1, u_char ch2,
+    off_t byte, off_t line)
+{
+	const long long pos = (long long)byte;
+
+	if (!xflag && !lflag)
+		diffmsg(file1, file2, byte, line);	/* NOTREACHED */
+	else if (xflag)
+		(void)printf("%08llx %02x %02x\n", pos - 1, ch1, ch2);
+	else
+		(void)printf("%6lld %3o %3o\n", pos, ch1, ch2);
+}
+
+/* Compare chunk_size bytes at p1/p2 piece by piece, reporting differences and
+ * advancing *obyte / *oline.  Returns nonzero if any piece differed. */
+int
+c_chunk(const char *file1, const char *file2,
+    u_char *restrict p1, u_char *restrict p2, size_t chunk_size,
+    off_t *obyte, off_t *oline, int *tryfast) {
+	size_t cbyte, cline, piece_size, piece_len;
+	u_char ch1, ch2;
+	int dfound, anydiff = 0;	/* current piece / whole chunk */
+	/* If the chunk is too large, process it in smaller pieces. */
+	for (piece_size = chunk_size >= PAGE_SIZE * 2 ? PAGE_SIZE : chunk_size;
+	    chunk_size;
+	    chunk_size -= piece_len, *obyte += cbyte, *oline += cline) {
+		piece_len = MIN(chunk_size, piece_size);
+		if (*tryfast) {
+			/* memcmp() is all that silent mode needs; otherwise
+			   it lets us skip pieces that have no differences. */
+			if ((dfound = memcmp(p1, p2, piece_len) != 0)) {
+				if (sflag)
+					exit(DIFF_EXIT);
+			} else {
+				cbyte = piece_len;
+				if (!sflag && !lflag) {
+					cline = count_newlines(p1, piece_len);
+				} else
+					cline = 0;
+				p1 += piece_len;
+				p2 += piece_len;
+				continue;
+			}
+		} else
+			dfound = 0;
+		for (cline = 0, cbyte = 0; cbyte != piece_len; cbyte++) {
+			if ((ch1 = *p1++) != (ch2 = *p2++)) {
+				dfound = 1;
+				founddiff(file1, file2, ch1, ch2,
+				    *obyte + cbyte, *oline + cline);
+			}
+			if (ch1 == '\n')
+				++cline;
+		}
+		/* After a piece that differed, skip the memcmp() probe for
+		   the next piece: it would likely need a full compare. */
+		*tryfast = !dfound;
+		/* Remember it, so a later clean piece cannot hide this one. */
+		anydiff |= dfound;
+	}
+	return anydiff;
+}
Index: usr.bin/cmp/regular.c
===================================================================
--- usr.bin/cmp/regular.c
+++ usr.bin/cmp/regular.c
@@ -59,9 +59,10 @@
c_regular(int fd1, const char *file1, off_t skip1, off_t len1,
int fd2, const char *file2, off_t skip2, off_t len2)
{
- u_char ch, *p1, *p2, *m1, *m2, *e1, *e2;
+ u_char *p1, *p2, *m1, *m2, *e1, *e2;
off_t byte, length, line;
- int dfound;
+ size_t len;
+ int dfound, tryfast;
off_t pagemask, off1, off2;
size_t pagesize;
struct sigaction act, oact;
@@ -101,28 +102,19 @@
}
dfound = 0;
+ tryfast = 1;
e1 = m1 + MMAP_CHUNK;
e2 = m2 + MMAP_CHUNK;
p1 = m1 + (skip1 - off1);
p2 = m2 + (skip2 - off2);
+ byte = line = 1;
- for (byte = line = 1; length--; ++byte) {
- if ((ch = *p1) != *p2) {
- if (xflag) {
+ while (length) {
+ len = MIN(MIN(e1 - p1, e2 - p2), length); /* stay inside both mapped chunks */
+ if (c_chunk(file1, file2, p1, p2, len, &byte, &line, &tryfast)) /* byte counter first, then line */
dfound = 1;
- (void)printf("%08llx %02x %02x\n",
- (long long)byte - 1, ch, *p2);
- } else if (lflag) {
- dfound = 1;
- (void)printf("%6lld %3o %3o\n",
- (long long)byte, ch, *p2);
- } else
- diffmsg(file1, file2, byte, line);
- /* NOTREACHED */
- }
- if (ch == '\n')
- ++line;
- if (++p1 == e1) {
+ length -= len;
+ if ((p1 += len) == e1 && length) {
off1 += MMAP_CHUNK;
if ((p1 = m1 = remmap(m1, fd1, off1)) == NULL) {
munmap(m2, MMAP_CHUNK);
@@ -130,7 +122,7 @@
}
e1 = m1 + MMAP_CHUNK;
}
- if (++p2 == e2) {
+ if ((p2 += len) == e2 && length) {
off2 += MMAP_CHUNK;
if ((p2 = m2 = remmap(m2, fd2, off2)) == NULL) {
munmap(m1, MMAP_CHUNK);
Index: usr.bin/cmp/special.c
===================================================================
--- usr.bin/cmp/special.c
+++ usr.bin/cmp/special.c
@@ -36,71 +36,94 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
+#include <sys/param.h>
#include <sys/types.h>
#include <err.h>
#include <stdlib.h>
#include <stdio.h>
+#include <unistd.h>
#include "extern.h"
+/* Read and discard `skip' bytes from fd; return 1 if EOF comes first, else 0. */
+static int
+skip_helper(int fd, const char *file, off_t skip) {
+	char buf[MAXBSIZE];
+	ssize_t r;
+
+	while (skip > 0) {
+		r = read(fd, buf, (size_t)MIN(skip, (off_t)sizeof(buf)));
+		if (r < 0)
+			err(ERR_EXIT, "%s", file);
+		if (r == 0)
+			return 1;
+		/* read() may return fewer bytes than asked; count only those. */
+		skip -= r;
+	}
+	return 0;
+}
+
+/* Refill buf from fd: 0 with [*cur, *lim) = new data, 1 at EOF (empty). */
+static int
+read_helper(int fd, const char *file, u_char *buf, size_t size,
+    u_char **cur, u_char **lim)
+{
+	int nread;
+	*cur = *lim = buf;
+	nread = read(fd, buf, size);
+	if (nread > 0) {
+		*lim = buf + nread;
+		return 0;
+	}
+	if (nread < 0)
+		err(ERR_EXIT, "%s", file);
+	return 1;
+}
+
void
c_special(int fd1, const char *file1, off_t skip1,
int fd2, const char *file2, off_t skip2)
{
- int ch1, ch2;
off_t byte, line;
- FILE *fp1, *fp2;
- int dfound;
-
- if ((fp1 = fdopen(fd1, "r")) == NULL)
- err(ERR_EXIT, "%s", file1);
- if ((fp2 = fdopen(fd2, "r")) == NULL)
- err(ERR_EXIT, "%s", file2);
+ int dfound, tryfast;
+ u_char b1[MAXBSIZE], b2[MAXBSIZE];
+ u_char *p1, *p2, *e1, *e2;
+ size_t len;
+ int eof1 = 0, eof2 = 0; /* eof: tests both, even when a skip jumps there early */
dfound = 0;
- while (skip1--)
- if (getc(fp1) == EOF)
+ /* Skip the requested prefixes; EOF during a skip goes straight to eof:. */
+ if ((eof1 = skip_helper(fd1, file1, skip1)))
goto eof;
- while (skip2--)
- if (getc(fp2) == EOF)
+ if ((eof2 = skip_helper(fd2, file2, skip2)))
goto eof;
- for (byte = line = 1;; ++byte) {
- ch1 = getc(fp1);
- ch2 = getc(fp2);
- if (ch1 == EOF || ch2 == EOF)
+ p1 = e1 = b1;
+ p2 = e2 = b2;
+ byte = line = 1;
+ tryfast = 1;
+ /* Refill whichever buffer ran dry, then compare what both have buffered. */
+ for (;;) {
+ if (p1 == e1)
+ eof1 = read_helper(fd1, file1, b1, sizeof b1, &p1, &e1);
+ if (p2 == e2)
+ eof2 = read_helper(fd2, file2, b2, sizeof b2, &p2, &e2);
+ if (eof1 || eof2)
break;
- if (ch1 != ch2) {
- if (xflag) {
+ len = MIN(e1 - p1, e2 - p2);
+ if (c_chunk(file1, file2, p1, p2, len, &byte, &line, &tryfast)) /* byte counter first, then line */
dfound = 1;
- (void)printf("%08llx %02x %02x\n",
- (long long)byte - 1, ch1, ch2);
- } else if (lflag) {
- dfound = 1;
- (void)printf("%6lld %3o %3o\n",
- (long long)byte, ch1, ch2);
- } else {
- diffmsg(file1, file2, byte, line);
- /* NOTREACHED */
- }
- }
- if (ch1 == '\n')
- ++line;
+ p1 += len;
+ p2 += len;
}
-eof: if (ferror(fp1))
- err(ERR_EXIT, "%s", file1);
- if (ferror(fp2))
- err(ERR_EXIT, "%s", file2);
- if (feof(fp1)) {
- if (!feof(fp2))
+eof: if (eof1) {
+ if (!eof2)
eofmsg(file1);
} else
- if (feof(fp2))
+ if (eof2)
eofmsg(file2);
- fclose(fp2);
- fclose(fp1);
if (dfound)
exit(DIFF_EXIT);
}
Index: usr.bin/wc/Makefile
===================================================================
--- usr.bin/wc/Makefile
+++ usr.bin/wc/Makefile
@@ -4,4 +4,6 @@
PROG= wc
LIBADD= xo
+SRCS= wc.c countnl.c
+
.include <bsd.prog.mk>
Index: usr.bin/wc/countnl.h
===================================================================
--- /dev/null
+++ usr.bin/wc/countnl.h
@@ -0,0 +1,3 @@
+#include <stdlib.h>
+
+size_t count_newlines(u_char *restrict, size_t);
Index: usr.bin/wc/countnl.c
===================================================================
--- /dev/null
+++ usr.bin/wc/countnl.c
@@ -0,0 +1,46 @@
+#include <stdlib.h>
+#include <sys/types.h>
+
+#include "countnl.h"
+
+size_t
+count_newlines(u_char *restrict p, size_t l) {
+ size_t n = 0;
+#if 1 /* word-at-a-time fast path; set to 0 to keep only the byte loop below */
+ /* Returns the number of '\n' bytes among the l bytes at p. The fast
+ * path uses unsigned long as its word because long long makes this
+ * slower than the naive algorithm on some CPUs. */
+ typedef unsigned long word;
+ word *restrict wp;
+ size_t wl;
+ /* Only worth it when there are more than four words' worth of bytes. */
+ if (l > sizeof (word) * 4) {
+ /* Count byte by byte until p is word-aligned. */
+ for (; ((uintptr_t)p & (sizeof(word) - 1)) && l; l--)
+ n += *p++ == '\n';
+ /* Split the rest into wl whole words (read via wp) and l trailing bytes. */
+ wp = (void *)p;
+ wl = l / sizeof (word);
+ l -= wl * sizeof (word);
+ p += wl * sizeof (word);
+ while (wl--) {
+ /* "Bit Twiddling Hacks", "Determine if a word has a
+ * byte less than n", i.e. countless(x, 1). XOR with
+ * 0x0a in every byte turns newline bytes into zero.
+ * 0x80 - (low 7 bits) keeps bit 7 only where the low
+ * bits are 0; & ~wv also requires bit 7 clear, so
+ * exactly the zero bytes are flagged. / 128 moves each
+ * flag to bit 0 and % 255 adds the bytes together.
+ * <https://graphics.stanford.edu/~seander/bithacks.html#HasLessInWord> */
+ word wv = *wp++ ^ ~(word)0/255 * (word)'\n';
+ n += ((~(word)0/255*(127+(1)) - ((wv)&~(word)0/255*127))
+ & ~(wv) & ~(word)0/255*128) / 128 % 255;
+ }
+ /* The loop after #endif counts the l bytes that did not fill a word. */
+ }
+#endif
+ while (l--)
+ n += *p++ == '\n';
+ return n;
+}
+
Index: usr.bin/wc/wc.c
===================================================================
--- usr.bin/wc/wc.c
+++ usr.bin/wc/wc.c
@@ -59,6 +59,8 @@
#include <wctype.h>
#include <libxo/xo.h>
+#include "countnl.h"
+
static uintmax_t tlinect, twordct, tcharct, tlongline;
static int doline, doword, dochar, domulti, dolongline;
static volatile sig_atomic_t siginfo;
@@ -228,6 +230,7 @@
llct);
}
charct += len;
+ if (dolongline) {
for (p = buf; len--; ++p)
if (*p == '\n') {
if (tmpll > llct)
@@ -236,6 +239,8 @@
++linect;
} else
tmpll++;
+ } else
+ linect += count_newlines(buf, len);
}
reset_siginfo();
tlinect += linect;
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Sat, Oct 19, 12:20 PM (21 h, 46 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
14264817
Default Alt Text
D16998.id47625.diff (10 KB)
Attached To
Mode
D16998: making cmp faster, especially on special files (and speed up wc -l too while at it)
Attached
Detach File
Event Timeline
Log In to Comment