Index: usr.bin/cmp/Makefile
===================================================================
--- usr.bin/cmp/Makefile
+++ usr.bin/cmp/Makefile
@@ -3,11 +3,15 @@
 
 .include <src.opts.mk>
 
+.PATH: ${SRCTOP}/usr.bin/wc
+
 PROG=	cmp
-SRCS=	cmp.c link.c misc.c regular.c special.c
+SRCS=	cmp.c link.c misc.c regular.c special.c countnl.c
 
 .if ${MK_TESTS} != "no"
 SUBDIR+=	tests
 .endif
 
+CFLAGS+= -I${SRCTOP}/usr.bin/wc
+
 .include <bsd.prog.mk>
Index: usr.bin/cmp/extern.h
===================================================================
--- usr.bin/cmp/extern.h
+++ usr.bin/cmp/extern.h
@@ -39,6 +39,9 @@
 void	c_link(const char *, off_t, const char *, off_t);
 void	c_regular(int, const char *, off_t, off_t, int, const char *, off_t, off_t);
 void	c_special(int, const char *, off_t, int, const char *, off_t);
+int	c_chunk(const char *, const char *,
+		u_char *restrict, u_char *restrict, size_t,
+		off_t *, off_t *, int *);
 void	diffmsg(const char *, const char *, off_t, off_t);
 void	eofmsg(const char *);
 
Index: usr.bin/cmp/misc.c
===================================================================
--- usr.bin/cmp/misc.c
+++ usr.bin/cmp/misc.c
@@ -37,12 +37,15 @@
 __FBSDID("$FreeBSD$");
 
 #include <sys/types.h>
+#include <sys/param.h>
 
 #include <err.h>
 #include <stdio.h>
 #include <stdlib.h>
+#include <string.h>
 
 #include "extern.h"
+#include "countnl.h"
 
 void
 eofmsg(const char *file)
@@ -60,3 +63,95 @@
 		    file1, file2, (long long)byte, (long long)line);
 	exit(DIFF_EXIT);
 }
+
+/*
+ * Report one differing byte pair.  With xflag the zero-based offset is
+ * printed in hex along with both bytes in hex; with lflag the offset is
+ * printed in decimal along with both bytes in octal.  Otherwise diffmsg()
+ * is called with the byte and line numbers and does not return.
+ */
+static void
+founddiff(const char *file1, const char *file2,
+    u_char ch1, u_char ch2,
+    off_t byte, off_t line)
+{
+	if (xflag) {
+		(void)printf("%08llx %02x %02x\n",
+		    (long long)byte - 1, ch1, ch2);
+	} else if (lflag) {
+		(void)printf("%6lld %3o %3o\n",
+		    (long long)byte, ch1, ch2);
+	} else {
+		diffmsg(file1, file2, byte, line);
+		/* NOTREACHED */
+	}
+}
+
+/*
+ * Compare chunk_size bytes at p1 and p2, reporting each difference via
+ * founddiff().  *obyte and *oline are the byte and line numbers of the
+ * first byte of the chunk on entry and are advanced past the chunk on
+ * return.  *tryfast carries a hint between pieces and calls: when set,
+ * a piece is first checked with memcmp() and skipped if identical.  In
+ * silent mode (sflag) a mismatch on that path exits with DIFF_EXIT.
+ * Returns non-zero if a difference was found anywhere in the chunk.
+ */
+int
+c_chunk(const char *file1, const char *file2,
+    u_char *restrict p1, u_char *restrict p2, size_t chunk_size,
+    off_t *obyte, off_t *oline, int *tryfast)
+{
+	size_t cbyte, cline, piece_size, piece_len;
+	u_char ch1, ch2;
+	int dfound, anydiff;
+
+	/* Sticky result for the whole chunk; also covers chunk_size == 0. */
+	anydiff = 0;
+	/* If the chunk is too large, process it in smaller pieces. */
+	piece_size = chunk_size >= PAGE_SIZE * 2 ? PAGE_SIZE : chunk_size;
+	for (; chunk_size != 0;
+	    chunk_size -= piece_len, *obyte += cbyte, *oline += cline) {
+		piece_len = MIN(chunk_size, piece_size);
+		if (*tryfast) {
+			/*
+			 * The memcmp() is all that is needed in silent
+			 * mode, otherwise it is done opportunistically
+			 * to skip over pieces that have no differences.
+			 */
+			dfound = memcmp(p1, p2, piece_len) != 0;
+			if (dfound) {
+				if (sflag)
+					exit(DIFF_EXIT);
+			} else {
+				cbyte = piece_len;
+				if (!sflag && !lflag)
+					cline = count_newlines(p1, piece_len);
+				else
+					cline = 0;
+				p1 += piece_len;
+				p2 += piece_len;
+				continue;
+			}
+		} else
+			dfound = 0;
+		for (cline = 0, cbyte = 0; cbyte != piece_len; cbyte++) {
+			if ((ch1 = *p1++) != (ch2 = *p2++)) {
+				dfound = 1;
+				founddiff(file1, file2, ch1, ch2,
+				    *obyte + cbyte, *oline + cline);
+			}
+			if (ch1 == '\n')
+				++cline;
+		}
+		/* Remember a difference even if later pieces compare equal. */
+		if (dfound)
+			anydiff = 1;
+		/*
+		 * Avoid the (not necessarily so fast) fast path for the
+		 * next piece if it seems like we might have to do a
+		 * full compare anyway.
+		 */
+		*tryfast = !dfound;
+	}
+	return anydiff;
+}
Index: usr.bin/cmp/regular.c
===================================================================
--- usr.bin/cmp/regular.c
+++ usr.bin/cmp/regular.c
@@ -59,9 +59,10 @@
 c_regular(int fd1, const char *file1, off_t skip1, off_t len1,
     int fd2, const char *file2, off_t skip2, off_t len2)
 {
-	u_char ch, *p1, *p2, *m1, *m2, *e1, *e2;
+	u_char *p1, *p2, *m1, *m2, *e1, *e2;
 	off_t byte, length, line;
-	int dfound;
+	size_t len;
+	int dfound, tryfast;
 	off_t pagemask, off1, off2;
 	size_t pagesize;
 	struct sigaction act, oact;
@@ -101,28 +102,19 @@
 	}
 
 	dfound = 0;
+	tryfast = 1;
 	e1 = m1 + MMAP_CHUNK;
 	e2 = m2 + MMAP_CHUNK;
 	p1 = m1 + (skip1 - off1);
 	p2 = m2 + (skip2 - off2);
+	byte = line = 1;
 
-	for (byte = line = 1; length--; ++byte) {
-		if ((ch = *p1) != *p2) {
-			if (xflag) {
+	while (length) {
+		len = MIN(MIN(e1 - p1, e2 - p2), length);
+		if (c_chunk(file1, file2, p1, p2, len, &byte, &line, &tryfast))
 			dfound = 1;
-				(void)printf("%08llx %02x %02x\n",
-				    (long long)byte - 1, ch, *p2);
-			} else if (lflag) {
-				dfound = 1;
-				(void)printf("%6lld %3o %3o\n",
-				    (long long)byte, ch, *p2);
-			} else
-				diffmsg(file1, file2, byte, line);
-				/* NOTREACHED */
-		}
-		if (ch == '\n')
-			++line;
-		if (++p1 == e1) {
+		length -= len;
+		if ((p1 += len) == e1 && length) {
 			off1 += MMAP_CHUNK;
 			if ((p1 = m1 = remmap(m1, fd1, off1)) == NULL) {
 				munmap(m2, MMAP_CHUNK);
@@ -130,7 +122,7 @@
 			}
 			e1 = m1 + MMAP_CHUNK;
 		}
-		if (++p2 == e2) {
+		if ((p2 += len) == e2 && length) {
 			off2 += MMAP_CHUNK;
 			if ((p2 = m2 = remmap(m2, fd2, off2)) == NULL) {
 				munmap(m1, MMAP_CHUNK);
Index: usr.bin/cmp/special.c
===================================================================
--- usr.bin/cmp/special.c
+++ usr.bin/cmp/special.c
@@ -36,71 +36,117 @@
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
+#include <sys/param.h>
 #include <sys/types.h>
 
 #include <err.h>
 #include <stdlib.h>
 #include <stdio.h>
+#include <unistd.h>
 
 #include "extern.h"
 
+/*
+ * Consume and discard `skip' bytes from fd using read(2).  Returns 0 once
+ * all of them have been consumed, or 1 if end-of-file is reached first.
+ * A read error is fatal (err() with ERR_EXIT).
+ */
+static int
+skip_helper(int fd, const char *file, off_t skip)
+{
+	char buf[MAXBSIZE];
+	ssize_t r;
+	size_t n;
+
+	while (skip > 0) {
+		n = MIN(skip, (off_t)(sizeof buf));
+		r = read(fd, buf, n);
+		if (r <= 0) {
+			if (r < 0)
+				err(ERR_EXIT, "%s", file);
+			return 1;
+		}
+		/* read(2) may return fewer than n bytes (pipes, ttys). */
+		skip -= r;
+	}
+	return 0;
+}
+
+/*
+ * Refill buf with up to `size' bytes from fd.  On success *cur points at
+ * the start of the buffer and *lim just past the last byte read, and 0 is
+ * returned.  At end-of-file both are left pointing at buf and 1 is
+ * returned.  A read error is fatal (err() with ERR_EXIT).
+ */
+static int
+read_helper(int fd, const char *file,
+    u_char *buf, size_t size,
+    u_char **cur, u_char **lim)
+{
+	ssize_t r;
+
+	*cur = *lim = buf;
+	r = read(fd, *cur, size);
+	if (r <= 0) {
+		if (r < 0)
+			err(ERR_EXIT, "%s", file);
+		return 1;
+	}
+	*lim += r;
+	return 0;
+}
+
+/*
+ * Compare the data read from fd1 and fd2 after discarding the first skip1
+ * and skip2 bytes respectively, handing each buffered run to c_chunk().
+ * If exactly one input ends first, eofmsg() is called for it; if any
+ * difference was found, exits with DIFF_EXIT.
+ */
 void
 c_special(int fd1, const char *file1, off_t skip1,
     int fd2, const char *file2, off_t skip2)
 {
-	int ch1, ch2;
 	off_t byte, line;
-	FILE *fp1, *fp2;
-	int dfound;
-
-	if ((fp1 = fdopen(fd1, "r")) == NULL)
-		err(ERR_EXIT, "%s", file1);
-	if ((fp2 = fdopen(fd2, "r")) == NULL)
-		err(ERR_EXIT, "%s", file2);
+	int dfound, tryfast;
+	u_char b1[MAXBSIZE], b2[MAXBSIZE];
+	u_char *p1, *p2, *e1, *e2;
+	size_t len;
+	int eof1, eof2;
 
 	dfound = 0;
-	while (skip1--)
-		if (getc(fp1) == EOF)
+	/* Both flags are read at the eof label even if the first skip bails. */
+	eof1 = eof2 = 0;
+
+	if ((eof1 = skip_helper(fd1, file1, skip1)))
 		goto eof;
-	while (skip2--)
-		if (getc(fp2) == EOF)
+	if ((eof2 = skip_helper(fd2, file2, skip2)))
 		goto eof;
 
-	for (byte = line = 1;; ++byte) {
-		ch1 = getc(fp1);
-		ch2 = getc(fp2);
-		if (ch1 == EOF || ch2 == EOF)
+	p1 = e1 = b1;
+	p2 = e2 = b2;
+	byte = line = 1;
+	tryfast = 1;
+
+	for (;;) {
+		if (p1 == e1)
+			eof1 = read_helper(fd1, file1, b1, sizeof b1, &p1, &e1);
+		if (p2 == e2)
+			eof2 = read_helper(fd2, file2, b2, sizeof b2, &p2, &e2);
+		if (eof1 || eof2)
 			break;
-		if (ch1 != ch2) {
-			if (xflag) {
+		len = MIN(e1 - p1, e2 - p2);
+		if (c_chunk(file1, file2, p1, p2, len, &byte, &line, &tryfast))
 			dfound = 1;
-				(void)printf("%08llx %02x %02x\n",
-				    (long long)byte - 1, ch1, ch2);
-			} else if (lflag) {
-				dfound = 1;
-				(void)printf("%6lld %3o %3o\n",
-				    (long long)byte, ch1, ch2);
-			} else {
-				diffmsg(file1, file2, byte, line);
-				/* NOTREACHED */
-			}
-		}
-		if (ch1 == '\n')
-			++line;
+		p1 += len;
+		p2 += len;
 	}
 
-eof:	if (ferror(fp1))
-		err(ERR_EXIT, "%s", file1);
-	if (ferror(fp2))
-		err(ERR_EXIT, "%s", file2);
-	if (feof(fp1)) {
-		if (!feof(fp2))
+eof:	if (eof1) {
+		if (!eof2)
 			eofmsg(file1);
 	} else
-		if (feof(fp2))
+		if (eof2)
 			eofmsg(file2);
-	fclose(fp2);
-	fclose(fp1);
 	if (dfound)
 		exit(DIFF_EXIT);
 }
Index: usr.bin/wc/Makefile
===================================================================
--- usr.bin/wc/Makefile
+++ usr.bin/wc/Makefile
@@ -4,4 +4,6 @@
 PROG=	wc
 LIBADD=	xo
 
+SRCS=	wc.c countnl.c
+
 .include <bsd.prog.mk>
Index: usr.bin/wc/countnl.h
===================================================================
--- /dev/null
+++ usr.bin/wc/countnl.h
@@ -0,0 +1,11 @@
+#ifndef COUNTNL_H
+#define COUNTNL_H
+
+#include <sys/types.h>
+
+#include <stdlib.h>
+
+/* Return the number of '\n' bytes in the given buffer of the given length. */
+size_t	count_newlines(u_char *restrict, size_t);
+
+#endif /* !COUNTNL_H */
Index: usr.bin/wc/countnl.c
===================================================================
--- /dev/null
+++ usr.bin/wc/countnl.c
@@ -0,0 +1,66 @@
+#include <sys/types.h>
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "countnl.h"
+
+/*
+ * Return the number of newline ('\n') bytes in the l bytes starting at p.
+ *
+ * Buffers longer than a few machine words are scanned one word at a
+ * time; any unaligned head and the tail are counted bytewise.
+ */
+size_t
+count_newlines(u_char *restrict p, size_t l)
+{
+	/*
+	 * Using long long makes this slower than the naive algorithm
+	 * on some CPUs.  Let's assume that long is the largest
+	 * integer type that the CPU can work with efficiently.
+	 */
+	typedef unsigned long word;
+	const word ones = ~(word)0 / 255;	/* 0x01 in every byte */
+	word wv;
+	size_t n, wl;
+
+	n = 0;
+	/* Only do it if we've got at least a few words to process. */
+	if (l > sizeof(word) * 4) {
+		/* Align to word boundary. */
+		for (; ((uintptr_t)p & (sizeof(word) - 1)) && l; l--)
+			n += *p++ == '\n';
+		/* Process one word at a time. */
+		for (wl = l / sizeof(word); wl != 0; wl--) {
+			/*
+			 * Load through memcpy() rather than a cast to
+			 * word *: reading u_char storage through a word
+			 * lvalue is not permitted by the C aliasing
+			 * rules.
+			 */
+			memcpy(&wv, p, sizeof(wv));
+			p += sizeof(wv);
+			l -= sizeof(wv);
+			/*
+			 * XOR turns every newline byte into zero.  Then,
+			 * per the "Bit Twiddling Hacks" recipe "Determine
+			 * if a word has a byte less than n" (n = 1):
+			 * 128 - (byte & 127) has its top bit set only
+			 * when the low seven bits are zero, and ANDing
+			 * with ~byte additionally requires the top bit of
+			 * the byte to be clear, leaving 0x80 in each lane
+			 * that was zero.  Dividing by 128 moves those
+			 * flags to bit 0 and % 255 sums the lanes.
+			 * <https://graphics.stanford.edu/~seander/bithacks.html#HasLessInWord>
+			 */
+			wv ^= ones * (word)'\n';
+			n += ((ones * 128 - (wv & ones * 127))
+			      & ~wv & ones * 128) / 128 % 255;
+		}
+		/* Leftovers handled below. */
+	}
+	while (l--)
+		n += *p++ == '\n';
+	return n;
+}
Index: usr.bin/wc/wc.c
===================================================================
--- usr.bin/wc/wc.c
+++ usr.bin/wc/wc.c
@@ -59,6 +59,8 @@
 #include <wctype.h>
 #include <libxo/xo.h>
 
+#include "countnl.h"
+
 static uintmax_t tlinect, twordct, tcharct, tlongline;
 static int doline, doword, dochar, domulti, dolongline;
 static volatile sig_atomic_t siginfo;
@@ -228,6 +230,7 @@
 					    llct);
 				}
 				charct += len;
+				if (dolongline) {
 					for (p = buf; len--; ++p)
 						if (*p == '\n') {
 							if (tmpll > llct)
@@ -236,6 +239,8 @@
 							++linect;
 						} else
 							tmpll++;
+				} else
+					linect += count_newlines(buf, len);
 			}
 			reset_siginfo();
 			tlinect += linect;