diff --git a/etc/mtree/BSD.tests.dist b/etc/mtree/BSD.tests.dist
--- a/etc/mtree/BSD.tests.dist
+++ b/etc/mtree/BSD.tests.dist
@@ -1120,6 +1120,8 @@
         ..
         vmstat
         ..
+        wc
+        ..
         xargs
         ..
         xinstall
diff --git a/usr.bin/wc/Makefile b/usr.bin/wc/Makefile
--- a/usr.bin/wc/Makefile
+++ b/usr.bin/wc/Makefile
@@ -12,4 +12,7 @@
 CFLAGS+=-DWITH_CASPER
 .endif
 
+HAS_TESTS=
+SUBDIR.${MK_TESTS}=	tests
+
 .include <bsd.prog.mk>
diff --git a/usr.bin/wc/tests/Makefile b/usr.bin/wc/tests/Makefile
new file mode 100644
--- /dev/null
+++ b/usr.bin/wc/tests/Makefile
@@ -0,0 +1,8 @@
+# $FreeBSD$
+
+PACKAGE=	tests
+
+ATF_TESTS_SH=	wc_test
+BINDIR=		${TESTSDIR}
+
+.include <bsd.test.mk>
diff --git a/usr.bin/wc/tests/wc_test.sh b/usr.bin/wc/tests/wc_test.sh
new file mode 100755
--- /dev/null
+++ b/usr.bin/wc/tests/wc_test.sh
@@ -0,0 +1,120 @@
+#
+# Copyright (c) 2023 Klara, Inc.
+#
+# SPDX-License-Identifier: BSD-2-Clause
+#
+
+#
+# Sample text containing multibyte characters
+#
+tv="Der bode en underlig gråsprængt en
+på den yderste nøgne ø; –
+han gjorde visst intet menneske mén
+hverken på land eller sjø;
+dog stundom gnistred hans øjne stygt, –
+helst mod uroligt vejr, –
+og da mente folk, at han var forrykt,
+og da var der få, som uden frykt
+kom Terje Vigen nær.
+
+"
+# Expected counts for the sample text: lines, words, bytes, characters,
+# longest line in bytes, longest line in characters.
+tvl=10
+tvw=55
+tvc=300
+tvm=283
+tvcL=42
+tvmL=39
+
+#
+# Run a series of tests using the same input file.  The first argument
+# is the name of the file.  The next three are the expected line,
+# word, and byte counts.  The optional fifth is the expected character
+# count; if not provided, it is expected to be identical to the byte
+# count.  Every count is checked twice: once with wc reading the file
+# from standard input (no name printed) and once with the file given as
+# an operand (name printed after the counts).
+#
+atf_check_wc() {
+	local file="$1"
+	local l="$2"
+	local w="$3"
+	local c="$4"
+	local m="${5-$4}"
+
+	# -m only differs from -c in a multibyte locale
+	export LC_CTYPE=C.UTF-8
+	atf_check -o match:"^ +${l} +${w} +${c}\$" wc <"${file}"
+	atf_check -o match:"^ +${l}\$" wc -l <"${file}"
+	atf_check -o match:"^ +${w}\$" wc -w <"${file}"
+	atf_check -o match:"^ +${c}\$" wc -c <"${file}"
+	atf_check -o match:"^ +${m}\$" wc -m <"${file}"
+	atf_check -o match:"^ +${l} +${w} +${c} ${file}\$" wc "$file"
+	atf_check -o match:"^ +${l} ${file}\$" wc -l "$file"
+	atf_check -o match:"^ +${w} ${file}\$" wc -w "$file"
+	atf_check -o match:"^ +${c} ${file}\$" wc -c "$file"
+	atf_check -o match:"^ +${m} ${file}\$" wc -m "$file"
+}
+
+# A single newline-terminated line
+atf_test_case basic
+basic_body()
+{
+	printf "a b\n" >foo
+	atf_check_wc foo 1 2 4
+}
+
+# An empty file
+atf_test_case empty
+empty_body()
+{
+	printf "" >foo
+	atf_check_wc foo 0 0 0
+}
+
+# Multibyte text: byte and character counts differ
+atf_test_case multiline
+multiline_body()
+{
+	printf "%s" "$tv" >foo
+	atf_check_wc foo $tvl $tvw $tvc $tvm
+	# longest line in bytes
+	atf_check -o match:"^ +$tvc +$tvcL foo\$" wc -cL foo
+	atf_check -o match:"^ +$tvc +$tvcL\$" wc -cL <foo
+	# longest line in characters
+	atf_check -o match:"^ +$tvm +$tvmL foo\$" wc -mL foo
+	atf_check -o match:"^ +$tvm +$tvmL\$" wc -mL <foo
+}
+
+# Many copies of the sample text: the input is much larger than in the
+# other test cases and every count must scale with the number of copies
+atf_test_case large
+large_body()
+{
+	local n=1000
+	local i=0
+
+	while [ $i -lt $n ] ; do
+		printf "%s" "$tv"
+		i=$((i+1))
+	done >foo
+	atf_check_wc foo $((tvl*n)) $((tvw*n)) $((tvc*n)) $((tvm*n))
+}
+
+# Text with no trailing newline: the last word counts, no line does
+atf_test_case unterminated
+unterminated_body()
+{
+	printf "a b" >foo
+	atf_check_wc foo 0 2 3
+}
+
+atf_init_test_cases()
+{
+	atf_add_test_case basic
+	atf_add_test_case empty
+	atf_add_test_case multiline
+	atf_add_test_case unterminated
+	atf_add_test_case large
+}
diff --git a/usr.bin/wc/wc.c b/usr.bin/wc/wc.c
--- a/usr.bin/wc/wc.c
+++ b/usr.bin/wc/wc.c
@@ -50,10 +50,10 @@
 #include <sys/stat.h>
 
 #include <capsicum_helpers.h>
-#include <ctype.h>
 #include <errno.h>
 #include <fcntl.h>
 #include <locale.h>
+#include <stdbool.h>
 #include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -66,9 +66,11 @@
 #include <libcasper.h>
 #include <casper/cap_fileargs.h>
 
+static const char *stdin_filename = "stdin";
+
 static fileargs_t *fa;
 static uintmax_t tlinect, twordct, tcharct, tlongline;
-static int doline, doword, dochar, domulti, dolongline;
+static bool doline, doword, dochar, domulti, dolongline;
 static volatile sig_atomic_t siginfo;
 static xo_handle_t *stderr_handle;
 
@@ -107,21 +109,21
@@ while ((ch = getopt(argc, argv, "clmwL")) != -1) switch((char)ch) { case 'l': - doline = 1; + doline = true; break; case 'w': - doword = 1; + doword = true; break; case 'c': - dochar = 1; - domulti = 0; + dochar = true; + domulti = false; break; case 'L': - dolongline = 1; + dolongline = true; break; case 'm': - domulti = 1; - dochar = 0; + domulti = true; + dochar = false; break; case '?': default: @@ -162,19 +164,19 @@ errors = 0; total = 0; - if (!*argv) { - xo_open_instance("file"); - if (cnt((char *)NULL) != 0) + if (argc == 0) { + xo_open_instance("file"); + if (cnt(NULL) != 0) ++errors; - xo_close_instance("file"); + xo_close_instance("file"); } else { - do { - xo_open_instance("file"); - if (cnt(*argv) != 0) + while (argc--) { + xo_open_instance("file"); + if (cnt(*argv++) != 0) ++errors; - xo_close_instance("file"); + xo_close_instance("file"); ++total; - } while(*++argv); + } } xo_close_list("file"); @@ -187,7 +189,8 @@ fileargs_free(fa); xo_close_container("wc"); - xo_finish(); + if (xo_finish() < 0) + xo_err(1, "stdout"); exit(errors == 0 ? 
0 : 1); } @@ -212,7 +215,7 @@ xo_emit_h(xop, " {:characters/%7ju/%ju}", charct); if (dolongline) xo_emit_h(xop, " {:long-lines/%7ju/%ju}", llct); - if (file != NULL) + if (file != stdin_filename) xo_emit_h(xop, " {:filename/%s}\n", file); else xo_emit_h(xop, "\n"); @@ -221,20 +224,21 @@ static int cnt(const char *file) { + char buf[MAXBSIZE], *p; struct stat sb; + mbstate_t mbs; uintmax_t linect, wordct, charct, llct, tmpll; - int fd, len, warned; + ssize_t len; size_t clen; - short gotsp; - u_char *p; - u_char buf[MAXBSIZE]; + int fd; wchar_t wch; - mbstate_t mbs; + bool gotsp, warned; linect = wordct = charct = llct = tmpll = 0; - if (file == NULL) + if (file == NULL) { fd = STDIN_FILENO; - else if ((fd = fileargs_open(fa, file)) < 0) { + file = stdin_filename; + } else if ((fd = fileargs_open(fa, file)) < 0) { xo_warn("%s: open", file); return (1); } @@ -246,7 +250,7 @@ */ if (doline == 0 && dolongline == 0) { if (fstat(fd, &sb)) { - xo_warn("%s: fstat", file != NULL ? file : "stdin"); + xo_warn("%s: fstat", file); (void)close(fd); return (1); } @@ -265,9 +269,9 @@ * lines than to get words, since the word count requires locale * handling. */ - while ((len = read(fd, buf, MAXBSIZE))) { - if (len == -1) { - xo_warn("%s: read", file != NULL ? file : "stdin"); + while ((len = read(fd, buf, sizeof(buf)))) { + if (len < 0) { + xo_warn("%s: read", file); (void)close(fd); return (1); } @@ -275,14 +279,16 @@ show_cnt(file, linect, wordct, charct, llct); charct += len; if (doline || dolongline) { - for (p = buf; len--; ++p) + for (p = buf; len > 0; --len, ++p) { if (*p == '\n') { if (tmpll > llct) llct = tmpll; tmpll = 0; ++linect; - } else + } else { tmpll++; + } + } } } reset_siginfo(); @@ -297,12 +303,12 @@ return (0); /* Do it the hard way... */ -word: gotsp = 1; - warned = 0; +word: gotsp = true; + warned = false; memset(&mbs, 0, sizeof(mbs)); - while ((len = read(fd, buf, MAXBSIZE)) != 0) { - if (len == -1) { - xo_warn("%s: read", file != NULL ? 
file : "stdin"); + while ((len = read(fd, buf, sizeof(buf))) != 0) { + if (len < 0) { + xo_warn("%s: read", file); (void)close(fd); return (1); } @@ -313,21 +319,20 @@ if (!domulti || MB_CUR_MAX == 1) { clen = 1; wch = (unsigned char)*p; - } else if ((clen = mbrtowc(&wch, p, len, &mbs)) == - (size_t)-1) { + } else if ((clen = mbrtowc(&wch, p, len, &mbs)) == 0) { + clen = 1; + } else if (clen == (size_t)-1) { if (!warned) { errno = EILSEQ; - xo_warn("%s", - file != NULL ? file : "stdin"); - warned = 1; + xo_warn("%s", file); + warned = true; } memset(&mbs, 0, sizeof(mbs)); clen = 1; wch = (unsigned char)*p; - } else if (clen == (size_t)-2) + } else if (clen == (size_t)-2) { break; - else if (clen == 0) - clen = 1; + } charct++; if (wch != L'\n') tmpll++; @@ -339,18 +344,19 @@ tmpll = 0; ++linect; } - if (iswspace(wch)) - gotsp = 1; - else if (gotsp) { - gotsp = 0; + if (iswspace(wch)) { + gotsp = true; + } else if (gotsp) { + gotsp = false; ++wordct; } } } reset_siginfo(); - if (domulti && MB_CUR_MAX > 1) + if (domulti && MB_CUR_MAX > 1) { if (mbrtowc(NULL, NULL, 0, &mbs) == (size_t)-1 && !warned) - xo_warn("%s", file != NULL ? file : "stdin"); + xo_warn("%s", file); + } if (doline) tlinect += linect; if (doword)