diff --git a/usr.bin/uniq/tests/uniq_test.sh b/usr.bin/uniq/tests/uniq_test.sh index 8dc2015734f6..537962eb6513 100755 --- a/usr.bin/uniq/tests/uniq_test.sh +++ b/usr.bin/uniq/tests/uniq_test.sh @@ -1,175 +1,178 @@ # # Copyright (c) 2024 Klara, Inc. # # SPDX-License-Identifier: BSD-2-Clause # atf_check_uniq() { atf_check uniq "$@" input actual atf_check diff -u actual expected atf_check uniq "$@" - actual input printf "a\nb\na\n" >expected atf_check_uniq } atf_test_case count count_head() { atf_set descr "basic test showing counts" } count_body() { printf "a\na\nb\nb\nb\na\na\na\na\n" >input printf " 2 a\n 3 b\n 4 a\n" >expected atf_check_uniq -c atf_check_uniq --count } atf_test_case repeated repeated_head() { atf_set descr "print repeated lines only" } repeated_body() { printf "a\na\nb\na\na\n" >input printf "a\na\n" >expected atf_check_uniq -d atf_check_uniq --repeated } atf_test_case count_repeated count_repeated_head() { atf_set descr "count and print repeated lines only" } count_repeated_body() { printf "a\na\nb\nb\na\n" >input printf " 2 a\n 2 b\n" >expected atf_check_uniq --count --repeated } atf_test_case all_repeated all_repeated_head() { atf_set descr "print every instance of repeated lines" } all_repeated_body() { printf "a\na\nb\na\na\n" >input printf "a\na\na\na\n" >expected atf_check_uniq -D atf_check_uniq --all-repeated } atf_test_case skip_fields skip_fields_head() { atf_set descr "skip fields" } skip_fields_body() { printf "1 a\n2 a\n3 b\n4 b\n5 a\n6 a\n" >input printf "1 a\n3 b\n5 a\n" >expected + atf_check_uniq -1 atf_check_uniq -f 1 atf_check_uniq --skip-fields 1 } atf_test_case skip_fields_tab skip_fields_tab_head() { atf_set descr "skip fields (with tabs)" } skip_fields_tab_body() { printf "1\ta\n2\ta\n3\tb\n4\tb\n5\ta\n6\ta\n" >input printf "1\ta\n3\tb\n5\ta\n" >expected + atf_check_uniq -1 atf_check_uniq -f 1 atf_check_uniq --skip-fields 1 } atf_test_case ignore_case ignore_case_head() { atf_set descr "ignore case" } ignore_case_body() { 
printf "a\nA\nb\nB\na\nA\n" >input printf "a\nb\na\n" >expected atf_check_uniq -i atf_check_uniq --ignore-case } atf_test_case skip_chars skip_chars_head() { atf_set descr "skip chars" } skip_chars_body() { printf "1 a\n2 a\n3 b\n4 b\n5 a\n6 a\n" >input printf "1 a\n3 b\n5 a\n" >expected + atf_check_uniq +2 atf_check_uniq -s 2 atf_check_uniq --skip-chars 2 } atf_test_case unique unique_head() { atf_set descr "print non-repeated lines only" } unique_body() { printf "a\na\nb\na\na\n" >input printf "b\n" >expected atf_check_uniq -u atf_check_uniq --unique } atf_test_case count_unique count_unique_head() { atf_set descr "print non-repeated lines with count" } count_unique_body() { printf "a\na\nb\n" >input printf " 1 b\n" >expected atf_check_uniq --unique --count atf_check_uniq --count --unique } atf_test_case interactive interactive_head() { atf_set descr "test interactive use" } interactive_body() { sh -c 'yes | stdbuf -oL uniq >actual' & pid=$! sleep 1 kill $! atf_check -o inline:"y\n" cat actual } atf_test_case interactive_repeated interactive_repeated_head() { atf_set descr "test interactive use with -d" } interactive_repeated_body() { sh -c 'yes | stdbuf -oL uniq -d >actual' & pid=$! sleep 1 kill $! atf_check -o inline:"y\n" cat actual } atf_init_test_cases() { atf_add_test_case basic atf_add_test_case count atf_add_test_case repeated atf_add_test_case count_repeated atf_add_test_case all_repeated atf_add_test_case skip_fields atf_add_test_case skip_fields_tab atf_add_test_case ignore_case atf_add_test_case skip_chars atf_add_test_case unique atf_add_test_case count_unique atf_add_test_case interactive atf_add_test_case interactive_repeated } diff --git a/usr.bin/uniq/uniq.c b/usr.bin/uniq/uniq.c index 0bc9b2b86af3..ef59d7339d0c 100644 --- a/usr.bin/uniq/uniq.c +++ b/usr.bin/uniq/uniq.c @@ -1,373 +1,369 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. 
* * This code is derived from software contributed to Berkeley by * Case Larsen. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* Dflag selects the -D separator mode; cflag, dflag, uflag and iflag record the -c, -d, -u and -i options. */ static enum { DF_NONE, DF_NOSEP, DF_PRESEP, DF_POSTSEP } Dflag; static bool cflag, dflag, uflag, iflag; /* numchars and numfields hold the -s and -f skip amounts; repeats counts the additional occurrences of the current line seen so far. */ static long long numchars, numfields, repeats; static const struct option long_opts[] = { {"all-repeated",optional_argument, NULL, 'D'}, {"count", no_argument, NULL, 'c'}, {"repeated", no_argument, NULL, 'd'}, {"skip-fields", required_argument, NULL, 'f'}, {"ignore-case", no_argument, NULL, 'i'}, {"skip-chars", required_argument, NULL, 's'}, {"unique", no_argument, NULL, 'u'}, {NULL, no_argument, NULL, 0} }; static FILE *file(const char *, const char *); static wchar_t *convert(const char *); static int inlcmp(const char *, const char *); static void show(FILE *, const char *); static wchar_t *skip(wchar_t *); static void obsolete(char *[]); static void usage(void); /* * main -- * Parse the options, open input and output, restrict capabilities, then * compare each line with the previous one and print according to the flags. */ int main (int argc, char *argv[]) { wchar_t *tprev, *tthis; FILE *ifp, *ofp; int ch, comp; size_t prevbuflen, thisbuflen, b1; char *prevline, *thisline, *p; const char *errstr, *ifn; cap_rights_t rights; (void) setlocale(LC_ALL, ""); /* Rewrite old-style -N and +N arguments before option parsing. 
*/ obsolete(argv); while ((ch = getopt_long(argc, argv, "+D::cdif:s:u", long_opts, NULL)) != -1) switch (ch) { case 'D': if (optarg == NULL || strcasecmp(optarg, "none") == 0) Dflag = DF_NOSEP; else if (strcasecmp(optarg, "prepend") == 0) Dflag = DF_PRESEP; else if (strcasecmp(optarg, "separate") == 0) Dflag = DF_POSTSEP; else usage(); break; case 'c': cflag = true; break; case 'd': dflag = true; break; case 'i': iflag = true; break; case 'f': numfields = strtonum(optarg, 0, INT_MAX, &errstr); if (errstr) errx(1, "field skip value is %s: %s", errstr, optarg); break; case 's': numchars = strtonum(optarg, 0, INT_MAX, &errstr); if (errstr != NULL) errx(1, "character skip value is %s: %s", errstr, optarg); break; case 'u': uflag = true; break; case '?': default: usage(); } argc -= optind; argv += optind; if (argc > 
2) usage(); /* -D overrides -d. */ if (Dflag && dflag) dflag = false; ifp = stdin; ifn = "stdin"; ofp = stdout; if (argc > 0 && strcmp(argv[0], "-") != 0) ifp = file(ifn = argv[0], "r"); cap_rights_init(&rights, CAP_FSTAT, CAP_READ); if (caph_rights_limit(fileno(ifp), &rights) < 0) err(1, "unable to limit rights for %s", ifn); cap_rights_init(&rights, CAP_FSTAT, CAP_WRITE); if (argc > 1) ofp = file(argv[1], "w"); else cap_rights_set(&rights, CAP_IOCTL); if (caph_rights_limit(fileno(ofp), &rights) < 0) { err(1, "unable to limit rights for %s", argc > 1 ? argv[1] : "stdout"); } if (cap_rights_is_set(&rights, CAP_IOCTL)) { unsigned long cmd; cmd = TIOCGETA; /* required by isatty(3) in printf(3) */ if (caph_ioctls_limit(fileno(ofp), &cmd, 1) < 0) { err(1, "unable to limit ioctls for %s", argc > 1 ? argv[1] : "stdout"); } } caph_cache_catpages(); if (caph_enter() < 0) err(1, "unable to enter capability mode"); prevbuflen = thisbuflen = 0; prevline = thisline = NULL; /* Empty input: nothing to print, exit successfully unless the read failed. */ if (getline(&prevline, &prevbuflen, ifp) < 0) { if (ferror(ifp)) err(1, "%s", ifn); exit(0); } /* With no selection flags the first line of a group is printed as soon as it is read. */ if (!cflag && !Dflag && !dflag && !uflag) show(ofp, prevline); tprev = convert(prevline); tthis = NULL; while (getline(&thisline, &thisbuflen, ifp) >= 0) { if (tthis != NULL) free(tthis); tthis = convert(thisline); /* Lines that convert() could not turn into wide characters are compared bytewise; a converted line never equals an unconverted one. */ if (tthis == NULL && tprev == NULL) comp = inlcmp(thisline, prevline); else if (tthis == NULL || tprev == NULL) comp = 1; else comp = wcscoll(tthis, tprev); if (comp) { /* If different, print; set previous to new value. 
*/ if (Dflag == DF_POSTSEP && repeats > 0) fputc('\n', ofp); if (!cflag && !Dflag && !dflag && !uflag) show(ofp, thisline); else if (!Dflag && (!dflag || (cflag && repeats > 0)) && (!uflag || repeats == 0)) show(ofp, prevline); /* Swap the two line buffers so the next getline() reuses the old one. */ p = prevline; b1 = prevbuflen; prevline = thisline; prevbuflen = thisbuflen; if (tprev != NULL) free(tprev); tprev = tthis; thisline = p; thisbuflen = b1; tthis = NULL; repeats = 0; } else { if (Dflag) { if (repeats == 0) { if (Dflag == DF_PRESEP) fputc('\n', ofp); show(ofp, prevline); } show(ofp, thisline); } else if (dflag && !cflag) { if (repeats == 0) show(ofp, prevline); } ++repeats; } } if (ferror(ifp)) err(1, "%s", ifn); /* End of input: emit the final group if the flags call for it. */ if (!cflag && !Dflag && !dflag && !uflag) /* already printed */ ; else if (!Dflag && (!dflag || (cflag && repeats > 0)) && (!uflag || repeats == 0)) show(ofp, prevline); exit(0); } /* * convert -- * Return a newly allocated wide-character copy of str with a trailing * newline removed, the -f and -s prefix skipped and, with -i, folded to * lower case. Returns NULL when mbstowcs() cannot convert str. */ static wchar_t * convert(const char *str) { size_t n; wchar_t *buf, *ret, *p; if ((n = mbstowcs(NULL, str, 0)) == (size_t)-1) return (NULL); if (SIZE_MAX / sizeof(*buf) < n + 1) errx(1, "conversion buffer length overflow"); if ((buf = malloc((n + 1) * sizeof(*buf))) == NULL) err(1, "malloc"); if (mbstowcs(buf, str, n + 1) != n) errx(1, "internal mbstowcs() error"); /* The last line may not end with \n. */ if (n > 0 && buf[n - 1] == L'\n') buf[n - 1] = L'\0'; /* If requested get the chosen fields + character offsets. 
*/ if (numfields || numchars) { if ((ret = wcsdup(skip(buf))) == NULL) err(1, "wcsdup"); free(buf); } else ret = buf; if (iflag) { for (p = ret; *p != L'\0'; p++) *p = towlower(*p); } return (ret); } /* * inlcmp -- * Bytewise comparison of two lines in which a newline at the first * differing position counts as end of string. Returns zero when equal. */ static int inlcmp(const char *s1, const char *s2) { int c1, c2; while (*s1 == *s2++) if (*s1++ == '\0') return (0); c1 = (unsigned char)*s1; c2 = (unsigned char)*(s2 - 1); /* The last line may not end with \n. */ if (c1 == '\n') c1 = '\0'; if (c2 == '\n') c2 = '\0'; return (c1 - c2); } /* * show -- * Output a line depending on the flags and number of repetitions * of the line. 
*/ static void show(FILE *ofp, const char *str) { if (cflag) (void)fprintf(ofp, "%4lld %s", repeats + 1, str); else (void)fprintf(ofp, "%s", str); } /* * skip -- * Return a pointer into str past the first numfields blank-separated * fields and then past at most numchars further characters. */ static wchar_t * skip(wchar_t *str) { long long nchars, nfields; for (nfields = 0; *str != L'\0' && nfields++ != numfields; ) { while (iswblank(*str)) str++; while (*str != L'\0' && !iswblank(*str)) str++; } for (nchars = numchars; nchars-- && *str != L'\0'; ++str) ; return(str); } /* * file -- * Open name with the given mode; exit with an error message on failure. */ static FILE * file(const char *name, const char *mode) { FILE *fp; if ((fp = fopen(name, mode)) == NULL) err(1, "%s", name); return(fp); } /* * obsolete -- * Rewrite old-style arguments in place: -N becomes -fN and +N becomes -sN. * Scanning stops at the first argument that starts with -- or with * neither - nor +. */ static void obsolete(char *argv[]) { - int len; - char *ap, *p, *start; + char *ap, *p; while ((ap = *++argv)) { /* Return if "--" or not an option of any form. */ if (ap[0] != '-') { if (ap[0] != '+') return; - } else if (ap[1] == '-') + } else if (ap[1] == '-') { return; + } if (!isdigit((unsigned char)ap[1])) continue; /* * Digit signifies an old-style option. Allocate a new string holding * the dash, the new option letter and the argument. */ - len = strlen(ap); - if ((start = p = malloc(len + 3)) == NULL) + if (asprintf(&p, "-%c%s", ap[0] == '+' ? 's' : 'f', ap + 1) < 0) err(1, "malloc"); - *p++ = '-'; - *p++ = ap[0] == '+' ? 's' : 'f'; - (void)strcpy(p, ap + 1); - *argv = start; + *argv = p; } } /* * usage -- * Print the usage message to stderr and exit with status 1. */ static void usage(void) { (void)fprintf(stderr, "usage: uniq [-c | -d | -D | -u] [-i] [-f fields] [-s chars] [input [output]]\n"); exit(1); }