diff --git a/usr.bin/uniq/tests/uniq_test.sh b/usr.bin/uniq/tests/uniq_test.sh
index ddd9ec9881dd..8dc2015734f6 100755
--- a/usr.bin/uniq/tests/uniq_test.sh
+++ b/usr.bin/uniq/tests/uniq_test.sh
@@ -1,149 +1,175 @@
#
# Copyright (c) 2024 Klara, Inc.
#
# SPDX-License-Identifier: BSD-2-Clause
#
atf_check_uniq() {
atf_check uniq "$@" input actual
atf_check diff -u actual expected
atf_check uniq "$@" - actual input
printf "a\nb\na\n" >expected
atf_check_uniq
}
atf_test_case count
count_head() {
	atf_set descr "basic test showing counts"
}
# uniq -c prints each count right-aligned in a four-column field followed
# by one space, so single-digit counts are preceded by three blanks.
count_body() {
	printf "a\na\nb\nb\nb\na\na\na\na\n" >input
	printf "   2 a\n   3 b\n   4 a\n" >expected
	# Exercise both the short and the long spelling of the option.
	atf_check_uniq -c
	atf_check_uniq --count
}
atf_test_case repeated
repeated_head() {
	atf_set descr "print repeated lines only"
}
repeated_body() {
	# Two runs of "a" hold adjacent duplicates; the single "b" does not.
	printf "a\na\nb\na\na\n" >input
	printf "a\na\n" >expected
	# Short and long spelling of the option must behave the same.
	for opt in -d --repeated; do
		atf_check_uniq "$opt"
	done
}
atf_test_case count_repeated
count_repeated_head() {
	atf_set descr "count and print repeated lines only"
}
# With --count the count is printed right-aligned in a four-column field
# followed by one space; the trailing lone "a" is not repeated and must
# not appear in the output.
count_repeated_body() {
	printf "a\na\nb\nb\na\n" >input
	printf "   2 a\n   2 b\n" >expected
	atf_check_uniq --count --repeated
}
atf_test_case all_repeated
all_repeated_head() {
	atf_set descr "print every instance of repeated lines"
}
all_repeated_body() {
	# Every copy of the duplicated "a" lines is expected; "b" occurs once.
	printf "a\na\nb\na\na\n" >input
	printf "a\na\na\na\n" >expected
	# Short and long spelling of the option must behave the same.
	for opt in -D --all-repeated; do
		atf_check_uniq "$opt"
	done
}
atf_test_case skip_fields
skip_fields_head() {
	atf_set descr "skip fields"
}
skip_fields_body() {
	# The first field is a unique number; only the second field is compared.
	printf "1 a\n2 a\n3 b\n4 b\n5 a\n6 a\n" >input
	printf "1 a\n3 b\n5 a\n" >expected
	# Short and long spelling, each taking the field count as argument.
	for opt in -f --skip-fields; do
		atf_check_uniq "$opt" 1
	done
}
atf_test_case skip_fields_tab
skip_fields_tab_head() {
	atf_set descr "skip fields (with tabs)"
}
skip_fields_tab_body() {
	# Same data as the skip_fields case, but the fields are tab-separated.
	printf "1\ta\n2\ta\n3\tb\n4\tb\n5\ta\n6\ta\n" >input
	printf "1\ta\n3\tb\n5\ta\n" >expected
	# Short and long spelling, each taking the field count as argument.
	for opt in -f --skip-fields; do
		atf_check_uniq "$opt" 1
	done
}
atf_test_case ignore_case
ignore_case_head() {
	atf_set descr "ignore case"
}
ignore_case_body() {
	# Adjacent lines differ only in case; the first of each pair is kept.
	printf "a\nA\nb\nB\na\nA\n" >input
	printf "a\nb\na\n" >expected
	# Short and long spelling of the option must behave the same.
	for opt in -i --ignore-case; do
		atf_check_uniq "$opt"
	done
}
atf_test_case skip_chars
skip_chars_head() {
	atf_set descr "skip chars"
}
skip_chars_body() {
	# Skipping two characters drops the leading digit and the blank after it.
	printf "1 a\n2 a\n3 b\n4 b\n5 a\n6 a\n" >input
	printf "1 a\n3 b\n5 a\n" >expected
	# Short and long spelling, each taking the character count as argument.
	for opt in -s --skip-chars; do
		atf_check_uniq "$opt" 2
	done
}
atf_test_case unique
unique_head() {
	atf_set descr "print non-repeated lines only"
}
unique_body() {
	# Only "b" has no adjacent duplicate.
	printf "a\na\nb\na\na\n" >input
	printf "b\n" >expected
	# Short and long spelling of the option must behave the same.
	for opt in -u --unique; do
		atf_check_uniq "$opt"
	done
}
atf_test_case count_unique
count_unique_head() {
	atf_set descr "print non-repeated lines with count"
}
# The count is printed right-aligned in a four-column field followed by
# one space, hence the three leading blanks in the expected output.
count_unique_body() {
	printf "a\na\nb\n" >input
	printf "   1 b\n" >expected
	# The result must not depend on the order of the two options.
	atf_check_uniq --unique --count
	atf_check_uniq --count --unique
}
+atf_test_case interactive
+interactive_head() {
+	atf_set descr "test interactive use"
+}
+interactive_body() {
+	# Without options uniq has to write the first line as soon as it has
+	# been read; the endless stream from yes never supplies a differing
+	# line or EOF.  stdbuf -oL makes uniq's stdout line-buffered so the
+	# line is in the file by the time it is inspected.
+	sh -c 'yes | stdbuf -oL uniq >actual' &
+	pid=$!
+	sleep 1
+	# Use the saved pid rather than re-reading $!.
+	kill "$pid"
+	atf_check -o inline:"y\n" cat actual
+}
+
+atf_test_case interactive_repeated
+interactive_repeated_head() {
+	atf_set descr "test interactive use with -d"
+}
+interactive_repeated_body() {
+	# With -d uniq has to write a line as soon as its first repetition is
+	# seen; the endless stream from yes never supplies a differing line
+	# or EOF.  stdbuf -oL makes uniq's stdout line-buffered so the line
+	# is in the file by the time it is inspected.
+	sh -c 'yes | stdbuf -oL uniq -d >actual' &
+	pid=$!
+	sleep 1
+	# Use the saved pid rather than re-reading $!.
+	kill "$pid"
+	atf_check -o inline:"y\n" cat actual
+}
+
# Register every test case of this script with ATF.
atf_init_test_cases()
{
atf_add_test_case basic
atf_add_test_case count
atf_add_test_case repeated
atf_add_test_case count_repeated
atf_add_test_case all_repeated
atf_add_test_case skip_fields
atf_add_test_case skip_fields_tab
atf_add_test_case ignore_case
atf_add_test_case skip_chars
atf_add_test_case unique
atf_add_test_case count_unique
# Cases that check output is produced before end of input.
+ atf_add_test_case interactive
+ atf_add_test_case interactive_repeated
}
diff --git a/usr.bin/uniq/uniq.1 b/usr.bin/uniq/uniq.1
index e01c84328575..30dee856d772 100644
--- a/usr.bin/uniq/uniq.1
+++ b/usr.bin/uniq/uniq.1
@@ -1,215 +1,218 @@
.\" Copyright (c) 1991, 1993
.\" The Regents of the University of California. All rights reserved.
.\"
.\" This code is derived from software contributed to Berkeley by
.\" the Institute of Electrical and Electronics Engineers, Inc.
.\"
.\" Redistribution and use in source and binary forms, with or without
.\" modification, are permitted provided that the following conditions
.\" are met:
.\" 1. Redistributions of source code must retain the above copyright
.\" notice, this list of conditions and the following disclaimer.
.\" 2. Redistributions in binary form must reproduce the above copyright
.\" notice, this list of conditions and the following disclaimer in the
.\" documentation and/or other materials provided with the distribution.
.\" 3. Neither the name of the University nor the names of its contributors
.\" may be used to endorse or promote products derived from this software
.\" without specific prior written permission.
.\"
.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
-.Dd June 7, 2020
+.Dd January 12, 2024
.Dt UNIQ 1
.Os
.Sh NAME
.Nm uniq
.Nd report or filter out repeated lines in a file
.Sh SYNOPSIS
.Nm
.Op Fl c | Fl d | Fl D | Fl u
.Op Fl i
.Op Fl f Ar num
.Op Fl s Ar chars
.Oo
.Ar input_file
.Op Ar output_file
.Oc
.Sh DESCRIPTION
The
.Nm
utility reads the specified
.Ar input_file
comparing adjacent lines, and writes a copy of each unique input line to
the
.Ar output_file .
If
.Ar input_file
is a single dash
.Pq Sq Fl
or absent, the standard input is read.
If
.Ar output_file
is absent, standard output is used for output.
The second and succeeding copies of identical adjacent input lines are
not written.
Repeated lines in the input will not be detected if they are not adjacent,
so it may be necessary to sort the files first.
.Pp
The following options are available:
.Bl -tag -width Ds
.It Fl c , Fl -count
Precede each output line with the count of the number of times the line
occurred in the input, followed by a single space.
.It Fl d , Fl -repeated
Output a single copy of each line that is repeated in the input.
+Ignored if
+.Fl D
+is also specified.
.It Fl D , Fl -all-repeated Op Ar septype
Output all lines that are repeated (like
.Fl d ,
but each copy of the repeated line is written).
The optional
.Ar septype
argument controls how to separate groups of repeated lines in the output;
it must be one of the following values:
.Pp
.Bl -tag -compact -width separate
.It none
Do not separate groups of lines (this is the default).
.It prepend
Output an empty line before each group of lines.
.It separate
Output an empty line after each group of lines.
.El
.It Fl f Ar num , Fl -skip-fields Ar num
Ignore the first
.Ar num
fields in each input line when doing comparisons.
A field is a string of non-blank characters separated from adjacent fields
by blanks.
Field numbers are one based, i.e., the first field is field one.
.It Fl i , Fl -ignore-case
Case insensitive comparison of lines.
.It Fl s Ar chars , Fl -skip-chars Ar chars
Ignore the first
.Ar chars
characters in each input line when doing comparisons.
If specified in conjunction with the
.Fl f , Fl -skip-fields
option, the first
.Ar chars
characters after the first
.Ar num
fields will be ignored.
Character numbers are one based, i.e., the first character is character one.
.It Fl u , Fl -unique
Only output lines that are not repeated in the input.
.\".It Fl Ns Ar n
.\"(Deprecated; replaced by
.\".Fl f ) .
.\"Ignore the first n
.\"fields on each input line when doing comparisons,
.\"where n is a number.
.\"A field is a string of non-blank
.\"characters separated from adjacent fields
.\"by blanks.
.\".It Cm \&\(pl Ns Ar n
.\"(Deprecated; replaced by
.\".Fl s ) .
.\"Ignore the first
.\".Ar m
.\"characters when doing comparisons, where
.\".Ar m
.\"is a
.\"number.
.El
.Sh ENVIRONMENT
The
.Ev LANG ,
.Ev LC_ALL ,
.Ev LC_COLLATE
and
.Ev LC_CTYPE
environment variables affect the execution of
.Nm
as described in
.Xr environ 7 .
.Sh EXIT STATUS
.Ex -std
.Sh EXAMPLES
Assuming a file named cities.txt with the following content:
.Bd -literal -offset indent
Madrid
Lisbon
Madrid
.Ed
.Pp
The following command reports three different lines since identical elements
are not adjacent:
.Bd -literal -offset indent
$ uniq -u cities.txt
Madrid
Lisbon
Madrid
.Ed
.Pp
Sort the file and count the number of identical lines:
.Bd -literal -offset indent
$ sort cities.txt | uniq -c
   1 Lisbon
   2 Madrid
.Ed
.Pp
Assuming the following content for the file cities.txt:
.Bd -literal -offset indent
madrid
Madrid
Lisbon
.Ed
.Pp
Show repeated lines, ignoring case:
.Bd -literal -offset indent
$ uniq -d -i cities.txt
madrid
.Ed
.Pp
Same as above but showing the whole group of repeated lines:
.Bd -literal -offset indent
$ uniq -D -i cities.txt
madrid
Madrid
.Ed
.Pp
Report the number of identical lines ignoring the first character of every line:
.Bd -literal -offset indent
$ uniq -s 1 -c cities.txt
   2 madrid
   1 Lisbon
.Ed
.Sh COMPATIBILITY
The historic
.Cm \&\(pl Ns Ar number
and
.Fl Ns Ar number
options have been deprecated but are still supported in this implementation.
.Sh SEE ALSO
.Xr sort 1
.Sh STANDARDS
The
.Nm
utility conforms to
.St -p1003.1-2001
as amended by Cor.\& 1-2002.
.Sh HISTORY
A
.Nm
command appeared in
.At v3 .
diff --git a/usr.bin/uniq/uniq.c b/usr.bin/uniq/uniq.c
index 55766f7e987e..0bc9b2b86af3 100644
--- a/usr.bin/uniq/uniq.c
+++ b/usr.bin/uniq/uniq.c
@@ -1,364 +1,373 @@
/*-
* SPDX-License-Identifier: BSD-3-Clause
*
* Copyright (c) 1989, 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Case Larsen.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include
#include
#include
#include
#include
#include
#include
#include
#include
+#include
#include
#include
#include
#include
#include
#include
#include
#include
-static int Dflag, cflag, dflag, uflag, iflag;
-static int numchars, numfields, repeats;
-
-/* Dflag values */
-#define DF_NONE 0
-#define DF_NOSEP 1
-#define DF_PRESEP 2
-#define DF_POSTSEP 3
+static enum { DF_NONE, DF_NOSEP, DF_PRESEP, DF_POSTSEP } Dflag;	/* -D mode; DF_NONE when -D was not given */
+static bool cflag, dflag, uflag, iflag;	/* set by -c, -d, -u and -i */
+static long long numchars, numfields, repeats;	/* -s and -f values; extra copies seen of the current line */
/*
 * Long option table handed to getopt_long().  Each entry maps a long
 * name to the short option character handled in main(); the all-NULL
 * entry terminates the table.
 */
static const struct option long_opts[] =
{
{"all-repeated",optional_argument, NULL, 'D'},
{"count", no_argument, NULL, 'c'},
{"repeated", no_argument, NULL, 'd'},
{"skip-fields", required_argument, NULL, 'f'},
{"ignore-case", no_argument, NULL, 'i'},
{"skip-chars", required_argument, NULL, 's'},
{"unique", no_argument, NULL, 'u'},
{NULL, no_argument, NULL, 0}
};
static FILE *file(const char *, const char *);
static wchar_t *convert(const char *);
static int inlcmp(const char *, const char *);
static void show(FILE *, const char *);
static wchar_t *skip(wchar_t *);
static void obsolete(char *[]);
static void usage(void);
/*
 * uniq entry point: parse the options, open the input and output
 * streams, restrict their capabilities, then read the input line by
 * line and compare each line with its predecessor to decide what is
 * written.  Exits 0 on success; failures terminate through err(),
 * errx() or usage().
 */
int
main (int argc, char *argv[])
{
wchar_t *tprev, *tthis;
FILE *ifp, *ofp;
int ch, comp;
size_t prevbuflen, thisbuflen, b1;
char *prevline, *thisline, *p;
- const char *ifn, *errstr;;
+ const char *errstr, *ifn;
cap_rights_t rights;
(void) setlocale(LC_ALL, "");
/* Rewrite historic -N / +N arguments as -fN / -sN before getopt runs. */
obsolete(argv);
while ((ch = getopt_long(argc, argv, "+D::cdif:s:u", long_opts,
NULL)) != -1)
switch (ch) {
case 'D':
/* A missing argument is treated like "none". */
if (optarg == NULL || strcasecmp(optarg, "none") == 0)
Dflag = DF_NOSEP;
else if (strcasecmp(optarg, "prepend") == 0)
Dflag = DF_PRESEP;
else if (strcasecmp(optarg, "separate") == 0)
Dflag = DF_POSTSEP;
else
usage();
break;
case 'c':
- cflag = 1;
+ cflag = true;
break;
case 'd':
- dflag = 1;
+ dflag = true;
break;
case 'i':
- iflag = 1;
+ iflag = true;
break;
case 'f':
/* Accept 0..INT_MAX; strtonum() reports problems through errstr. */
numfields = strtonum(optarg, 0, INT_MAX, &errstr);
if (errstr)
errx(1, "field skip value is %s: %s", errstr, optarg);
break;
case 's':
numchars = strtonum(optarg, 0, INT_MAX, &errstr);
if (errstr != NULL)
errx(1, "character skip value is %s: %s", errstr, optarg);
break;
case 'u':
- uflag = 1;
+ uflag = true;
break;
case '?':
default:
usage();
}
argc -= optind;
argv += optind;
/* At most an input and an output operand are accepted. */
if (argc > 2)
usage();
/* -D takes precedence: -d is dropped when both were given. */
+ if (Dflag && dflag)
+ dflag = false;
+
/* Default to stdin/stdout; "-" as first operand also means stdin. */
ifp = stdin;
ifn = "stdin";
ofp = stdout;
if (argc > 0 && strcmp(argv[0], "-") != 0)
ifp = file(ifn = argv[0], "r");
/* Limit the input descriptor to the CAP_FSTAT and CAP_READ rights. */
cap_rights_init(&rights, CAP_FSTAT, CAP_READ);
if (caph_rights_limit(fileno(ifp), &rights) < 0)
err(1, "unable to limit rights for %s", ifn);
/* The output gets CAP_FSTAT and CAP_WRITE, plus CAP_IOCTL for stdout. */
cap_rights_init(&rights, CAP_FSTAT, CAP_WRITE);
if (argc > 1)
ofp = file(argv[1], "w");
else
cap_rights_set(&rights, CAP_IOCTL);
if (caph_rights_limit(fileno(ofp), &rights) < 0) {
err(1, "unable to limit rights for %s",
argc > 1 ? argv[1] : "stdout");
}
/* When ioctls are allowed at all, TIOCGETA is the only one permitted. */
if (cap_rights_is_set(&rights, CAP_IOCTL)) {
unsigned long cmd;
cmd = TIOCGETA; /* required by isatty(3) in printf(3) */
if (caph_ioctls_limit(fileno(ofp), &cmd, 1) < 0) {
err(1, "unable to limit ioctls for %s",
argc > 1 ? argv[1] : "stdout");
}
}
caph_cache_catpages();
if (caph_enter() < 0)
err(1, "unable to enter capability mode");
prevbuflen = thisbuflen = 0;
prevline = thisline = NULL;
/* No first line: exit quietly on empty input, fail on a read error. */
if (getline(&prevline, &prevbuflen, ifp) < 0) {
if (ferror(ifp))
err(1, "%s", ifn);
exit(0);
}
/* With none of -c/-D/-d/-u the first line is written immediately. */
+ if (!cflag && !Dflag && !dflag && !uflag)
+ show(ofp, prevline);
/*
 * tprev/tthis hold the comparison keys produced by convert(); NULL
 * means the line could not be converted to wide characters.
 */
tprev = convert(prevline);
tthis = NULL;
while (getline(&thisline, &thisbuflen, ifp) >= 0) {
if (tthis != NULL)
free(tthis);
tthis = convert(thisline);
/*
 * Neither line converted: compare the raw bytes.  Only one converted:
 * treat the lines as different.  Otherwise compare the keys with
 * wcscoll().
 */
if (tthis == NULL && tprev == NULL)
comp = inlcmp(thisline, prevline);
else if (tthis == NULL || tprev == NULL)
comp = 1;
else
comp = wcscoll(tthis, tprev);
if (comp) {
/* If different, print; set previous to new value. */
if (Dflag == DF_POSTSEP && repeats > 0)
fputc('\n', ofp);
- if (!Dflag)
/*
 * Plain mode writes the new line at once.  Otherwise the finished
 * group's line is written here unless -D is active, -d already handled
 * it (or it never repeated), or -u is set and it did repeat.
 */
+ if (!cflag && !Dflag && !dflag && !uflag)
+ show(ofp, thisline);
+ else if (!Dflag &&
+ (!dflag || (cflag && repeats > 0)) &&
+ (!uflag || repeats == 0))
show(ofp, prevline);
/* Swap the buffers so the next getline() reuses the old storage. */
p = prevline;
b1 = prevbuflen;
prevline = thisline;
prevbuflen = thisbuflen;
if (tprev != NULL)
free(tprev);
tprev = tthis;
thisline = p;
thisbuflen = b1;
tthis = NULL;
repeats = 0;
} else {
if (Dflag) {
/* First repeat of a group: emit the held-back first copy too. */
if (repeats == 0) {
if (Dflag == DF_PRESEP)
fputc('\n', ofp);
show(ofp, prevline);
}
show(ofp, thisline);
/* -d without -c: write the line once, at its first repetition. */
+ } else if (dflag && !cflag) {
+ if (repeats == 0)
+ show(ofp, prevline);
}
++repeats;
}
}
if (ferror(ifp))
err(1, "%s", ifn);
- if (!Dflag)
/* End of input: apply the same rule as above to the final group. */
+ if (!cflag && !Dflag && !dflag && !uflag)
+ /* already printed */ ;
+ else if (!Dflag &&
+ (!dflag || (cflag && repeats > 0)) &&
+ (!uflag || repeats == 0))
show(ofp, prevline);
exit(0);
}
/*
 * convert --
 *	Build the wide-character comparison key for one input line.
 *
 *	The multibyte string is converted to wide characters, a trailing
 *	newline is removed, the -f/-s prefix is skipped when requested and
 *	the result is lower-cased when -i is in effect.  Returns a malloc'd
 *	string the caller must free, or NULL when the line is not a valid
 *	multibyte string.  Allocation failures terminate the program.
 */
static wchar_t *
convert(const char *str)
{
	wchar_t *wide, *key, *cp;
	size_t wlen;

	/* First pass only measures; (size_t)-1 flags an invalid sequence. */
	wlen = mbstowcs(NULL, str, 0);
	if (wlen == (size_t)-1)
		return (NULL);
	if (wlen + 1 > SIZE_MAX / sizeof(*wide))
		errx(1, "conversion buffer length overflow");
	wide = malloc((wlen + 1) * sizeof(*wide));
	if (wide == NULL)
		err(1, "malloc");
	if (mbstowcs(wide, str, wlen + 1) != wlen)
		errx(1, "internal mbstowcs() error");

	/* The last line may not end with \n. */
	if (wlen > 0 && wide[wlen - 1] == L'\n')
		wide[wlen - 1] = L'\0';

	/* If requested get the chosen fields + character offsets. */
	key = wide;
	if (numfields || numchars) {
		key = wcsdup(skip(wide));
		if (key == NULL)
			err(1, "wcsdup");
		free(wide);
	}

	if (iflag) {
		cp = key;
		while (*cp != L'\0') {
			*cp = towlower(*cp);
			cp++;
		}
	}
	return (key);
}
/*
 * inlcmp --
 *	Byte-wise comparison of two lines in which a newline at the first
 *	point of difference counts as the end of the string.  Returns 0 when
 *	the lines are equal under that rule, otherwise the difference of the
 *	first differing bytes taken as unsigned char.
 */
static int
inlcmp(const char *s1, const char *s2)
{
	const unsigned char *a = (const unsigned char *)s1;
	const unsigned char *b = (const unsigned char *)s2;
	int lhs, rhs;

	/* Walk the common prefix; reaching the NUL means full equality. */
	for (; *a == *b; a++, b++) {
		if (*a == '\0')
			return (0);
	}

	/* The last line may not end with \n. */
	lhs = (*a == '\n') ? '\0' : *a;
	rhs = (*b == '\n') ? '\0' : *b;
	return (lhs - rhs);
}
/*
 * show --
 * Output a line depending on the flags and number of repetitions
 * of the line.
 *
 * With cflag set, str is preceded by the count (repeats + 1), printed
 * right-aligned in a four-column field, and one space; otherwise str is
 * written unchanged.  No newline is appended.  After this change the
 * -d/-u filtering no longer happens here: callers decide whether a
 * line is shown at all.
 */
static void
show(FILE *ofp, const char *str)
{
-
- if ((!Dflag && dflag && repeats == 0) || (uflag && repeats > 0))
- return;
if (cflag)
- (void)fprintf(ofp, "%4d %s", repeats + 1, str);
+ (void)fprintf(ofp, "%4lld %s", repeats + 1, str);	/* repeats is long long */
else
(void)fprintf(ofp, "%s", str);
}
/*
 * skip --
 * Return a pointer into str just past the first numfields fields and
 * then past up to numchars further characters.  Scanning stops early at
 * the terminating NUL, so the result never points beyond the string.
 */
static wchar_t *
skip(wchar_t *str)
{
- int nchars, nfields;
+ long long nchars, nfields;
/* A field is optional leading blanks followed by a run of non-blanks. */
for (nfields = 0; *str != L'\0' && nfields++ != numfields; ) {
while (iswblank(*str))
str++;
while (*str != L'\0' && !iswblank(*str))
str++;
}
/* Step over at most numchars characters of what remains. */
for (nchars = numchars; nchars-- && *str != L'\0'; ++str)
;
return(str);
}
/*
 * file --
 *	Open name with the given fopen(3) mode and return the stream.
 *	On failure the program terminates through err() with status 1,
 *	so the function never returns NULL.
 */
static FILE *
file(const char *name, const char *mode)
{
	FILE *stream;

	stream = fopen(name, mode);
	if (stream == NULL)
		err(1, "%s", name);
	return (stream);
}
/*
 * obsolete --
 *	Rewrite historic option syntax in place: an argument of the form
 *	"-N" becomes "-fN" and "+N" becomes "-sN" (N starting with a digit).
 *	Scanning starts at argv[1] and stops at "--"-style arguments or at
 *	the first argument that does not begin with '-' or '+'.  Rewritten
 *	entries point to malloc'd strings that are never freed; allocation
 *	failure terminates the program.
 */
static void
obsolete(char *argv[])
{
	size_t len;	/* size_t, so strlen()'s result is never narrowed */
	char *ap, *p, *start;

	while ((ap = *++argv) != NULL) {
		/* Return if "--" or not an option of any form. */
		if (ap[0] != '-') {
			if (ap[0] != '+')
				return;
		} else if (ap[1] == '-')
			return;
		/* Cast keeps isdigit() defined for bytes with the high bit set. */
		if (!isdigit((unsigned char)ap[1]))
			continue;
		/*
		 * Digit signifies an old-style option.  Malloc space for dash,
		 * new option and argument.  The result needs len + 2 bytes
		 * including the NUL; len + 3 leaves one byte of slack.
		 */
		len = strlen(ap);
		if ((start = p = malloc(len + 3)) == NULL)
			err(1, "malloc");
		*p++ = '-';
		*p++ = ap[0] == '+' ? 's' : 'f';
		(void)strcpy(p, ap + 1);
		*argv = start;
	}
}
/*
 * usage --
 *	Write the synopsis to standard error and exit with status 1.
 */
static void
usage(void)
{
	static const char synopsis[] =
	    "usage: uniq [-c | -d | -D | -u] [-i] [-f fields] [-s chars] [input [output]]\n";

	(void)fputs(synopsis, stderr);
	exit(1);
}