diff --git a/contrib/vis/vis.1 b/contrib/vis/vis.1 index 803d34eda5f9..f654a516aeb0 100644 --- a/contrib/vis/vis.1 +++ b/contrib/vis/vis.1 @@ -1,189 +1,205 @@ -.\" $NetBSD: vis.1,v 1.19 2013/03/14 19:17:23 njoly Exp $ +.\" $NetBSD: vis.1,v 1.25 2021/02/20 09:31:51 nia Exp $ .\" .\" Copyright (c) 1989, 1991, 1993, 1994 .\" The Regents of the University of California. All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" 3. Neither the name of the University nor the names of its contributors .\" may be used to endorse or promote products derived from this software .\" without specific prior written permission. .\" .\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. 
.\" .\" @(#)vis.1 8.4 (Berkeley) 4/19/94 .\" -.Dd February 19, 2013 +.Dd February 18, 2021 .Dt VIS 1 .Os .Sh NAME .Nm vis .Nd display non-printable characters in a visual format .Sh SYNOPSIS .Nm -.Op Fl bcfhlmnostw +.Op Fl bcfhlMmNnoSstw .Op Fl e Ar extra .Op Fl F Ar foldwidth .Op Ar file ... .Sh DESCRIPTION .Nm is a filter for converting non-printable characters into a visual representation. It differs from .Ql cat -v in that the form is unique and invertible. By default, all non-graphic characters except space, tab, and newline are encoded. A detailed description of the various visual formats is given in .Xr vis 3 . .Pp The options are as follows: .Bl -tag -width Ds .It Fl b Turns off prepending of backslash before up-arrow control sequences and meta characters, and disables the doubling of backslashes. This produces output which is neither invertible nor precise, but does represent a minimum of change to the input. It is similar to .Dq Li cat -v . .Pq Dv VIS_NOSLASH .It Fl c Request a format which displays a small subset of the non-printable characters using C-style backslash sequences. .Pq Dv VIS_CSTYLE .It Fl e Ar extra Also encode characters in .Ar extra , per .Xr svis 3 . .It Fl F Ar foldwidth Causes .Nm to fold output lines to foldwidth columns (default 80), like .Xr fold 1 , except that a hidden newline sequence is used, (which is removed when inverting the file back to its original form with .Xr unvis 1 ) . If the last character in the encoded file does not end in a newline, a hidden newline sequence is appended to the output. This makes the output usable with various editors and other utilities which typically don't work with partial lines. .It Fl f Same as .Fl F . .It Fl h Encode using the URI encoding from RFC 1808. .Pq Dv VIS_HTTPSTYLE .It Fl l Mark newlines with the visible sequence .Ql \e$ , followed by the newline. 
+.It Fl M +Encode all shell meta characters (implies +.Fl S , +.Fl w , +.Dv VIS_GLOB ) +.Pq Dv VIS_META .It Fl m Encode using the MIME Quoted-Printable encoding from RFC 2045. .Pq Dv VIS_MIMESTYLE +.It Fl N +Turn on the +.Dv VIS_NOLOCALE +flag which encodes using the +.Dq C +locale, removing any encoding dependencies caused by the current +locale settings specified in the environment. .It Fl n Turns off any encoding, except for the fact that backslashes are still doubled and hidden newline sequences inserted if .Fl f or .Fl F is selected. When combined with the .Fl f flag, .Nm becomes like an invertible version of the .Xr fold 1 utility. That is, the output can be unfolded by running the output through .Xr unvis 1 . .It Fl o Request a format which displays non-printable characters as an octal number, \eddd. .Pq Dv VIS_OCTAL +.It Fl S +Encode shell meta-characters other than white space and glob characters. +.Pq Dv VIS_SHELL .It Fl s Only characters considered unsafe to send to a terminal are encoded. This flag allows backspace, bell, and carriage return in addition to the default space, tab and newline. .Pq Dv VIS_SAFE .It Fl t Tabs are also encoded. .Pq Dv VIS_TAB .It Fl w White space (space-tab-newline) is also encoded. .Pq Dv VIS_WHITE .El .Sh MULTIBYTE CHARACTER SUPPORT .Nm supports multibyte character input. The encoding conversion is influenced by the setting of the .Ev LC_CTYPE environment variable which defines the set of characters that can be copied without encoding. .Pp When 8-bit data is present in the input, .Ev LC_CTYPE must be set to the correct locale or to the C locale. If the locales of the data and the conversion are mismatched, multibyte character recognition may fail and encoding will be performed byte-by-byte instead. .Sh ENVIRONMENT .Bl -tag -width ".Ev LC_CTYPE" .It Ev LC_CTYPE Specify the locale of the input data. Set to C if the input data locale is unknown. 
.El .Sh EXAMPLES Visualize characters encoding white spaces and tabs: .Bd -literal -offset indent $ echo -e "\\x10\\n\\t" | vis -w -t \\^P\\012\\011\\012 .Ed .Pp Same as above but using `\\$' for newline followed by an actual newline: .Bd -literal -offset indent $ echo -e "\\x10\\n\\t" | vis -w -t -l \\^P\\$ \\011\\$ .Ed .Pp Visualize string using URI encoding: .Bd -literal -offset indent $ echo http://www.freebsd.org | vis -h http%3a%2f%2fwww.freebsd.org%0a .Ed .Sh SEE ALSO .Xr unvis 1 , .Xr svis 3 , .Xr vis 3 .Sh HISTORY The .Nm command appears in .Bx 4.4 . -Myltibyte character support was added in +Multibyte character support was added in .Nx 7.0 and .Fx 9.2 . diff --git a/contrib/vis/vis.c b/contrib/vis/vis.c index 1509c81c4123..6e3ea11a2782 100644 --- a/contrib/vis/vis.c +++ b/contrib/vis/vis.c @@ -1,276 +1,285 @@ -/* $NetBSD: vis.c,v 1.22 2013/02/20 17:04:45 christos Exp $ */ +/* $NetBSD: vis.c,v 1.25 2015/05/24 19:42:39 christos Exp $ */ /*- * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include #ifndef lint __COPYRIGHT("@(#) Copyright (c) 1989, 1993\ The Regents of the University of California. All rights reserved."); #endif /* not lint */ #ifndef lint #if 0 static char sccsid[] = "@(#)vis.c 8.1 (Berkeley) 6/6/93"; #endif -__RCSID("$NetBSD: vis.c,v 1.22 2013/02/20 17:04:45 christos Exp $"); +__RCSID("$NetBSD: vis.c,v 1.25 2015/05/24 19:42:39 christos Exp $"); #endif /* not lint */ #include #include #include #include #include #include #include #include #include #include #include "extern.h" static int eflags, fold, foldwidth = 80, none, markeol; #ifdef DEBUG int debug; #endif static const char *extra = ""; static void process(FILE *); int main(int argc, char *argv[]) { FILE *fp; int ch; int rval; - while ((ch = getopt(argc, argv, "bcde:F:fhlmnostw")) != -1) + while ((ch = getopt(argc, argv, "bcde:F:fhlMmNnoSstw")) != -1) switch((char)ch) { case 'b': eflags |= VIS_NOSLASH; break; case 'c': eflags |= VIS_CSTYLE; break; #ifdef DEBUG case 'd': debug++; break; #endif case 'e': extra = optarg; break; case 'F': if ((foldwidth = atoi(optarg)) < 5) { errx(1, "can't fold lines to less than 5 cols"); /* NOTREACHED */ } markeol++; break; case 'f': fold++; /* fold output lines to 80 cols */ break; /* using hidden newline */ case 'h': eflags |= VIS_HTTPSTYLE; break; case 'l': markeol++; /* mark end of line with \$ */ break; + case 'M': + eflags |= VIS_META; + break; case 'm': eflags |= VIS_MIMESTYLE; if (foldwidth == 80) foldwidth = 76; break; + 
case 'N': + eflags |= VIS_NOLOCALE; + break; case 'n': none++; break; case 'o': eflags |= VIS_OCTAL; break; + case 'S': + eflags |= VIS_SHELL; + break; case 's': eflags |= VIS_SAFE; break; case 't': eflags |= VIS_TAB; break; case 'w': eflags |= VIS_WHITE; break; case '?': default: (void)fprintf(stderr, - "Usage: %s [-bcfhlmnostw] [-e extra]" + "Usage: %s [-bcfhlMmNnoSstw] [-e extra]" " [-F foldwidth] [file ...]\n", getprogname()); return 1; } if ((eflags & (VIS_HTTPSTYLE|VIS_MIMESTYLE)) == (VIS_HTTPSTYLE|VIS_MIMESTYLE)) errx(1, "Can't specify -m and -h at the same time"); argc -= optind; argv += optind; rval = 0; if (*argv) while (*argv) { if ((fp = fopen(*argv, "r")) != NULL) { process(fp); (void)fclose(fp); } else { warn("%s", *argv); rval = 1; } argv++; } else process(stdin); return rval; } static void process(FILE *fp) { static int col = 0; static char nul[] = "\0"; char *cp = nul + 1; /* so *(cp-1) starts out != '\n' */ wint_t c, c1, rachar; char mbibuff[2 * MB_LEN_MAX + 1]; /* max space for 2 wchars */ char buff[4 * MB_LEN_MAX + 1]; /* max encoding length for one char */ int mbilen, cerr = 0, raerr = 0; /* * The input stream is considered to be multibyte characters. * The input loop will read this data inputing one character, * possibly multiple bytes, at a time and converting each to * a wide character wchar_t. * * The vis(3) functions, however, require single either bytes * or a multibyte string as their arguments. So we convert * our input wchar_t and the following look-ahead wchar_t to * a multibyte string for processing by vis(3). */ /* Read one multibyte character, store as wchar_t */ c = getwc(fp); if (c == WEOF && errno == EILSEQ) { /* Error in multibyte data. Read one byte. */ c = (wint_t)getc(fp); cerr = 1; } while (c != WEOF) { /* Clear multibyte input buffer. */ memset(mbibuff, 0, sizeof(mbibuff)); /* Read-ahead next multibyte character. 
*/ if (!cerr) rachar = getwc(fp); if (cerr || (rachar == WEOF && errno == EILSEQ)) { /* Error in multibyte data. Read one byte. */ rachar = (wint_t)getc(fp); raerr = 1; } if (none) { /* Handle -n flag. */ cp = buff; *cp++ = c; if (c == '\\') *cp++ = '\\'; *cp = '\0'; } else if (markeol && c == '\n') { /* Handle -l flag. */ cp = buff; if ((eflags & VIS_NOSLASH) == 0) *cp++ = '\\'; *cp++ = '$'; *cp++ = '\n'; *cp = '\0'; } else { /* * Convert character using vis(3) library. * At this point we will process one character. * But we must pass the vis(3) library this * character plus the next one because the next * one is used as a look-ahead to decide how to * encode this one under certain circumstances. * * Since our characters may be multibyte, e.g., * in the UTF-8 locale, we cannot use vis() and * svis() which require byte input, so we must * create a multibyte string and use strvisx(). */ /* Treat EOF as a NUL char. */ c1 = rachar; if (c1 == WEOF) c1 = L'\0'; /* * If we hit a multibyte conversion error above, * insert byte directly into string buff because * wctomb() will fail. Else convert wchar_t to * multibyte using wctomb(). */ if (cerr) { *mbibuff = (char)c; mbilen = 1; } else mbilen = wctomb(mbibuff, c); /* Same for look-ahead character. */ if (raerr) mbibuff[mbilen] = (char)c1; else wctomb(mbibuff + mbilen, c1); /* Perform encoding on just first character. */ (void) strsenvisx(buff, 4 * MB_LEN_MAX, mbibuff, 1, eflags, extra, &cerr); } cp = buff; if (fold) { #ifdef DEBUG if (debug) (void)printf("<%02d,", col); #endif col = foldit(cp, col, foldwidth, eflags); #ifdef DEBUG if (debug) (void)printf("%02d>", col); #endif } do { (void)putchar(*cp); } while (*++cp); c = rachar; cerr = raerr; } /* * terminate partial line with a hidden newline */ if (fold && *(cp - 1) != '\n') (void)printf(eflags & VIS_MIMESTYLE ? "=\n" : "\\\n"); }