diff --git a/usr.bin/find/Makefile b/usr.bin/find/Makefile --- a/usr.bin/find/Makefile +++ b/usr.bin/find/Makefile @@ -6,13 +6,15 @@ PACKAGE= runtime PROG= find -SRCS= find.c function.c ls.c main.c misc.c operator.c option.c \ +SRCS= find.c function.c ls.c main.c misc.c operator.c option.c printf.c \ getdate.y YFLAGS= CFLAGS.clang+= -Werror=undef NO_WMISSING_VARIABLE_DECLARATIONS= +CFLAGS+= -g +LIBADD= sbuf HAS_TESTS= SUBDIR.${MK_TESTS}+= tests diff --git a/usr.bin/find/extern.h b/usr.bin/find/extern.h --- a/usr.bin/find/extern.h +++ b/usr.bin/find/extern.h @@ -47,6 +47,8 @@ int queryuser(char **); OPTION *lookup_option(const char *); void finish_execplus(void); +void do_printf(PLAN *plan, FTSENT *entry, FILE *fout); + creat_f c_Xmin; creat_f c_Xtime; @@ -71,6 +73,7 @@ creat_f c_nouser; creat_f c_perm; creat_f c_print; +creat_f c_printf; creat_f c_regex; creat_f c_samefile; creat_f c_simple; @@ -108,6 +111,7 @@ exec_f f_perm; exec_f f_print; exec_f f_print0; +exec_f f_printf; exec_f f_prune; exec_f f_quit; exec_f f_regex; diff --git a/usr.bin/find/find.h b/usr.bin/find/find.h --- a/usr.bin/find/find.h +++ b/usr.bin/find/find.h @@ -100,6 +100,8 @@ #define F_TIME2_B 0x00080000 /* one of -newer?B */ #endif #define F_LINK 0x00100000 /* lname or ilname */ +/* Notes about execution */ +#define F_HAS_WARNED 0x10000000 /* Has issued a warning for maybe bad input */ /* node definition */ typedef struct _plandata { diff --git a/usr.bin/find/find.1 b/usr.bin/find/find.1 --- a/usr.bin/find/find.1 +++ b/usr.bin/find/find.1 @@ -799,6 +799,17 @@ ASCII .Dv NUL character (character code 0). +.It Ic -printf Ar fmt +This primary always evaluates to true. +It prints information about the file, interpreting +.Sq \e +and +.Sq % +escape sequences as described in the PRINTF FORMATS section. +Unlike +.Ic -print , +.Ic -printf +does not add a newline automatically. .It Ic -prune This primary always evaluates to true. 
It causes @@ -959,6 +970,79 @@ Primaries which themselves take arguments expect each argument to be a separate argument to .Nm . +.Sh PRINTF FORMATS +The following +.Sq \e +escapes are recognized: +.Bl -tag -width Ds -offset indent -compact +.It Cm \ea +Write a <bell> character. +.It Cm \eb +Write a <backspace> character. +.It Cm \ec +Writes no characters, but terminates the string and flushes the output so far +after each match. +.It Cm \ef +Write a <form-feed> character. +.It Cm \en +Write a <new-line> character. +.It Cm \er +Write a <carriage return> character. +.It Cm \et +Write a <tab> character. +.It Cm \ev +Write a <vertical tab> character. +.It Cm \e\' +Write a <single quote> character. +.It Cm \e\e +Write a backslash character. +.It Cm \e Ns Ar num +Write a byte whose +value is the 1-, 2-, or 3-digit +octal number +.Ar num . +Multibyte characters can be constructed using multiple +.Cm \e Ns Ar num +sequences. +.El +.Pp +Each format specification is introduced by the percent character +(``%''). +The remainder of the format specification includes, +in the following order: +.Bl -tag -width Ds +.It "Zero or more of the following flags:" +.Bl -tag -width Ds +.It Cm # +A `#' character, which has no effect on almost all formats. +It is not yet implemented. +.It Cm \&\- +A minus sign `\-' which specifies +.Em left adjustment +of the output in the indicated field. +It is not yet implemented. +.It "Field Width:" +An optional digit string specifying a +.Em field width ; +if the output string has fewer bytes than the field width it will +be blank-padded on the left (or right, if the left-adjustment indicator +has been given) to make up the field width (note that a leading zero +is a flag, but an embedded zero is part of a field width). +It is not yet implemented. +.It Precision: +An optional period, +.Sq Cm \&.\& , +followed by an optional digit string giving a +.Em precision +which specifies the maximum number of bytes to be printed +from a string; if the digit string is missing, the precision is treated +as zero. +It is not yet implemented. 
+.It Format:
+One or two characters, described below, which indicates the information to display.
+XXX need to write this.
+.El
+.El
 .Sh ENVIRONMENT
 The
 .Ev LANG , LC_ALL , LC_COLLATE , LC_CTYPE , LC_MESSAGES
diff --git a/usr.bin/find/function.c b/usr.bin/find/function.c
--- a/usr.bin/find/function.c
+++ b/usr.bin/find/function.c
@@ -1395,6 +1395,33 @@
 
 /* c_print0 is the same as c_print */
 
+/*
+ * -printf functions --
+ *
+ * Always true. Causes information as specified in the
+ * argument to be written to standard output.
+ */
+int
+f_printf(PLAN *plan, FTSENT *entry)
+{
+	do_printf(plan, entry, stdout);
+	return 1;
+}
+
+PLAN *
+c_printf(OPTION *option, char ***argvp)
+{
+	PLAN *new;
+
+	isoutput = 1;
+	ftsoptions &= ~FTS_NOSTAT;
+
+	new = palloc(option);
+	new->c_data = nextarg(option, argvp);
+
+	return (new);
+}
+
 /*
  * -prune functions --
  *
diff --git a/usr.bin/find/option.c b/usr.bin/find/option.c
--- a/usr.bin/find/option.c
+++ b/usr.bin/find/option.c
@@ -153,7 +153,7 @@
 	{ "-perm",	c_perm,		f_perm,		0 },
 	{ "-print",	c_print,	f_print,	0 },
 	{ "-print0",	c_print,	f_print0,	0 },
-// -printf
+	{ "-printf",	c_printf,	f_printf,	0 },
 	{ "-prune",	c_simple,	f_prune,	0 },
 	{ "-quit",	c_simple,	f_quit,		0 },
 	{ "-regex",	c_regex,	f_regex,	0 },
diff --git a/usr.bin/find/printf.c b/usr.bin/find/printf.c
new file mode 100644
--- /dev/null
+++ b/usr.bin/find/printf.c
@@ -0,0 +1,385 @@
+/*-
+ * Copyright (c) 2023, Netflix, Inc
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#include <sys/types.h>
+#include <sys/sbuf.h>
+#include <sys/stat.h>
+
+#include <err.h>
+#include <fts.h>
+#include <grp.h>
+#include <limits.h>
+#include <pwd.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "find.h"
+
+/* translate \X to proper escape, or to itself if no special meaning */
+static const char *esc = "\a\bcde\fghijklm\nopq\rs\tu\v";
+
+/* Return true if c is an octal digit. */
+static inline bool
+isoct(char c)
+{
+	return (c >= '0' && c <= '7');
+}
+
+/* Return true if \c maps to a different character in the esc table. */
+static inline bool
+isesc(char c)
+{
+	return (c >= 'a' && c <= 'v' && esc[c - 'a'] != c);
+}
+
+/*
+ * Expand the backslash escapes in str into a new, finished sbuf that the
+ * caller must release with sbuf_delete().  Octal escapes such as \0 can
+ * store NUL bytes, so callers have to use sbuf_len() rather than treat
+ * the result as a C string.
+ *
+ * *flush is set to true when a \c escape cut the string short and to
+ * false otherwise.  *warned is both input and output: the unknown-escape
+ * diagnostic is only printed while it is false, and it is set to true
+ * once the diagnostic has been printed.
+ */
+static struct sbuf *
+escape(const char *str, bool *flush, bool *warned)
+{
+	char c;
+	int value;
+	struct sbuf *newsb;
+
+	newsb = sbuf_new_auto();
+	if (newsb == NULL)
+		err(1, "sbuf_new_auto");
+	*flush = false;
+	for (c = *str++; c; c = *str++) {
+		if (c != '\\') {
+			sbuf_putc(newsb, c);
+			continue;
+		}
+		c = *str++;
+
+		/*
+		 * User error \ at end of string
+		 */
+		if (c == '\0') {
+			sbuf_putc(newsb, '\\');
+			break;
+		}
+
+		/*
+		 * \c terminates output now and is supposed to flush the output
+		 * too...
+		 */
+		if (c == 'c') {
+			*flush = true;
+			break;
+		}
+
+		/*
+		 * Is it octal? If so, decode up to 3 octal characters.
+		 */
+		if (isoct(c)) {
+			value = 0;
+			for (int i = 3; i-- > 0 && isoct(c);
+			     c = *str++) {
+				value <<= 3;
+				value += c - '0';
+			}
+			/* Un-read the character that ended the number. */
+			str--;
+			sbuf_putc(newsb, (char)value);
+			continue;
+		}
+
+		/*
+		 * It's an ANSI X3.159-1989 escape, use the mini-escape lookup
+		 * table to translate.
+		 */
+		if (isesc(c)) {
+			sbuf_putc(newsb, esc[c - 'a']);
+			continue;
+		}
+
+		/*
+		 * \\ and \' stand for themselves; they are valid escapes and
+		 * must not trigger the warning below.
+		 */
+		if (c == '\\' || c == '\'') {
+			sbuf_putc(newsb, c);
+			continue;
+		}
+
+		/*
+		 * Otherwise, it's self inserting. gnu find specifically says
+		 * not to rely on this behavior though. gnu find will issue
+		 * a warning here, while printf(1) won't.  Use warnx(): no
+		 * system call failed, so errno carries no information.
+		 */
+		if (!*warned) {
+			warnx("Unknown character %c after \\.", c);
+			*warned = true;
+		}
+		sbuf_putc(newsb, c);
+	}
+
+	if (sbuf_finish(newsb) != 0)
+		err(1, "sbuf_finish");
+	return (newsb);
+}
+
+/*
+ * Append t in ctime(3) format, minus the trailing newline.  Nothing is
+ * appended if the time cannot be converted.
+ */
+static void
+sbuf_ctime(struct sbuf *sb, time_t t)
+{
+	char s[26];
+
+	if (ctime_r(&t, s) == NULL)
+		return;
+	s[24] = '\0'; /* kill newline, though gnu find info silent on issue */
+	sbuf_cat(sb, s);
+}
+
+/*
+ * Append t formatted with the single strftime(3) conversion character mod,
+ * or as seconds since the epoch when mod is '@'.
+ *
+ * Assumes all times are displayed in UTC rather than local time, gnu find info
+ * page silent on the issue.
+ *
+ * Also assumes that gnu find doesn't support multiple character escape sequences,
+ * which its info page is also silent on.
+ */
+static void
+sbuf_strftime(struct sbuf *sb, time_t t, char mod)
+{
+	struct tm tm;
+	char buffer[128];
+	char fmt[3] = "% ";
+
+	/*
+	 * Gnu libc extension we don't yet support -- seconds since epoch
+	 * Used in Linux kernel build, so we kinda have to support it here
+	 */
+	if (mod == '@') {
+		sbuf_printf(sb, "%ju", (uintmax_t)t);
+		return;
+	}
+
+	gmtime_r(&t, &tm);
+	fmt[1] = mod;
+	if (strftime(buffer, sizeof(buffer), fmt, &tm) == 0)
+		errx(1, "Format bad or data too long for buffer"); /* Can't really happen ??? */
+	sbuf_cat(sb, buffer);
+}
+
+/*
+ * Return the strftime conversion character that must follow %A, %B, %C or
+ * %T (conv) and advance *fmtp past it.  Exits with an error if the format
+ * ends, or a NUL byte appears, where that character is expected, so the
+ * caller never reads beyond end.
+ */
+static char
+time_modifier(const char **fmtp, const char *end, char conv, const char *all)
+{
+	if (*fmtp >= end || **fmtp == '\0')
+		errx(1, "Missing time format after %%%c: '%s'", conv, all);
+	return (*(*fmtp)++);
+}
+
+/*
+ * Implement -printf: expand the escapes and % directives of the format in
+ * plan->c_data for entry and write the result to fout.  The output is
+ * flushed when the format contains \c.  Unsupported or malformed
+ * directives terminate find with an error message.
+ */
+void
+do_printf(PLAN *plan, FTSENT *entry, FILE *fout)
+{
+	const char *fmt, *end, *path, *pend, *all;
+	char c;
+	struct sbuf *fmtsb, *out;
+	bool flush, warned;
+	struct stat *sb;
+
+	out = sbuf_new_auto();
+	if (out == NULL)
+		err(1, "sbuf_new_auto");
+	warned = (plan->flags & F_HAS_WARNED) != 0;
+	fmtsb = escape(plan->c_data, &flush, &warned);
+	if (warned)
+		plan->flags |= F_HAS_WARNED;
+	all = fmt = sbuf_data(fmtsb);
+	/*
+	 * Walk the format by length instead of stopping at the first NUL:
+	 * escapes like \0 legitimately put NUL bytes into it.
+	 */
+	end = fmt + sbuf_len(fmtsb);
+	sb = entry->fts_statp;
+	while (fmt < end) {
+		c = *fmt++;
+		if (c != '%') {
+			sbuf_putc(out, c);
+			continue;
+		}
+		if (fmt >= end)
+			errx(1, "Missing format after %%: '%s'", all);
+		c = *fmt++;
+		/* Style(9) deviation: case order same as gnu find info doc */
+		switch (c) {
+		case '%':
+			sbuf_putc(out, c);
+			break;
+		case 'p': /* Path to file */
+			sbuf_cat(out, entry->fts_path);
+			break;
+		case 'f': /* filename w/o dirs */
+			sbuf_cat(out, entry->fts_name);
+			break;
+		case 'h':
+			/*
+			 * path, relative to the starting point, of the file, or
+			 * '.' if that's empty for some reason.
+			 */
+			path = entry->fts_path;
+			pend = strrchr(path, '/');
+			if (pend == NULL)
+				sbuf_putc(out, '.');
+			else
+				sbuf_bcat(out, path, pend - path);
+			break;
+		case 'P': /* file with command line arg rm'd -- HOW? fts_parent? */
+			errx(1, "%%%c is unimplemented", c);
+		case 'H': /* Command line arg -- HOW? */
+			errx(1, "%%%c is unimplemented", c);
+		case 'g': /* gid human readable */
+			sbuf_cat(out, group_from_gid(sb->st_gid, 0));
+			break;
+		case 'G': /* gid numeric */
+			sbuf_printf(out, "%ju", (uintmax_t)sb->st_gid);
+			break;
+		case 'u': /* uid human readable */
+			sbuf_cat(out, user_from_uid(sb->st_uid, 0));
+			break;
+		case 'U': /* uid numeric */
+			sbuf_printf(out, "%ju", (uintmax_t)sb->st_uid);
+			break;
+		case 'm': /* mode in octal */
+			sbuf_printf(out, "%o", (unsigned int)(sb->st_mode & 07777));
+			break;
+		case 'M': { /* Mode in ls-standard form */
+			char mode[12];
+
+			strmode(sb->st_mode, mode);
+			sbuf_cat(out, mode);
+			break;
+		}
+		case 'k': /* kbytes used by file */
+			sbuf_printf(out, "%jd", (intmax_t)sb->st_blocks / 2);
+			break;
+		case 'b': /* blocks used by file */
+			sbuf_printf(out, "%jd", (intmax_t)sb->st_blocks);
+			break;
+		case 's': /* size in bytes of file */
+			sbuf_printf(out, "%jd", (intmax_t)sb->st_size);
+			break;
+		case 'S': /* sparseness of file */
+			/* An empty file using no blocks is not sparse. */
+			if (sb->st_size == 0 && sb->st_blocks == 0)
+				sbuf_cat(out, "1.0");
+			else
+				sbuf_printf(out, "%3.1f",
+				    (double)sb->st_blocks * 512 /
+				    (double)sb->st_size);
+			break;
+		case 'd': /* Depth in tree */
+			sbuf_printf(out, "%jd", (intmax_t)entry->fts_level);
+			break;
+		case 'D': /* device number */
+			sbuf_printf(out, "%ju", (uintmax_t)sb->st_dev);
+			break;
+		case 'F': /* Filesystem type */
+			errx(1, "%%%c is unimplemented", c);
+		case 'l': { /* object of symbolic link */
+			char target[PATH_MAX];
+			ssize_t len;
+
+			/* Expands to nothing unless the file is a symlink. */
+			if (!S_ISLNK(sb->st_mode))
+				break;
+			len = readlink(entry->fts_accpath, target,
+			    sizeof(target));
+			if (len > 0)
+				sbuf_bcat(out, target, len);
+			break;
+		}
+		case 'i': /* inode # */
+			sbuf_printf(out, "%ju", (uintmax_t)sb->st_ino);
+			break;
+		case 'n': /* number of hard links */
+			sbuf_printf(out, "%ju", (uintmax_t)sb->st_nlink);
+			break;
+		case 'y': /* -type of file, incl 'l' */
+			errx(1, "%%%c is unimplemented", c);
+		case 'Y': /* -type of file, following 'l' types L loop ? error */
+			errx(1, "%%%c is unimplemented", c);
+		case 'a': /* access time ctime */
+			sbuf_ctime(out, sb->st_atime);
+			break;
+		case 'A': /* access time with next char strftime format */
+			sbuf_strftime(out, sb->st_atime,
+			    time_modifier(&fmt, end, c, all));
+			break;
+		case 'B': /* birth time with next char strftime format */
+			c = time_modifier(&fmt, end, c, all);
+#ifdef HAVE_STRUCT_STAT_ST_BIRTHTIME
+			if (sb->st_birthtime != 0)
+				sbuf_strftime(out, sb->st_birthtime, c);
+#endif
+			break; /* blank on systems that don't support it */
+		case 'c': /* status change time ctime */
+			sbuf_ctime(out, sb->st_ctime);
+			break;
+		case 'C': /* status change time with next char strftime format */
+			sbuf_strftime(out, sb->st_ctime,
+			    time_modifier(&fmt, end, c, all));
+			break;
+		case 't': /* modification change time ctime */
+			sbuf_ctime(out, sb->st_mtime);
+			break;
+		case 'T': /* modification time with next char strftime format */
+			sbuf_strftime(out, sb->st_mtime,
+			    time_modifier(&fmt, end, c, all));
+			break;
+		case 'Z': /* empty string for compat SELinux context string */
+			break;
+		/* Modifier parsing here, but also need to modify above somehow */
+		case '#': case '-': case '0': case '1': case '2': case '3': case '4':
+		case '5': case '6': case '7': case '8': case '9': case '.':
+			errx(1, "Format modifier %c not yet supported: '%s'", c, all);
+		/* Any FreeBSD-specific modifications here -- none yet */
+		default:
+			errx(1, "Unknown format %c '%s'", c, all);
+		}
+	}
+	if (sbuf_finish(out) != 0)
+		err(1, "sbuf_finish");
+	/* fwrite(), not fputs(): the output may contain NUL bytes. */
+	fwrite(sbuf_data(out), 1, (size_t)sbuf_len(out), fout);
+	if (flush)
+		fflush(fout);
+	sbuf_delete(out);
+	sbuf_delete(fmtsb);
+}