Index: contrib/netbsd-tests/usr.bin/grep/t_grep.sh
===================================================================
--- contrib/netbsd-tests/usr.bin/grep/t_grep.sh
+++ contrib/netbsd-tests/usr.bin/grep/t_grep.sh
@@ -391,7 +391,33 @@
 
 	atf_check -o inline:"Eggs\nCheese\n" grep -v -e "^$" test1
 }
-# End FreeBSD
+
+atf_test_case wflag_emptypat
+wflag_emptypat_head()
+{
+	atf_set "descr" "Check for proper handling of -w with an empty pattern (PR 105221)"
+}
+wflag_emptypat_body()
+{
+	grep_type
+	if [ $type -eq $GREP_TYPE_GNU_FREEBSD ]; then
+		atf_expect_fail "this test does not pass with GNU grep in base"
+	fi
+
+	printf "" > test1
+	printf "\n" > test2
+	printf "qaz" > test3
+	printf " qaz\n" > test4
+
+	atf_check -s exit:1 -o empty grep -w -e "" test1
+
+	atf_check -o file:test2 grep -w -e "" test2
+
+	atf_check -s exit:1 -o empty grep -w -e "" test3
+
+	atf_check -o file:test4 grep -w -e "" test4
+}
+# End FreeBSD
 
 atf_init_test_cases()
 {
@@ -419,5 +445,6 @@
 	atf_add_test_case escmap
 	atf_add_test_case egrep_empty_invalid
 	atf_add_test_case zerolen
+	atf_add_test_case wflag_emptypat
 # End FreeBSD
 }
Index: usr.bin/grep/grep.h
===================================================================
--- usr.bin/grep/grep.h
+++ usr.bin/grep/grep.h
@@ -124,7 +124,6 @@
 extern int	 binbehave, devbehave, dirbehave, filebehave, grepbehave, linkbehave;
 
 extern bool	 file_err, first, matchall, prev;
-extern int	 tail;
 extern unsigned int dpatterns, fpatterns, patterns;
 extern struct pat *pattern;
 extern struct epat *dpattern, *fpattern;
@@ -145,7 +144,7 @@
 void	*grep_calloc(size_t nmemb, size_t size);
 void	*grep_realloc(void *ptr, size_t size);
 char	*grep_strdup(const char *str);
-void	 printline(struct str *line, int sep, regmatch_t *matches, int m);
+void	 grep_printline(struct str *line, int sep);
 
 /* queue.c */
 void	 enqueue(struct str *x);
Index: usr.bin/grep/grep.c
===================================================================
--- usr.bin/grep/grep.c
+++ usr.bin/grep/grep.c
@@ -80,7 +80,13 @@
 int	 cflags = REG_NOSUB;
 int	 eflags = REG_STARTEND;
 
-/* Shortcut for matching all cases like empty regex */
+/* XXX TODO: Get rid of this flag.
+ * matchall is a gross hack that means that an empty pattern was passed to us.
+ * It is a necessary evil at the moment because our regex(3) implementation
+ * does not allow for empty patterns, as supported by POSIX's definition of
+ * grammar for BREs/EREs. When libregex becomes available, it would be wise
+ * to remove this and let regex(3) handle the dirty details of empty patterns.
+ */
 bool	 matchall;
 
 /* Searching patterns */
@@ -154,7 +160,6 @@
 /* Housekeeping */
 bool	 first = true;	/* flag whether we are processing the first match */
 bool	 prev;		/* flag whether or not the previous line matched */
-int	 tail;		/* lines left to print */
 bool	 file_err;	/* file reading error */
 
 /*
@@ -724,20 +729,23 @@
 #endif
 	r_pattern = grep_calloc(patterns, sizeof(*r_pattern));
 
-	/* Check if cheating is allowed (always is for fgrep). */
-	for (i = 0; i < patterns; ++i) {
+	/* Don't process any patterns if we have a blank one */
+	if (!matchall) {
+		/* Check if cheating is allowed (always is for fgrep). */
+		for (i = 0; i < patterns; ++i) {
 #ifndef WITHOUT_FASTMATCH
-		/* Attempt compilation with fastmatch regex and fallback to
-		   regex(3) if it fails. */
-		if (fastncomp(&fg_pattern[i], pattern[i].pat,
-		    pattern[i].len, cflags) == 0)
-			continue;
+			/* Attempt compilation with fastmatch regex and fallback to
+			   regex(3) if it fails. */
+			if (fastncomp(&fg_pattern[i], pattern[i].pat,
+			    pattern[i].len, cflags) == 0)
+				continue;
 #endif
-		c = regcomp(&r_pattern[i], pattern[i].pat, cflags);
-		if (c != 0) {
-			regerror(c, &r_pattern[i], re_error,
-			    RE_ERROR_BUF);
-			errx(2, "%s", re_error);
+			c = regcomp(&r_pattern[i], pattern[i].pat, cflags);
+			if (c != 0) {
+				regerror(c, &r_pattern[i], re_error,
+				    RE_ERROR_BUF);
+				errx(2, "%s", re_error);
+			}
 		}
 	}
 
Index: usr.bin/grep/queue.c
===================================================================
--- usr.bin/grep/queue.c
+++ usr.bin/grep/queue.c
@@ -95,7 +95,7 @@
 	struct qentry *item;
 
 	while ((item = dequeue()) != NULL) {
-		printline(&item->data, '-', NULL, 0);
+		grep_printline(&item->data, '-');
 		free(item->data.dat);
 		free(item);
 	}
Index: usr.bin/grep/util.c
===================================================================
--- usr.bin/grep/util.c
+++ usr.bin/grep/util.c
@@ -55,10 +55,24 @@
 #include "grep.h"
 
 static int	 linesqueued;
-static int	 procline(struct str *l, int);
 
 static int	 lasta;
-static bool	 ctxover;
+
+/*
+ * Parsing context; used to hold things like matches made and
+ * other useful bits
+ */
+struct parsec {
+	regmatch_t	matches[MAX_LINE_MATCHES];	/* Matches made */
+	struct str	ln;				/* Current line */
+	size_t		matchidx;			/* Latest used match index */
+	bool		binary;				/* Binary file? */
+};
+
+
+static int procline(struct parsec *pc);
+static void printline(struct parsec *pc, int sep);
+static void printline_metadata(struct str *line, int sep);
 
 bool
 file_matching(const char *fname)
@@ -183,12 +197,15 @@
 int
 procfile(const char *fn)
 {
+	struct parsec pc;
 	struct file *f;
 	struct stat sb;
-	struct str ln;
+	struct str *ln;
 	mode_t s;
-	int c, t;
-
+	int c, t, tail;
+	bool ctxover;
+	bool doctx, printmatch;
+
 	mcount = mlimit;
 
 	if (strcmp(fn, "-") == 0) {
@@ -213,57 +230,99 @@
 		return (0);
 	}
 
-	ln.file = grep_malloc(strlen(fn) + 1);
-	strcpy(ln.file, fn);
-	ln.line_no = 0;
-	ln.len = 0;
+	/* Convenience */
+	ln = &pc.ln;
+	pc.ln.file = grep_malloc(strlen(fn) + 1);
+	strcpy(pc.ln.file, fn);
+	pc.ln.line_no = 0;
+	pc.ln.len = 0;
+	pc.ln.off = -1;
 	ctxover = false;
 	linesqueued = 0;
 	tail = 0;
 	lasta = 0;
-	ln.off = -1;
+	pc.binary = f->binary;
+	/* Both depend on pc.binary, so decide only after the file is open */
+	printmatch = (!pc.binary || binbehave != BINFILE_BIN) && !cflag &&
+	    !qflag && !lflag && !Lflag;
+	doctx = printmatch && (Aflag != 0 || Bflag != 0);
 
 	for (c = 0; c == 0 || !(lflag || qflag); ) {
-		ln.off += ln.len + 1;
-		if ((ln.dat = grep_fgetln(f, &ln.len)) == NULL || ln.len == 0) {
-			if (ln.line_no == 0 && matchall)
-				exit(0);
+		/* Reset match count for every line processed */
+		pc.matchidx = 0;
+		pc.ln.off += pc.ln.len + 1;
+		if ((pc.ln.dat = grep_fgetln(f, &pc.ln.len)) == NULL || pc.ln.len == 0) {
+			if (pc.ln.line_no == 0 && matchall)
+				/* An empty file, empty pattern, -w flag does not match */
+				exit(wflag ? 1 : 0);
 			else
 				break;
 		}
-		if (ln.len > 0 && ln.dat[ln.len - 1] == fileeol)
-			--ln.len;
-		ln.line_no++;
+
+		if (pc.ln.len > 0 && pc.ln.dat[pc.ln.len - 1] == fileeol)
+			--pc.ln.len;
+		pc.ln.line_no++;
 
 		/* Return if we need to skip a binary file */
-		if (f->binary && binbehave == BINFILE_SKIP) {
+		if (pc.binary && binbehave == BINFILE_SKIP) {
 			grep_close(f);
-			free(ln.file);
+			free(pc.ln.file);
 			free(f);
 			return (0);
 		}
 
-		/* Process the file line-by-line, enqueue non-matching lines */
-		if ((t = procline(&ln, f->binary)) == 0 && Bflag > 0) {
-			/* Except don't enqueue lines that appear in -A ctx */
-			if (ln.line_no == 0 || lasta != ln.line_no) {
-				/* queue is maxed to Bflag number of lines */
-				enqueue(&ln);
+		if ((t = procline(&pc)) == 0)
+			++c;
+
+		/* Deal with any -B context */
+		if (t == 0 && doctx) {
+			if (!first && !prev && tail == 0 &&
+			    (Bflag || Aflag) && !ctxover)
+				printf("--\n");
+			tail = Aflag;
+			if (Bflag > 0) {
+				printqueue();
+				ctxover = false;
+			}
+			linesqueued = 0;
+			prev = true;
+			first = false;
+		}
+		/* Print the matching line, unless output is suppressed */
+		if (t == 0 && printmatch)
+			printline(&pc, ':');
+		/* Deal with any -A context */
+		if (t != 0 && doctx) {
+			if (tail > 0) {
+				/* Print -A lines following matches */
+				lasta = pc.ln.line_no;
+				printline(&pc, '-');
+				tail--;
+			}
+			/* Don't enqueue lines that appear in -A ctx */
+			if (pc.ln.line_no == 0 || lasta != pc.ln.line_no) {
+				enqueue(ln);
 				linesqueued++;
 				ctxover = false;
 			} else {
 				/*
-				 * Indicate to procline() that we have ctx
-				 * overlap and make sure queue is empty.
+				 * Dump the queue because we have
+				 * context overlap
 				 */
 				if (!ctxover)
 					clearqueue();
 				ctxover = true;
 			}
+			prev = false;
 		}
-		c += t;
-		if (mflag && mcount <= 0)
-			break;
+
+		/* Count the matches if we have a match limit */
+		if (t == 0 && mflag) {
+			--mcount;
+			if (mcount <= 0)
+				break;
+		}
+
 	}
 	if (Bflag > 0)
 		clearqueue();
@@ -271,7 +330,7 @@
 
 	if (cflag) {
 		if (!hflag)
-			printf("%s:", ln.file);
+			printf("%s:", pc.ln.file);
 		printf("%u\n", c);
 	}
 	if (lflag && !qflag && c != 0)
@@ -282,7 +341,7 @@
 	    binbehave == BINFILE_BIN && f->binary && !qflag)
 		printf(getstr(8), fn);
 
-	free(ln.file);
+	free(pc.ln.file);
 	free(f);
 	return (c);
 }
@@ -297,35 +356,50 @@
  * appropriate output.
  */
 static int
-procline(struct str *l, int nottext)
+procline(struct parsec *pc)
 {
-	regmatch_t matches[MAX_LINE_MATCHES];
-	regmatch_t pmatch, lastmatch;
+	regmatch_t pmatch, lastmatch, chkmatch;
+	wchar_t wbegin, wend;
 	size_t st = 0, nst = 0;
 	unsigned int i;
-	int c = 0, m = 0, r = 0, lastmatches = 0, leflags = eflags;
-	int startm = 0;
+	int c = 0, r = 0, lastmatches = 0, leflags = eflags;
+	size_t startm = 0, matchidx;
+
+	matchidx = pc->matchidx;
+
+	/* Special case: empty pattern with -w flag, check first character */
+	if (matchall && wflag) {
+		if (pc->ln.len == 0)
+			return (0);
+		wend = L' ';
+		if (sscanf(&pc->ln.dat[0], "%lc", &wend) != 1 || iswword(wend))
+			return (1);
+		else
+			return (0);
+	} else if (matchall)
+		return (0);
 
 	/* Initialize to avoid a false positive warning from GCC. */
 	lastmatch.rm_so = lastmatch.rm_eo = 0;
 
 	/* Loop to process the whole line */
-	while (st <= l->len) {
+	while (st <= pc->ln.len) {
 		lastmatches = 0;
-		startm = m;
+		startm = matchidx;
 		if (st > 0)
 			leflags |= REG_NOTBOL;
 		/* Loop to compare with all the patterns */
 		for (i = 0; i < patterns; i++) {
+
 			pmatch.rm_so = st;
-			pmatch.rm_eo = l->len;
+			pmatch.rm_eo = pc->ln.len;
 #ifndef WITHOUT_FASTMATCH
 			if (fg_pattern[i].pattern)
 				r = fastexec(&fg_pattern[i],
-				    l->dat, 1, &pmatch, leflags);
+				    pc->ln.dat, 1, &pmatch, leflags);
 			else
 #endif
-			r = regexec(&r_pattern[i], l->dat, 1,
+			r = regexec(&r_pattern[i], pc->ln.dat, 1,
 			    &pmatch, leflags);
 			r = (r == 0) ? 0 : REG_NOMATCH;
 			if (r == REG_NOMATCH)
@@ -333,7 +407,7 @@
 			/* Check for full match */
 			if (r == 0 && xflag)
 				if (pmatch.rm_so != 0 ||
-				    (size_t)pmatch.rm_eo != l->len)
+				    (size_t)pmatch.rm_eo != pc->ln.len)
 					r = REG_NOMATCH;
 			/* Check for whole word match */
 #ifndef WITHOUT_FASTMATCH
@@ -341,16 +415,14 @@
 #else
 			if (r == 0 && wflag) {
 #endif
-				wchar_t wbegin, wend;
-
 				wbegin = wend = L' ';
 				if (pmatch.rm_so != 0 &&
-				    sscanf(&l->dat[pmatch.rm_so - 1],
+				    sscanf(&pc->ln.dat[pmatch.rm_so - 1],
 				    "%lc", &wbegin) != 1)
 					r = REG_NOMATCH;
 				else if ((size_t)pmatch.rm_eo !=
-				    l->len &&
-				    sscanf(&l->dat[pmatch.rm_eo],
+				    pc->ln.len &&
+				    sscanf(&pc->ln.dat[pmatch.rm_eo],
 				    "%lc", &wend) != 1)
 					r = REG_NOMATCH;
 				else if (iswword(wbegin) ||
@@ -360,20 +432,23 @@
 			if (r == 0) {
 				lastmatches++;
 				lastmatch = pmatch;
-				if (m == 0)
+
+				if (matchidx == 0)
 					c++;
 
-				if (m < MAX_LINE_MATCHES) {
+				if (matchidx < MAX_LINE_MATCHES) {
 					/* Replace previous match if the new one is earlier and/or longer */
-					if (m > startm) {
-						if (pmatch.rm_so < matches[m-1].rm_so ||
-						    (pmatch.rm_so == matches[m-1].rm_so && (pmatch.rm_eo - pmatch.rm_so) > (matches[m-1].rm_eo - matches[m-1].rm_so))) {
-							matches[m-1] = pmatch;
+					if (matchidx > startm) {
+						chkmatch = pc->matches[matchidx - 1];
+						if (pmatch.rm_so < chkmatch.rm_so ||
+						    (pmatch.rm_so == chkmatch.rm_so &&
+						    (pmatch.rm_eo - pmatch.rm_so) > (chkmatch.rm_eo - chkmatch.rm_so))) {
+							pc->matches[matchidx - 1] = pmatch;
 							nst = pmatch.rm_eo;
 						}
 					} else {
 						/* Advance as normal if not */
-						matches[m++] = pmatch;
+						pc->matches[matchidx++] = pmatch;
 						nst = pmatch.rm_eo;
 					}
 				}
@@ -396,7 +471,7 @@
 
 		/* If we didn't have any matches or REG_NOSUB set */
 		if (lastmatches == 0 || (cflags & REG_NOSUB))
-			nst = l->len;
+			nst = pc->ln.len;
 
 		if (lastmatches == 0)
 			/* No matches */
@@ -409,42 +484,12 @@
 		st = nst;
 	}
 
+	/* Reflect the new matchidx in the context */
+	pc->matchidx = matchidx;
+	if (c && binbehave == BINFILE_BIN && pc->binary)
+		return (0); /* Binary file */
 
-	/* Count the matches if we have a match limit */
-	if (mflag)
-		mcount -= c;
-
-	if (c && binbehave == BINFILE_BIN && nottext)
-		return (c); /* Binary file */
-
-	/* Dealing with the context */
-	if ((tail || c) && !cflag && !qflag && !lflag && !Lflag) {
-		if (c) {
-			if (!first && !prev && !tail && (Bflag || Aflag) &&
-			    !ctxover)
-				printf("--\n");
-			tail = Aflag;
-			if (Bflag > 0) {
-				printqueue();
-				ctxover = false;
-			}
-			linesqueued = 0;
-			printline(l, ':', matches, m);
-		} else {
-			/* Print -A lines following matches */
-			lasta = l->line_no;
-			printline(l, '-', matches, m);
-			tail--;
-		}
-	}
-
-	if (c) {
-		prev = true;
-		first = false;
-	} else
-		prev = false;
-
-	return (c);
+	return (c ? 0 : 1);
 }
 
 /*
@@ -499,69 +544,92 @@
 }
 
 /*
- * Prints a matching line according to the command line options.
+ * Print an entire line as-is, there are no inline matches to consider. This is
+ * used for printing context.
  */
-void
-printline(struct str *line, int sep, regmatch_t *matches, int m)
-{
-	size_t a = 0;
-	int i, n = 0;
+void
+grep_printline(struct str *line, int sep)
+{
+	printline_metadata(line, sep);
+	fwrite(line->dat, line->len, 1, stdout);
+	putchar(fileeol);
+}
 
-	/* If matchall, everything matches but don't actually print for -o */
-	if (oflag && matchall)
-		return;
+static void
+printline_metadata(struct str *line, int sep)
+{
+	bool printsep;
 
+	printsep = false;
 	if (!hflag) {
 		if (!nullflag) {
 			fputs(line->file, stdout);
-			++n;
+			printsep = true;
 		} else {
 			printf("%s", line->file);
 			putchar(0);
 		}
 	}
 	if (nflag) {
-		if (n > 0)
+		if (printsep)
 			putchar(sep);
 		printf("%d", line->line_no);
-		++n;
+		printsep = true;
 	}
 	if (bflag) {
-		if (n > 0)
+		if (printsep)
 			putchar(sep);
 		printf("%lld", (long long)line->off);
-		++n;
+		printsep = true;
 	}
-	if (n)
+	if (printsep)
 		putchar(sep);
+}
+
+/*
+ * Prints a matching line according to the command line options.
+ */
+static void
+printline(struct parsec *pc, int sep)
+{
+	size_t a = 0;
+	size_t i, matchidx;
+	regmatch_t match;
+
+	/* If matchall, everything matches but don't actually print for -o */
+	if (oflag && matchall)
+		return;
+
+	matchidx = pc->matchidx;
+
 	/* --color and -o */
-	if ((oflag || color) && m > 0) {
-		for (i = 0; i < m; i++) {
+	if ((oflag || color) && matchidx > 0) {
+		printline_metadata(&pc->ln, sep);
+		for (i = 0; i < matchidx; i++) {
+			match = pc->matches[i];
 			/* Don't output zero length matches */
-			if (matches[i].rm_so == matches[i].rm_eo)
+			if (match.rm_so == match.rm_eo)
 				continue;
 			if (!oflag)
-				fwrite(line->dat + a, matches[i].rm_so - a, 1,
+				fwrite(pc->ln.dat + a, match.rm_so - a, 1,
 				    stdout);
-			if (color) 
+			if (color)
 				fprintf(stdout, "\33[%sm\33[K", color);
 
-				fwrite(line->dat + matches[i].rm_so,
-				    matches[i].rm_eo - matches[i].rm_so, 1,
-				    stdout);
-			if (color) 
+			fwrite(pc->ln.dat + match.rm_so,
+			    match.rm_eo - match.rm_so, 1, stdout);
+			if (color)
 				fprintf(stdout, "\33[m\33[K");
-			a = matches[i].rm_eo;
+			a = match.rm_eo;
 			if (oflag)
 				putchar('\n');
 		}
 		if (!oflag) {
-			if (line->len - a > 0)
-				fwrite(line->dat + a, line->len - a, 1, stdout);
+			if (pc->ln.len - a > 0)
+				fwrite(pc->ln.dat + a, pc->ln.len - a, 1,
+				    stdout);
 			putchar('\n');
 		}
-	} else {
-		fwrite(line->dat, line->len, 1, stdout);
-		putchar(fileeol);
-	}
+	} else
+		grep_printline(&pc->ln, sep);
 }