Index: stable/11/contrib/netbsd-tests/usr.bin/grep/t_grep.sh =================================================================== --- stable/11/contrib/netbsd-tests/usr.bin/grep/t_grep.sh (revision 354627) +++ stable/11/contrib/netbsd-tests/usr.bin/grep/t_grep.sh (revision 354628) @@ -1,786 +1,859 @@ # $NetBSD: t_grep.sh,v 1.3 2017/01/14 20:43:52 christos Exp $ # # Copyright (c) 2008, 2009 The NetBSD Foundation, Inc. # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS # ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED # TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS # BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. 
# atf_test_case basic basic_head() { atf_set "descr" "Checks basic functionality" } basic_body() { atf_check -o file:"$(atf_get_srcdir)/d_basic.out" -x \ 'jot 10000 | grep 123' } atf_test_case binary binary_head() { atf_set "descr" "Checks handling of binary files" } binary_body() { dd if=/dev/zero count=1 of=test.file echo -n "foobar" >> test.file atf_check -o file:"$(atf_get_srcdir)/d_binary.out" grep foobar test.file } atf_test_case recurse recurse_head() { atf_set "descr" "Checks recursive searching" } recurse_body() { mkdir -p recurse/a/f recurse/d echo -e "cod\ndover sole\nhaddock\nhalibut\npilchard" > recurse/d/fish echo -e "cod\nhaddock\nplaice" > recurse/a/f/favourite-fish atf_check -o file:"$(atf_get_srcdir)/d_recurse.out" -x "grep -r haddock recurse | sort" } atf_test_case recurse_symlink recurse_symlink_head() { atf_set "descr" "Checks symbolic link recursion" } recurse_symlink_body() { # Begin FreeBSD grep_type if [ $? -eq $GREP_TYPE_GNU ]; then atf_expect_fail "this test doesn't pass with gnu grep from ports" fi # End FreeBSD mkdir -p test/c/d (cd test/c/d && ln -s ../d .) 
echo "Test string" > test/c/match atf_check -o file:"$(atf_get_srcdir)/d_recurse_symlink.out" \ -e file:"$(atf_get_srcdir)/d_recurse_symlink.err" \ grep -r string test } atf_test_case word_regexps word_regexps_head() { atf_set "descr" "Checks word-regexps" } word_regexps_body() { atf_check -o file:"$(atf_get_srcdir)/d_word_regexps.out" \ grep -w separated $(atf_get_srcdir)/d_input # Begin FreeBSD printf "xmatch pmatch\n" > test1 atf_check -o inline:"pmatch\n" grep -Eow "(match )?pmatch" test1 # End FreeBSD } atf_test_case begin_end begin_end_head() { atf_set "descr" "Checks handling of line beginnings and ends" } begin_end_body() { atf_check -o file:"$(atf_get_srcdir)/d_begin_end_a.out" \ grep ^Front "$(atf_get_srcdir)/d_input" atf_check -o file:"$(atf_get_srcdir)/d_begin_end_b.out" \ grep ending$ "$(atf_get_srcdir)/d_input" } atf_test_case ignore_case ignore_case_head() { atf_set "descr" "Checks ignore-case option" } ignore_case_body() { atf_check -o file:"$(atf_get_srcdir)/d_ignore_case.out" \ grep -i Upper "$(atf_get_srcdir)/d_input" } atf_test_case invert invert_head() { atf_set "descr" "Checks selecting non-matching lines with -v option" } invert_body() { atf_check -o file:"$(atf_get_srcdir)/d_invert.out" \ grep -v fish "$(atf_get_srcdir)/d_invert.in" } atf_test_case whole_line whole_line_head() { atf_set "descr" "Checks whole-line matching with -x flag" } whole_line_body() { atf_check -o file:"$(atf_get_srcdir)/d_whole_line.out" \ grep -x matchme "$(atf_get_srcdir)/d_input" } atf_test_case negative negative_head() { atf_set "descr" "Checks handling of files with no matches" } negative_body() { atf_check -s ne:0 grep "not a hope in hell" "$(atf_get_srcdir)/d_input" } atf_test_case context context_head() { atf_set "descr" "Checks displaying context with -A, -B and -C flags" } context_body() { cp $(atf_get_srcdir)/d_context_*.* . 
atf_check -o file:d_context_a.out grep -C2 bamboo d_context_a.in atf_check -o file:d_context_b.out grep -A3 tilt d_context_a.in atf_check -o file:d_context_c.out grep -B4 Whig d_context_a.in atf_check -o file:d_context_d.out grep -C1 pig d_context_a.in d_context_b.in atf_check -o file:d_context_e.out \ grep -E -C1 '(banana|monkey)' d_context_e.in atf_check -o file:d_context_f.out \ grep -Ev -B2 '(banana|monkey|fruit)' d_context_e.in atf_check -o file:d_context_g.out \ grep -Ev -A1 '(banana|monkey|fruit)' d_context_e.in } atf_test_case file_exp file_exp_head() { atf_set "descr" "Checks reading expressions from file" } file_exp_body() { atf_check -o file:"$(atf_get_srcdir)/d_file_exp.out" -x \ 'jot 21 -1 1.00 | grep -f '"$(atf_get_srcdir)"'/d_file_exp.in' } atf_test_case egrep egrep_head() { atf_set "descr" "Checks matching special characters with egrep" } egrep_body() { atf_check -o file:"$(atf_get_srcdir)/d_egrep.out" \ egrep '\?|\*$$' "$(atf_get_srcdir)/d_input" } atf_test_case zgrep zgrep_head() { atf_set "descr" "Checks handling of gzipped files with zgrep" } zgrep_body() { cp "$(atf_get_srcdir)/d_input" . gzip d_input || atf_fail "gzip failed" atf_check -o file:"$(atf_get_srcdir)/d_zgrep.out" zgrep -h line d_input.gz } atf_test_case nonexistent nonexistent_head() { atf_set "descr" "Checks that -s flag suppresses error" \ "messages about nonexistent files" } nonexistent_body() { atf_check -s ne:0 grep -s foobar nonexistent } atf_test_case context2 context2_head() { atf_set "descr" "Checks displaying context with -z flag" } context2_body() { printf "haddock\000cod\000plaice\000" > test1 printf "mackeral\000cod\000crab\000" > test2 atf_check -o file:"$(atf_get_srcdir)/d_context2_a.out" \ grep -z -A1 cod test1 test2 atf_check -o file:"$(atf_get_srcdir)/d_context2_b.out" \ grep -z -B1 cod test1 test2 atf_check -o file:"$(atf_get_srcdir)/d_context2_c.out" \ grep -z -C1 cod test1 test2 } # Begin FreeBSD # What grep(1) are we working with? 
# - 0 : bsdgrep # - 1 : gnu grep 2.51 (base) # - 2 : gnu grep (ports) GREP_TYPE_BSD=0 GREP_TYPE_GNU_FREEBSD=1 GREP_TYPE_GNU=2 GREP_TYPE_UNKNOWN=3 grep_type() { local grep_version=$(grep --version) case "$grep_version" in *"BSD grep"*) return $GREP_TYPE_BSD ;; *"GNU grep"*) case "$grep_version" in *2.5.1-FreeBSD*) return $GREP_TYPE_GNU_FREEBSD ;; *) return $GREP_TYPE_GNU ;; esac ;; esac atf_fail "unknown grep type: $grep_version" } atf_test_case oflag_zerolen oflag_zerolen_head() { atf_set "descr" "Check behavior of zero-length matches with -o flag (PR 195763)" } oflag_zerolen_body() { grep_type if [ $? -eq $GREP_TYPE_GNU_FREEBSD ]; then atf_expect_fail "this test doesn't pass with gnu grep in base" fi atf_check -o file:"$(atf_get_srcdir)/d_oflag_zerolen_a.out" \ grep -Eo '(^|:)0*' "$(atf_get_srcdir)/d_oflag_zerolen_a.in" atf_check -o file:"$(atf_get_srcdir)/d_oflag_zerolen_b.out" \ grep -Eo '(^|:)0*' "$(atf_get_srcdir)/d_oflag_zerolen_b.in" atf_check -o file:"$(atf_get_srcdir)/d_oflag_zerolen_c.out" \ grep -Eo '[[:alnum:]]*' "$(atf_get_srcdir)/d_oflag_zerolen_c.in" atf_check -o empty grep -Eo '' "$(atf_get_srcdir)/d_oflag_zerolen_d.in" atf_check -o file:"$(atf_get_srcdir)/d_oflag_zerolen_e.out" \ grep -o -e 'ab' -e 'bc' "$(atf_get_srcdir)/d_oflag_zerolen_e.in" atf_check -o file:"$(atf_get_srcdir)/d_oflag_zerolen_e.out" \ grep -o -e 'bc' -e 'ab' "$(atf_get_srcdir)/d_oflag_zerolen_e.in" } atf_test_case xflag xflag_head() { atf_set "descr" "Check that we actually get a match with -x flag (PR 180990)" } xflag_body() { echo 128 > match_file seq 1 128 > pattern_file grep -xf pattern_file match_file } atf_test_case color color_head() { atf_set "descr" "Check --color support" } color_body() { grep_type if [ $? 
-eq $GREP_TYPE_GNU_FREEBSD ]; then atf_expect_fail "this test doesn't pass with gnu grep in base" fi echo 'abcd*' > grepfile echo 'abc$' >> grepfile echo '^abc' >> grepfile atf_check -o file:"$(atf_get_srcdir)/d_color_a.out" \ grep --color=auto -e '.*' -e 'a' "$(atf_get_srcdir)/d_color_a.in" atf_check -o file:"$(atf_get_srcdir)/d_color_b.out" \ grep --color=auto -f grepfile "$(atf_get_srcdir)/d_color_b.in" atf_check -o file:"$(atf_get_srcdir)/d_color_c.out" \ grep --color=always -f grepfile "$(atf_get_srcdir)/d_color_b.in" } atf_test_case f_file_empty f_file_empty_head() { atf_set "descr" "Check for handling of a null byte in empty file, specified by -f (PR 202022)" } f_file_empty_body() { printf "\0\n" > nulpat atf_check -s exit:1 grep -f nulpat "$(atf_get_srcdir)/d_f_file_empty.in" } atf_test_case escmap escmap_head() { atf_set "descr" "Check proper handling of escaped vs. unescaped dot expressions (PR 175314)" } escmap_body() { atf_check -s exit:1 grep -o 'f.o\.' "$(atf_get_srcdir)/d_escmap.in" atf_check -o not-empty grep -o 'f.o.' 
"$(atf_get_srcdir)/d_escmap.in" } atf_test_case egrep_empty_invalid egrep_empty_invalid_head() { atf_set "descr" "Check for handling of an invalid empty pattern (PR 194823)" } egrep_empty_invalid_body() { atf_check -e ignore -s not-exit:0 egrep '{' /dev/null } atf_test_case zerolen zerolen_head() { atf_set "descr" "Check for successful zero-length matches with ^$" } zerolen_body() { printf "Eggs\n\nCheese" > test1 atf_check -o inline:"\n" grep -e "^$" test1 atf_check -o inline:"Eggs\nCheese\n" grep -v -e "^$" test1 } atf_test_case wflag_emptypat wflag_emptypat_head() { atf_set "descr" "Check for proper handling of -w with an empty pattern (PR 105221)" } wflag_emptypat_body() { printf "" > test1 printf "\n" > test2 printf "qaz" > test3 printf " qaz\n" > test4 atf_check -s exit:1 -o empty grep -w -e "" test1 atf_check -o file:test2 grep -w -e "" test2 atf_check -s exit:1 -o empty grep -w -e "" test3 atf_check -o file:test4 grep -w -e "" test4 } +atf_test_case xflag_emptypat +xflag_emptypat_body() +{ + printf "" > test1 + printf "\n" > test2 + printf "qaz" > test3 + printf " qaz\n" > test4 + + # -x is whole-line, more strict than -w. + atf_check -s exit:1 -o empty grep -x -e "" test1 + + atf_check -o file:test2 grep -x -e "" test2 + + atf_check -s exit:1 -o empty grep -x -e "" test3 + + atf_check -s exit:1 -o empty grep -x -e "" test4 + + total=$(wc -l /COPYRIGHT | sed 's/[^0-9]//g') + + # Simple checks that grep -x with an empty pattern isn't matching every + # line. The exact counts aren't important, as long as they don't + # match the total line count and as long as they don't match each other. 
+ atf_check -o save:xpositive.count grep -Fxc '' /COPYRIGHT + atf_check -o save:xnegative.count grep -Fvxc '' /COPYRIGHT + + atf_check -o not-inline:"${total}" cat xpositive.count + atf_check -o not-inline:"${total}" cat xnegative.count + + atf_check -o not-file:xnegative.count cat xpositive.count +} + +atf_test_case xflag_emptypat_plus +xflag_emptypat_plus_body() +{ + printf "foo\n\nbar\n\nbaz\n" > target + printf "foo\n \nbar\n \nbaz\n" > target_spacelines + printf "foo\nbar\nbaz\n" > matches + printf " \n \n" > spacelines + + printf "foo\n\nbar\n\nbaz\n" > patlist1 + printf "foo\n\nba\n\nbaz\n" > patlist2 + + sed -e '/bar/d' target > matches_not2 + + # Normal handling first + atf_check -o file:target grep -Fxf patlist1 target + atf_check -o file:matches grep -Fxf patlist1 target_spacelines + atf_check -o file:matches_not2 grep -Fxf patlist2 target + + # -v handling + atf_check -s exit:1 -o empty grep -Fvxf patlist1 target + atf_check -o file:spacelines grep -Fxvf patlist1 target_spacelines +} + atf_test_case excessive_matches excessive_matches_head() { atf_set "descr" "Check for proper handling of lines with excessive matches (PR 218811)" } excessive_matches_body() { grep_type if [ $? -eq $GREP_TYPE_GNU_FREEBSD ]; then atf_expect_fail "this test does not pass with GNU grep in base" fi for i in $(jot 4096); do printf "x" >> test.in done atf_check -s exit:0 -x '[ $(grep -o x test.in | wc -l) -eq 4096 ]' atf_check -s exit:1 -x 'grep -on x test.in | grep -v "1:x"' } atf_test_case fgrep_sanity fgrep_sanity_head() { atf_set "descr" "Check for fgrep sanity, literal expressions only" } fgrep_sanity_body() { printf "Foo" > test1 atf_check -o inline:"Foo\n" fgrep -e "Foo" test1 atf_check -s exit:1 -o empty fgrep -e "Fo." test1 } atf_test_case egrep_sanity egrep_sanity_head() { atf_set "descr" "Check for egrep sanity, EREs only" } egrep_sanity_body() { printf "Foobar(ed)" > test1 printf "M{1}" > test2 atf_check -o inline:"Foo\n" egrep -o -e "F.." 
test1 atf_check -o inline:"Foobar\n" egrep -o -e "F[a-z]*" test1 atf_check -o inline:"Fo\n" egrep -o -e "F(o|p)" test1 atf_check -o inline:"(ed)\n" egrep -o -e "\(ed\)" test1 atf_check -o inline:"M\n" egrep -o -e "M{1}" test2 atf_check -o inline:"M{1}\n" egrep -o -e "M\{1\}" test2 } atf_test_case grep_sanity grep_sanity_head() { atf_set "descr" "Check for basic grep sanity, BREs only" } grep_sanity_body() { printf "Foobar(ed)" > test1 printf "M{1}" > test2 atf_check -o inline:"Foo\n" grep -o -e "F.." test1 atf_check -o inline:"Foobar\n" grep -o -e "F[a-z]*" test1 atf_check -o inline:"Fo\n" grep -o -e "F\(o\)" test1 atf_check -o inline:"(ed)\n" grep -o -e "(ed)" test1 atf_check -o inline:"M{1}\n" grep -o -e "M{1}" test2 atf_check -o inline:"M\n" grep -o -e "M\{1\}" test2 } atf_test_case wv_combo_break wv_combo_break_head() { atf_set "descr" "Check for incorrectly matching lines with both -w and -v flags (PR 218467)" } wv_combo_break_body() { printf "x xx\n" > test1 printf "xx x\n" > test2 atf_check -o file:test1 grep -w "x" test1 atf_check -o file:test2 grep -w "x" test2 atf_check -s exit:1 grep -v -w "x" test1 atf_check -s exit:1 grep -v -w "x" test2 } atf_test_case ocolor_metadata ocolor_metadata_head() { atf_set "descr" "Check for -n/-b producing per-line metadata output" } ocolor_metadata_body() { grep_type if [ $? -eq $GREP_TYPE_GNU_FREEBSD ]; then atf_expect_fail "this test does not pass with GNU grep in base" fi printf "xxx\nyyyy\nzzz\nfoobarbaz\n" > test1 check_expr="^[^:]*[0-9][^:]*:[^:]+$" atf_check -o inline:"1:1:xx\n" grep -bon "xx$" test1 atf_check -o inline:"2:4:yyyy\n" grep -bn "yy" test1 atf_check -o inline:"2:6:yy\n" grep -bon "yy$" test1 # These checks ensure that grep isn't producing bogus line numbering # in the middle of a line. 
atf_check -s exit:1 -x \ "grep -Eon 'x|y|z|f' test1 | grep -Ev '${check_expr}'" atf_check -s exit:1 -x \ "grep -En 'x|y|z|f' --color=always test1 | grep -Ev '${check_expr}'" atf_check -s exit:1 -x \ "grep -Eon 'x|y|z|f' --color=always test1 | grep -Ev '${check_expr}'" } atf_test_case grep_nomatch_flags grep_nomatch_flags_head() { atf_set "descr" "Check for no match (-c, -l, -L, -q) flags not producing line matches or context (PR 219077)" } grep_nomatch_flags_body() { + grep_type + + if [ $? -eq $GREP_TYPE_GNU_FREEBSD ]; then + atf_expect_fail "this test does not pass with GNU grep in base" + fi + printf "A\nB\nC\n" > test1 atf_check -o inline:"1\n" grep -c -C 1 -e "B" test1 atf_check -o inline:"1\n" grep -c -B 1 -e "B" test1 atf_check -o inline:"1\n" grep -c -A 1 -e "B" test1 atf_check -o inline:"1\n" grep -c -C 1 -e "B" test1 atf_check -o inline:"test1\n" grep -l -e "B" test1 atf_check -o inline:"test1\n" grep -l -B 1 -e "B" test1 atf_check -o inline:"test1\n" grep -l -A 1 -e "B" test1 atf_check -o inline:"test1\n" grep -l -C 1 -e "B" test1 - atf_check -s exit:1 -o inline:"test1\n" grep -L -e "D" test1 + atf_check -o inline:"test1\n" grep -L -e "D" test1 atf_check -o empty grep -q -e "B" test1 atf_check -o empty grep -q -B 1 -e "B" test1 atf_check -o empty grep -q -A 1 -e "B" test1 atf_check -o empty grep -q -C 1 -e "B" test1 } atf_test_case badcontext badcontext_head() { atf_set "descr" "Check for handling of invalid context arguments" } badcontext_body() { printf "A\nB\nC\n" > test1 atf_check -s not-exit:0 -e ignore grep -A "-1" "B" test1 atf_check -s not-exit:0 -e ignore grep -B "-1" "B" test1 atf_check -s not-exit:0 -e ignore grep -C "-1" "B" test1 atf_check -s not-exit:0 -e ignore grep -A "B" "B" test1 atf_check -s not-exit:0 -e ignore grep -B "B" "B" test1 atf_check -s not-exit:0 -e ignore grep -C "B" "B" test1 } atf_test_case binary_flags binary_flags_head() { atf_set "descr" "Check output for binary flags (-a, -I, -U, --binary-files)" } binary_flags_body() 
{ printf "A\000B\000C" > test1 printf "A\n\000B\n\000C" > test2 binmatchtext="Binary file test1 matches\n" # Binaries not treated as text (default, -U) atf_check -o inline:"${binmatchtext}" grep 'B' test1 atf_check -o inline:"${binmatchtext}" grep 'B' -C 1 test1 atf_check -o inline:"${binmatchtext}" grep -U 'B' test1 atf_check -o inline:"${binmatchtext}" grep -U 'B' -C 1 test1 # Binary, -a, no newlines atf_check -o inline:"A\000B\000C\n" grep -a 'B' test1 atf_check -o inline:"A\000B\000C\n" grep -a 'B' -C 1 test1 # Binary, -a, newlines atf_check -o inline:"\000B\n" grep -a 'B' test2 atf_check -o inline:"A\n\000B\n\000C\n" grep -a 'B' -C 1 test2 # Binary files ignored atf_check -s exit:1 grep -I 'B' test2 # --binary-files equivalence atf_check -o inline:"${binmatchtext}" grep --binary-files=binary 'B' test1 atf_check -o inline:"A\000B\000C\n" grep --binary-files=text 'B' test1 atf_check -s exit:1 grep --binary-files=without-match 'B' test2 } atf_test_case mmap mmap_head() { atf_set "descr" "Check basic matching with --mmap flag" } mmap_body() { grep_type if [ $? -eq $GREP_TYPE_GNU ]; then atf_expect_fail "gnu grep from ports has no --mmap option" fi printf "A\nB\nC\n" > test1 atf_check -s exit:0 -o inline:"B\n" grep --mmap -oe "B" test1 atf_check -s exit:1 grep --mmap -e "Z" test1 } -atf_test_case mmap_eof_not_eol -mmap_eof_not_eol_head() -{ - atf_set "descr" "Check --mmap flag handling of encountering EOF without EOL (PR 165471, 219402)" -} -mmap_eof_not_eol_body() -{ - grep_type - if [ $? 
-eq $GREP_TYPE_GNU ]; then - atf_expect_fail "gnu grep from ports has no --mmap option" - fi - - printf "ABC" > test1 - jot -b " " -s "" 4096 >> test2 - - atf_check -s exit:0 -o inline:"B\n" grep --mmap -oe "B" test1 - # Dependency on jemalloc(3) to detect buffer overflow, otherwise this - # unreliably produces a SIGSEGV or SIGBUS - atf_check -s exit:0 -o not-empty \ - env MALLOC_CONF="redzone:true" grep --mmap -e " " test2 -} - atf_test_case matchall matchall_head() { atf_set "descr" "Check proper behavior of matching all with an empty string" } matchall_body() { printf "" > test1 printf "A" > test2 printf "A\nB" > test3 atf_check -o inline:"test2:A\ntest3:A\ntest3:B\n" grep "" test1 test2 test3 atf_check -o inline:"test3:A\ntest3:B\ntest2:A\n" grep "" test3 test1 test2 atf_check -o inline:"test2:A\ntest3:A\ntest3:B\n" grep "" test2 test3 test1 atf_check -s exit:1 grep "" test1 } atf_test_case fgrep_multipattern fgrep_multipattern_head() { atf_set "descr" "Check proper behavior with multiple patterns supplied to fgrep" } fgrep_multipattern_body() { printf "Foo\nBar\nBaz" > test1 atf_check -o inline:"Foo\nBaz\n" grep -F -e "Foo" -e "Baz" test1 atf_check -o inline:"Foo\nBaz\n" grep -F -e "Baz" -e "Foo" test1 atf_check -o inline:"Bar\nBaz\n" grep -F -e "Bar" -e "Baz" test1 } atf_test_case fgrep_icase fgrep_icase_head() { atf_set "descr" "Check proper handling of -i supplied to fgrep" } fgrep_icase_body() { printf "Foo\nBar\nBaz" > test1 atf_check -o inline:"Foo\nBaz\n" grep -Fi -e "foo" -e "baz" test1 atf_check -o inline:"Foo\nBaz\n" grep -Fi -e "baz" -e "foo" test1 atf_check -o inline:"Bar\nBaz\n" grep -Fi -e "bar" -e "baz" test1 atf_check -o inline:"Bar\nBaz\n" grep -Fi -e "BAR" -e "bAz" test1 } atf_test_case fgrep_oflag fgrep_oflag_head() { atf_set "descr" "Check proper handling of -o supplied to fgrep" } fgrep_oflag_body() { printf "abcdefghi\n" > test1 atf_check -o inline:"a\n" grep -Fo "a" test1 atf_check -o inline:"i\n" grep -Fo "i" test1 atf_check -o 
inline:"abc\n" grep -Fo "abc" test1 atf_check -o inline:"fgh\n" grep -Fo "fgh" test1 atf_check -o inline:"cde\n" grep -Fo "cde" test1 atf_check -o inline:"bcd\n" grep -Fo -e "bcd" -e "cde" test1 atf_check -o inline:"bcd\nefg\n" grep -Fo -e "bcd" -e "efg" test1 atf_check -s exit:1 grep -Fo "xabc" test1 atf_check -s exit:1 grep -Fo "abcx" test1 atf_check -s exit:1 grep -Fo "xghi" test1 atf_check -s exit:1 grep -Fo "ghix" test1 atf_check -s exit:1 grep -Fo "abcdefghiklmnopqrstuvwxyz" test1 } + +atf_test_case cflag +cflag_head() +{ + atf_set "descr" "Check proper handling of -c" +} +cflag_body() +{ + printf "a\nb\nc\n" > test1 + + atf_check -o inline:"1\n" grep -Ec "a" test1 + atf_check -o inline:"2\n" grep -Ec "a|b" test1 + atf_check -o inline:"3\n" grep -Ec "a|b|c" test1 + + atf_check -o inline:"test1:2\n" grep -EHc "a|b" test1 +} + +atf_test_case mflag +mflag_head() +{ + atf_set "descr" "Check proper handling of -m" +} +mflag_body() +{ + printf "a\nb\nc\nd\ne\nf\n" > test1 + + atf_check -o inline:"1\n" grep -m 1 -Ec "a" test1 + atf_check -o inline:"2\n" grep -m 2 -Ec "a|b" test1 + atf_check -o inline:"3\n" grep -m 3 -Ec "a|b|c|f" test1 + + atf_check -o inline:"test1:2\n" grep -m 2 -EHc "a|b|e|f" test1 +} # End FreeBSD atf_init_test_cases() { atf_add_test_case basic atf_add_test_case binary atf_add_test_case recurse atf_add_test_case recurse_symlink atf_add_test_case word_regexps atf_add_test_case begin_end atf_add_test_case ignore_case atf_add_test_case invert atf_add_test_case whole_line atf_add_test_case negative atf_add_test_case context atf_add_test_case file_exp atf_add_test_case egrep atf_add_test_case zgrep atf_add_test_case nonexistent atf_add_test_case context2 # Begin FreeBSD atf_add_test_case oflag_zerolen atf_add_test_case xflag atf_add_test_case color atf_add_test_case f_file_empty atf_add_test_case escmap atf_add_test_case egrep_empty_invalid atf_add_test_case zerolen atf_add_test_case wflag_emptypat + atf_add_test_case xflag_emptypat + 
atf_add_test_case xflag_emptypat_plus atf_add_test_case excessive_matches atf_add_test_case wv_combo_break atf_add_test_case fgrep_sanity atf_add_test_case egrep_sanity atf_add_test_case grep_sanity atf_add_test_case ocolor_metadata atf_add_test_case grep_nomatch_flags atf_add_test_case binary_flags atf_add_test_case badcontext atf_add_test_case mmap - atf_add_test_case mmap_eof_not_eol atf_add_test_case matchall atf_add_test_case fgrep_multipattern atf_add_test_case fgrep_icase atf_add_test_case fgrep_oflag + atf_add_test_case cflag + atf_add_test_case mflag # End FreeBSD } Index: stable/11/usr.bin/grep/file.c =================================================================== --- stable/11/usr.bin/grep/file.c (revision 354627) +++ stable/11/usr.bin/grep/file.c (revision 354628) @@ -1,360 +1,381 @@ /* $NetBSD: file.c,v 1.5 2011/02/16 18:35:39 joerg Exp $ */ /* $FreeBSD$ */ /* $OpenBSD: file.c,v 1.11 2010/07/02 20:48:48 nicm Exp $ */ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav * Copyright (C) 2008-2010 Gabor Kovesdan * Copyright (C) 2010 Dimitry Andric * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifndef WITHOUT_LZMA #include #endif #ifndef WITHOUT_BZIP2 #include #endif #include "grep.h" #define MAXBUFSIZ (32 * 1024) #define LNBUFBUMP 80 static gzFile gzbufdesc; #ifndef WITHOUT_LZMA static lzma_stream lstrm = LZMA_STREAM_INIT; static lzma_action laction; static uint8_t lin_buf[MAXBUFSIZ]; #endif #ifndef WITHOUT_BZIP2 static BZFILE* bzbufdesc; #endif static unsigned char *buffer; static unsigned char *bufpos; static size_t bufrem; static size_t fsiz; static unsigned char *lnbuf; static size_t lnbuflen; static inline int grep_refill(struct file *f) { ssize_t nr; +#ifndef WITHOUT_LZMA + lzma_ret lzmaret; +#endif if (filebehave == FILE_MMAP) return (0); bufpos = buffer; bufrem = 0; - if (filebehave == FILE_GZIP) { + switch (filebehave) { + case FILE_GZIP: nr = gzread(gzbufdesc, buffer, MAXBUFSIZ); + break; #ifndef WITHOUT_BZIP2 - } else if (filebehave == FILE_BZIP && bzbufdesc != NULL) { - int bzerr; + case FILE_BZIP: + if (bzbufdesc != NULL) { + int bzerr; - nr = BZ2_bzRead(&bzerr, bzbufdesc, buffer, MAXBUFSIZ); - switch (bzerr) { - case BZ_OK: - case BZ_STREAM_END: - /* No problem, nr will be okay */ - break; - case BZ_DATA_ERROR_MAGIC: + nr = BZ2_bzRead(&bzerr, bzbufdesc, buffer, MAXBUFSIZ); + switch (bzerr) { + case BZ_OK: + case BZ_STREAM_END: + /* No problem, nr will be okay */ + break; + 
case BZ_DATA_ERROR_MAGIC: + /* + * As opposed to gzread(), which simply returns the + * plain file data, if it is not in the correct + * compressed format, BZ2_bzRead() instead aborts. + * + * So, just restart at the beginning of the file again, + * and use plain reads from now on. + */ + BZ2_bzReadClose(&bzerr, bzbufdesc); + bzbufdesc = NULL; + if (lseek(f->fd, 0, SEEK_SET) == -1) + return (-1); + nr = read(f->fd, buffer, MAXBUFSIZ); + break; + default: + /* Make sure we exit with an error */ + nr = -1; + } + } else /* - * As opposed to gzread(), which simply returns the - * plain file data, if it is not in the correct - * compressed format, BZ2_bzRead() instead aborts. - * - * So, just restart at the beginning of the file again, - * and use plain reads from now on. + * Also an error case; we should never have a scenario + * where we have an open file but no bzip descriptor + * at this point. See: grep_open */ - BZ2_bzReadClose(&bzerr, bzbufdesc); - bzbufdesc = NULL; - if (lseek(f->fd, 0, SEEK_SET) == -1) - return (-1); - nr = read(f->fd, buffer, MAXBUFSIZ); - break; - default: - /* Make sure we exit with an error */ nr = -1; - } + break; #endif #ifndef WITHOUT_LZMA - } else if ((filebehave == FILE_XZ) || (filebehave == FILE_LZMA)) { - lzma_ret ret; + case FILE_XZ: + case FILE_LZMA: lstrm.next_out = buffer; do { if (lstrm.avail_in == 0) { lstrm.next_in = lin_buf; nr = read(f->fd, lin_buf, MAXBUFSIZ); if (nr < 0) return (-1); else if (nr == 0) laction = LZMA_FINISH; lstrm.avail_in = nr; } - ret = lzma_code(&lstrm, laction); + lzmaret = lzma_code(&lstrm, laction); - if (ret != LZMA_OK && ret != LZMA_STREAM_END) + if (lzmaret != LZMA_OK && lzmaret != LZMA_STREAM_END) return (-1); - if (lstrm.avail_out == 0 || ret == LZMA_STREAM_END) { + if (lstrm.avail_out == 0 || lzmaret == LZMA_STREAM_END) { bufrem = MAXBUFSIZ - lstrm.avail_out; lstrm.next_out = buffer; lstrm.avail_out = MAXBUFSIZ; } - } while (bufrem == 0 && ret != LZMA_STREAM_END); + } while (bufrem == 0 && 
lzmaret != LZMA_STREAM_END); return (0); -#endif /* WIHTOUT_LZMA */ - } else +#endif /* WITHOUT_LZMA */ + default: nr = read(f->fd, buffer, MAXBUFSIZ); - + } if (nr < 0) return (-1); bufrem = nr; return (0); } static inline int grep_lnbufgrow(size_t newlen) { if (lnbuflen < newlen) { lnbuf = grep_realloc(lnbuf, newlen); lnbuflen = newlen; } return (0); } char * -grep_fgetln(struct file *f, size_t *lenp) +grep_fgetln(struct file *f, struct parsec *pc) { unsigned char *p; char *ret; size_t len; size_t off; ptrdiff_t diff; /* Fill the buffer, if necessary */ if (bufrem == 0 && grep_refill(f) != 0) goto error; if (bufrem == 0) { /* Return zero length to indicate EOF */ - *lenp = 0; + pc->ln.len= 0; return (bufpos); } /* Look for a newline in the remaining part of the buffer */ if ((p = memchr(bufpos, fileeol, bufrem)) != NULL) { ++p; /* advance over newline */ ret = bufpos; len = p - bufpos; bufrem -= len; bufpos = p; - *lenp = len; + pc->ln.len = len; return (ret); } /* We have to copy the current buffered data to the line buffer */ for (len = bufrem, off = 0; ; len += bufrem) { /* Make sure there is room for more data */ if (grep_lnbufgrow(len + LNBUFBUMP)) goto error; memcpy(lnbuf + off, bufpos, len - off); /* With FILE_MMAP, this is EOF; there's no more to refill */ if (filebehave == FILE_MMAP) { bufrem -= len; break; } off = len; /* Fetch more to try and find EOL/EOF */ if (grep_refill(f) != 0) goto error; if (bufrem == 0) /* EOF: return partial line */ break; if ((p = memchr(bufpos, fileeol, bufrem)) == NULL) continue; /* got it: finish up the line (like code above) */ ++p; diff = p - bufpos; len += diff; if (grep_lnbufgrow(len)) goto error; memcpy(lnbuf + off, bufpos, diff); bufrem -= diff; bufpos = p; break; } - *lenp = len; + pc->ln.len = len; return (lnbuf); error: - *lenp = 0; + pc->ln.len = 0; return (NULL); } /* * Opens a file for processing. 
*/ struct file * grep_open(const char *path) { struct file *f; +#ifndef WITHOUT_LZMA + lzma_ret lzmaret; +#endif f = grep_malloc(sizeof *f); memset(f, 0, sizeof *f); if (path == NULL) { /* Processing stdin implies --line-buffered. */ lbflag = true; f->fd = STDIN_FILENO; } else if ((f->fd = open(path, O_RDONLY)) == -1) goto error1; if (filebehave == FILE_MMAP) { struct stat st; if ((fstat(f->fd, &st) == -1) || (st.st_size > OFF_MAX) || (!S_ISREG(st.st_mode))) filebehave = FILE_STDIO; else { int flags = MAP_PRIVATE | MAP_NOCORE | MAP_NOSYNC; #ifdef MAP_PREFAULT_READ flags |= MAP_PREFAULT_READ; #endif fsiz = st.st_size; buffer = mmap(NULL, fsiz, PROT_READ, flags, f->fd, (off_t)0); if (buffer == MAP_FAILED) filebehave = FILE_STDIO; else { bufrem = st.st_size; bufpos = buffer; madvise(buffer, st.st_size, MADV_SEQUENTIAL); } } } if ((buffer == NULL) || (buffer == MAP_FAILED)) buffer = grep_malloc(MAXBUFSIZ); - if (filebehave == FILE_GZIP && - (gzbufdesc = gzdopen(f->fd, "r")) == NULL) - goto error2; - + switch (filebehave) { + case FILE_GZIP: + if ((gzbufdesc = gzdopen(f->fd, "r")) == NULL) + goto error2; + break; #ifndef WITHOUT_BZIP2 - if (filebehave == FILE_BZIP && - (bzbufdesc = BZ2_bzdopen(f->fd, "r")) == NULL) - goto error2; + case FILE_BZIP: + if ((bzbufdesc = BZ2_bzdopen(f->fd, "r")) == NULL) + goto error2; + break; #endif #ifndef WITHOUT_LZMA - else if ((filebehave == FILE_XZ) || (filebehave == FILE_LZMA)) { - lzma_ret ret; + case FILE_XZ: + case FILE_LZMA: - ret = (filebehave == FILE_XZ) ? 
- lzma_stream_decoder(&lstrm, UINT64_MAX, - LZMA_CONCATENATED) : - lzma_alone_decoder(&lstrm, UINT64_MAX); + if (filebehave == FILE_XZ) + lzmaret = lzma_stream_decoder(&lstrm, UINT64_MAX, + LZMA_CONCATENATED); + else + lzmaret = lzma_alone_decoder(&lstrm, UINT64_MAX); - if (ret != LZMA_OK) + if (lzmaret != LZMA_OK) goto error2; lstrm.avail_in = 0; lstrm.avail_out = MAXBUFSIZ; laction = LZMA_RUN; - } + break; #endif + } /* Fill read buffer, also catches errors early */ if (bufrem == 0 && grep_refill(f) != 0) goto error2; /* Check for binary stuff, if necessary */ if (binbehave != BINFILE_TEXT && fileeol != '\0' && memchr(bufpos, '\0', bufrem) != NULL) - f->binary = true; + f->binary = true; return (f); error2: close(f->fd); error1: free(f); return (NULL); } /* * Closes a file. */ void grep_close(struct file *f) { close(f->fd); /* Reset read buffer and line buffer */ if (filebehave == FILE_MMAP) { munmap(buffer, fsiz); buffer = NULL; } bufpos = buffer; bufrem = 0; free(lnbuf); lnbuf = NULL; lnbuflen = 0; } Index: stable/11/usr.bin/grep/grep.1 =================================================================== --- stable/11/usr.bin/grep/grep.1 (revision 354627) +++ stable/11/usr.bin/grep/grep.1 (revision 354628) @@ -1,497 +1,504 @@ .\" $NetBSD: grep.1,v 1.2 2011/02/16 01:31:33 joerg Exp $ .\" $FreeBSD$ .\" $OpenBSD: grep.1,v 1.38 2010/04/05 06:30:59 jmc Exp $ .\" Copyright (c) 1980, 1990, 1993 .\" The Regents of the University of California. All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. 
.\" 3. Neither the name of the University nor the names of its contributors .\" may be used to endorse or promote products derived from this software .\" without specific prior written permission. .\" .\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" @(#)grep.1 8.3 (Berkeley) 4/18/94 .\" -.Dd April 17, 2017 +.Dd May 7, 2018 .Dt GREP 1 .Os .Sh NAME .Nm grep , egrep , fgrep , rgrep , .Nm zgrep , zegrep , zfgrep .Nd file pattern searcher .Sh SYNOPSIS .Nm grep .Bk -words .Op Fl abcdDEFGHhIiJLlmnOopqRSsUVvwxZz .Op Fl A Ar num .Op Fl B Ar num .Op Fl C Ns Op Ar num .Op Fl e Ar pattern .Op Fl f Ar file .Op Fl Fl binary-files Ns = Ns Ar value .Op Fl Fl color Ns Op = Ns Ar when .Op Fl Fl colour Ns Op = Ns Ar when .Op Fl Fl context Ns Op = Ns Ar num .Op Fl Fl label .Op Fl Fl line-buffered .Op Fl Fl null .Op Ar pattern .Op Ar .Ek .Sh DESCRIPTION The .Nm grep utility searches any given input files, selecting lines that match one or more patterns. By default, a pattern matches an input line if the regular expression (RE) in the pattern matches the input line without its trailing newline. An empty expression matches every line. Each input line that matches at least one of the patterns is written to the standard output. 
.Pp .Nm grep is used for simple patterns and basic regular expressions .Pq BREs ; .Nm egrep can handle extended regular expressions .Pq EREs . See .Xr re_format 7 for more information on regular expressions. .Nm fgrep is quicker than both .Nm grep and .Nm egrep , but can only handle fixed patterns (i.e. it does not interpret regular expressions). Patterns may consist of one or more lines, allowing any of the pattern lines to match a portion of the input. .Pp .Nm zgrep , .Nm zegrep , and .Nm zfgrep act like .Nm grep , .Nm egrep , and .Nm fgrep , respectively, but accept input files compressed with the .Xr compress 1 or .Xr gzip 1 compression utilities. .Pp The following options are available: .Bl -tag -width indent .It Fl A Ar num , Fl Fl after-context Ns = Ns Ar num Print .Ar num lines of trailing context after each match. See also the .Fl B and .Fl C options. .It Fl a , Fl Fl text Treat all files as ASCII text. Normally .Nm will simply print .Dq Binary file ... matches if files contain binary characters. Use of this option forces .Nm to output lines matching the specified pattern. .It Fl B Ar num , Fl Fl before-context Ns = Ns Ar num Print .Ar num lines of leading context before each match. See also the .Fl A and .Fl C options. .It Fl b , Fl Fl byte-offset The offset in bytes of a matched pattern is displayed in front of the respective matched line. .It Fl C Ns Op Ar num , Fl Fl context Ns = Ns Ar num Print .Ar num lines of leading and trailing context surrounding each match. The default is 2 and is equivalent to .Fl A .Ar 2 .Fl B .Ar 2 . Note: no whitespace may be given between the option and its argument. .It Fl c , Fl Fl count Only a count of selected lines is written to standard output. .It Fl Fl colour Ns = Ns Op Ar when , Fl Fl color Ns = Ns Op Ar when Mark up the matching text with the expression stored in .Ev GREP_COLOR environment variable. The possible values of when can be `never', `always' or `auto'. 
.It Fl D Ar action , Fl Fl devices Ns = Ns Ar action Specify the demanded action for devices, FIFOs and sockets. The default action is `read', which means that they are read as if they were normal files. If the action is set to `skip', devices will be silently skipped. .It Fl d Ar action , Fl Fl directories Ns = Ns Ar action Specify the demanded action for directories. It is `read' by default, which means that the directories are read in the same manner as normal files. Other possible values are `skip' to silently ignore the directories, and `recurse' to read them recursively, which has the same effect as the .Fl R and .Fl r options. .It Fl E , Fl Fl extended-regexp Interpret .Ar pattern as an extended regular expression (i.e. force .Nm grep to behave as .Nm egrep ) . .It Fl e Ar pattern , Fl Fl regexp Ns = Ns Ar pattern Specify a pattern used during the search of the input: an input line is selected if it matches any of the specified patterns. This option is most useful when multiple .Fl e options are used to specify multiple patterns, or when a pattern begins with a dash .Pq Sq - . .It Fl Fl exclude If specified, it excludes files matching the given filename pattern from the search. Note that .Fl Fl exclude patterns take priority over .Fl Fl include patterns, and if no .Fl Fl include pattern is specified, all files are searched that are not excluded. Patterns are matched to the full path specified, not only to the filename component. .It Fl Fl exclude-dir If .Fl R is specified, it excludes directories matching the given filename pattern from the search. Note that .Fl Fl exclude-dir patterns take priority over .Fl Fl include-dir patterns, and if no .Fl Fl include-dir pattern is specified, all directories are searched that are not excluded. .It Fl F , Fl Fl fixed-strings Interpret .Ar pattern as a set of fixed strings (i.e. force .Nm grep to behave as .Nm fgrep ) .
.It Fl f Ar file , Fl Fl file Ns = Ns Ar file Read one or more newline separated patterns from .Ar file . Empty pattern lines match every input line. Newlines are not considered part of a pattern. If .Ar file is empty, nothing is matched. .It Fl G , Fl Fl basic-regexp Interpret .Ar pattern as a basic regular expression (i.e. force .Nm grep to behave as traditional .Nm grep ) . .It Fl H Always print filename headers with output lines. .It Fl h , Fl Fl no-filename Never print filename headers .Pq i.e. filenames with output lines. .It Fl Fl help Print a brief help message. .It Fl I Ignore binary files. This option is equivalent to the .Fl Fl binary-files Ns = Ns Ar without-match option. .It Fl i , Fl Fl ignore-case Perform case insensitive matching. By default, .Nm grep is case sensitive. .It Fl Fl include If specified, only files matching the given filename pattern are searched. Note that .Fl Fl exclude patterns take priority over .Fl Fl include patterns. Patterns are matched to the full path specified, not only to the filename component. .It Fl Fl include-dir If .Fl R is specified, only directories matching the given filename pattern are searched. Note that .Fl Fl exclude-dir patterns take priority over .Fl Fl include-dir patterns. .It Fl J , Fl Fl bz2decompress Decompress the .Xr bzip2 1 compressed file before looking for the text. .It Fl L , Fl Fl files-without-match Only the names of files not containing selected lines are written to standard output. Pathnames are listed once per file searched. If the standard input is searched, the string .Dq (standard input) is written. .It Fl l , Fl Fl files-with-matches Only the names of files containing selected lines are written to standard output. .Nm grep will only search a file until a match has been found, making searches potentially less expensive. Pathnames are listed once per file searched. If the standard input is searched, the string .Dq (standard input) is written.
.It Fl Fl mmap Use .Xr mmap 2 instead of .Xr read 2 to read input, which can result in better performance under some circumstances but can cause undefined behaviour. .It Fl m Ar num , Fl Fl max-count Ns = Ns Ar num Stop reading the file after .Ar num matches. .It Fl n , Fl Fl line-number Each output line is preceded by its relative line number in the file, starting at line 1. The line number counter is reset for each file processed. This option is ignored if .Fl c , .Fl L , .Fl l , or .Fl q is specified. .It Fl Fl null Prints a zero-byte after the file name. .It Fl O If .Fl R is specified, follow symbolic links only if they were explicitly listed on the command line. The default is not to follow symbolic links. .It Fl o , Fl Fl only-matching Prints only the matching part of the lines. .It Fl p If .Fl R is specified, no symbolic links are followed. This is the default. .It Fl q , Fl Fl quiet , Fl Fl silent Quiet mode: suppress normal output. .Nm grep will only search a file until a match has been found, making searches potentially less expensive. .It Fl R , Fl r , Fl Fl recursive Recursively search subdirectories listed. (i.e. force .Nm grep to behave as .Nm rgrep ) . .It Fl S If .Fl R is specified, all symbolic links are followed. The default is not to follow symbolic links. .It Fl s , Fl Fl no-messages Silent mode. Nonexistent and unreadable files are ignored (i.e. their error messages are suppressed). .It Fl U , Fl Fl binary Search binary files, but do not attempt to print them. .It Fl u This option has no effect and is provided only for compatibility with GNU grep. .It Fl V , Fl Fl version Display version information and exit. .It Fl v , Fl Fl invert-match Selected lines are those .Em not matching any of the specified patterns. .It Fl w , Fl Fl word-regexp The expression is searched for as a word (as if surrounded by .Sq [[:<:]] and .Sq [[:>:]] ; see .Xr re_format 7 ) .
.It Fl x , Fl Fl line-regexp Only input lines selected against an entire fixed string or regular expression are considered to be matching lines. .It Fl y Equivalent to .Fl i . Obsoleted. .It Fl z , Fl Fl null-data Treat input and output data as sequences of lines terminated by a zero-byte instead of a newline. .It Fl Z , Fl Fl decompress Force .Nm grep to behave as .Nm zgrep . .It Fl Fl binary-files Ns = Ns Ar value Controls searching and printing of binary files. Options are .Ar binary , the default: search binary files but do not print them; .Ar without-match : do not search binary files; and .Ar text : treat all files as text. .Sm off .It Fl Fl context Op = Ar num .Sm on Print .Ar num lines of leading and trailing context. The default is 2. .It Fl Fl line-buffered Force output to be line buffered. By default, output is line buffered when standard output is a terminal and block buffered otherwise. .El .Pp If no file arguments are specified, the standard input is used. +Additionally, +.Dq - +may be used in place of a file name, anywhere that a file name is accepted, to +read from standard input. +This includes both +.Fl f +and file arguments. .Sh EXIT STATUS The .Nm grep utility exits with one of the following values: .Pp .Bl -tag -width flag -compact .It Li 0 One or more lines were selected. .It Li 1 No lines were selected. .It Li \*(Gt1 An error occurred. .El .Sh EXAMPLES To find all occurrences of the word .Sq patricia in a file: .Pp .Dl $ grep 'patricia' myfile .Pp To find all occurrences of the pattern .Ql .Pp at the beginning of a line: .Pp .Dl $ grep '^\e.Pp' myfile .Pp The apostrophes ensure the entire expression is evaluated by .Nm grep instead of by the user's shell. The caret .Ql ^ matches the null string at the beginning of a line, and the .Ql \e escapes the .Ql \&. , which would otherwise match any character. 
.Pp To find all lines in a file which do not contain the words .Sq foo or .Sq bar : .Pp .Dl $ grep -v -e 'foo' -e 'bar' myfile .Pp A simple example of an extended regular expression: .Pp .Dl $ egrep '19|20|25' calendar .Pp Peruses the file .Sq calendar looking for either 19, 20, or 25. .Sh SEE ALSO .Xr ed 1 , .Xr ex 1 , .Xr gzip 1 , .Xr sed 1 , .Xr re_format 7 .Sh STANDARDS The .Nm utility is compliant with the .St -p1003.1-2008 specification. .Pp The flags .Op Fl AaBbCDdGHhIJLmoPRSUVwZ are extensions to that specification, and the behaviour of the .Fl f flag when used with an empty pattern file is left undefined. .Pp All long options are provided for compatibility with GNU versions of this utility. .Pp Historic versions of the .Nm grep utility also supported the flags .Op Fl ruy . This implementation supports those options; however, their use is strongly discouraged. .Sh HISTORY The .Nm grep command first appeared in .At v6 . Index: stable/11/usr.bin/grep/grep.c =================================================================== --- stable/11/usr.bin/grep/grep.c (revision 354627) +++ stable/11/usr.bin/grep/grep.c (revision 354628) @@ -1,805 +1,801 @@ /* $NetBSD: grep.c,v 1.6 2011/04/18 03:48:23 joerg Exp $ */ /* $FreeBSD$ */ /* $OpenBSD: grep.c,v 1.42 2010/07/02 22:18:03 tedu Exp $ */ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav * Copyright (C) 2008-2009 Gabor Kovesdan * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #define _WITH_GETLINE #include #include #include #include #ifndef WITHOUT_FASTMATCH #include "fastmatch.h" #endif #include "grep.h" #ifndef WITHOUT_NLS #include nl_catd catalog; #endif /* * Default messags to use when NLS is disabled or no catalogue * is found. */ const char *errstr[] = { "", /* 1*/ "(standard input)", /* 2*/ "cannot read bzip2 compressed file", /* 3*/ "unknown %s option", /* 4*/ "usage: %s [-abcDEFGHhIiJLlmnOoPqRSsUVvwxZz] [-A num] [-B num] [-C[num]]\n", /* 5*/ "\t[-e pattern] [-f file] [--binary-files=value] [--color=when]\n", /* 6*/ "\t[--context[=num]] [--directories=action] [--label] [--line-buffered]\n", /* 7*/ "\t[--null] [pattern] [file ...]\n", /* 8*/ "Binary file %s matches\n", /* 9*/ "%s (BSD grep) %s\n", /* 10*/ "%s (BSD grep, GNU compatible) %s\n", }; /* Flags passed to regcomp() and regexec() */ int cflags = REG_NOSUB | REG_NEWLINE; int eflags = REG_STARTEND; /* XXX TODO: Get rid of this flag. * matchall is a gross hack that means that an empty pattern was passed to us. 
* It is a necessary evil at the moment because our regex(3) implementation * does not allow for empty patterns, as supported by POSIX's definition of * grammar for BREs/EREs. When libregex becomes available, it would be wise * to remove this and let regex(3) handle the dirty details of empty patterns. */ bool matchall; /* Searching patterns */ unsigned int patterns; static unsigned int pattern_sz; struct pat *pattern; regex_t *r_pattern; #ifndef WITHOUT_FASTMATCH fastmatch_t *fg_pattern; #endif /* Filename exclusion/inclusion patterns */ unsigned int fpatterns, dpatterns; static unsigned int fpattern_sz, dpattern_sz; struct epat *dpattern, *fpattern; /* For regex errors */ char re_error[RE_ERROR_BUF + 1]; /* Command-line flags */ long long Aflag; /* -A x: print x lines trailing each match */ long long Bflag; /* -B x: print x lines leading each match */ bool Hflag; /* -H: always print file name */ bool Lflag; /* -L: only show names of files with no matches */ bool bflag; /* -b: show block numbers for each match */ bool cflag; /* -c: only show a count of matching lines */ bool hflag; /* -h: don't print filename headers */ bool iflag; /* -i: ignore case */ bool lflag; /* -l: only show names of files with matches */ bool mflag; /* -m x: stop reading the files after x matches */ long long mcount; /* count for -m */ long long mlimit; /* requested value for -m */ char fileeol; /* indicator for eol */ bool nflag; /* -n: show line numbers in front of matching lines */ bool oflag; /* -o: print only matching part */ bool qflag; /* -q: quiet mode (don't output anything) */ bool sflag; /* -s: silent mode (ignore errors) */ bool vflag; /* -v: only show non-matching lines */ bool wflag; /* -w: pattern must start and end on word boundaries */ bool xflag; /* -x: pattern must match entire line */ bool lbflag; /* --line-buffered */ bool nullflag; /* --null */ char *label; /* --label */ const char *color; /* --color */ int grepbehave = GREP_BASIC; /* -EFGP: type of the regex */ int 
binbehave = BINFILE_BIN; /* -aIU: handling of binary files */ int filebehave = FILE_STDIO; /* -JZ: normal, gzip or bzip2 file */ int devbehave = DEV_READ; /* -D: handling of devices */ int dirbehave = DIR_READ; /* -dRr: handling of directories */ int linkbehave = LINK_READ; /* -OpS: handling of symlinks */ bool dexclude, dinclude; /* --exclude-dir and --include-dir */ bool fexclude, finclude; /* --exclude and --include */ enum { BIN_OPT = CHAR_MAX + 1, COLOR_OPT, HELP_OPT, MMAP_OPT, LINEBUF_OPT, LABEL_OPT, NULL_OPT, R_EXCLUDE_OPT, R_INCLUDE_OPT, R_DEXCLUDE_OPT, R_DINCLUDE_OPT }; static inline const char *init_color(const char *); /* Housekeeping */ bool file_err; /* file reading error */ /* * Prints usage information and returns 2. */ static void usage(void) { fprintf(stderr, getstr(4), getprogname()); fprintf(stderr, "%s", getstr(5)); fprintf(stderr, "%s", getstr(6)); fprintf(stderr, "%s", getstr(7)); exit(2); } static const char *optstr = "0123456789A:B:C:D:EFGHIJMLOPSRUVZabcd:e:f:hilm:nopqrsuvwxXyz"; static const struct option long_options[] = { {"binary-files", required_argument, NULL, BIN_OPT}, {"help", no_argument, NULL, HELP_OPT}, {"mmap", no_argument, NULL, MMAP_OPT}, {"line-buffered", no_argument, NULL, LINEBUF_OPT}, {"label", required_argument, NULL, LABEL_OPT}, {"null", no_argument, NULL, NULL_OPT}, {"color", optional_argument, NULL, COLOR_OPT}, {"colour", optional_argument, NULL, COLOR_OPT}, {"exclude", required_argument, NULL, R_EXCLUDE_OPT}, {"include", required_argument, NULL, R_INCLUDE_OPT}, {"exclude-dir", required_argument, NULL, R_DEXCLUDE_OPT}, {"include-dir", required_argument, NULL, R_DINCLUDE_OPT}, {"after-context", required_argument, NULL, 'A'}, {"text", no_argument, NULL, 'a'}, {"before-context", required_argument, NULL, 'B'}, {"byte-offset", no_argument, NULL, 'b'}, {"context", optional_argument, NULL, 'C'}, {"count", no_argument, NULL, 'c'}, {"devices", required_argument, NULL, 'D'}, {"directories", required_argument, NULL, 'd'}, 
{"extended-regexp", no_argument, NULL, 'E'}, {"regexp", required_argument, NULL, 'e'}, {"fixed-strings", no_argument, NULL, 'F'}, {"file", required_argument, NULL, 'f'}, {"basic-regexp", no_argument, NULL, 'G'}, {"no-filename", no_argument, NULL, 'h'}, {"with-filename", no_argument, NULL, 'H'}, {"ignore-case", no_argument, NULL, 'i'}, {"bz2decompress", no_argument, NULL, 'J'}, {"files-with-matches", no_argument, NULL, 'l'}, {"files-without-match", no_argument, NULL, 'L'}, {"max-count", required_argument, NULL, 'm'}, {"lzma", no_argument, NULL, 'M'}, {"line-number", no_argument, NULL, 'n'}, {"only-matching", no_argument, NULL, 'o'}, {"quiet", no_argument, NULL, 'q'}, {"silent", no_argument, NULL, 'q'}, {"recursive", no_argument, NULL, 'r'}, {"no-messages", no_argument, NULL, 's'}, {"binary", no_argument, NULL, 'U'}, {"unix-byte-offsets", no_argument, NULL, 'u'}, {"invert-match", no_argument, NULL, 'v'}, {"version", no_argument, NULL, 'V'}, {"word-regexp", no_argument, NULL, 'w'}, {"line-regexp", no_argument, NULL, 'x'}, {"xz", no_argument, NULL, 'X'}, {"null-data", no_argument, NULL, 'z'}, {"decompress", no_argument, NULL, 'Z'}, {NULL, no_argument, NULL, 0} }; /* * Adds a searching pattern to the internal array. 
*/ static void add_pattern(char *pat, size_t len) { - /* Do not add further pattern is we already match everything */ - if (matchall) - return; - /* Check if we can do a shortcut */ if (len == 0) { matchall = true; - for (unsigned int i = 0; i < patterns; i++) { - free(pattern[i].pat); - } - pattern = grep_realloc(pattern, sizeof(struct pat)); - pattern[0].pat = NULL; - pattern[0].len = 0; - patterns = 1; return; } /* Increase size if necessary */ if (patterns == pattern_sz) { pattern_sz *= 2; pattern = grep_realloc(pattern, ++pattern_sz * sizeof(struct pat)); } if (len > 0 && pat[len - 1] == '\n') --len; /* pat may not be NUL-terminated */ pattern[patterns].pat = grep_malloc(len + 1); memcpy(pattern[patterns].pat, pat, len); pattern[patterns].len = len; pattern[patterns].pat[len] = '\0'; ++patterns; } /* * Adds a file include/exclude pattern to the internal array. */ static void add_fpattern(const char *pat, int mode) { /* Increase size if necessary */ if (fpatterns == fpattern_sz) { fpattern_sz *= 2; fpattern = grep_realloc(fpattern, ++fpattern_sz * sizeof(struct epat)); } fpattern[fpatterns].pat = grep_strdup(pat); fpattern[fpatterns].mode = mode; ++fpatterns; } /* * Adds a directory include/exclude pattern to the internal array. */ static void add_dpattern(const char *pat, int mode) { /* Increase size if necessary */ if (dpatterns == dpattern_sz) { dpattern_sz *= 2; dpattern = grep_realloc(dpattern, ++dpattern_sz * sizeof(struct epat)); } dpattern[dpatterns].pat = grep_strdup(pat); dpattern[dpatterns].mode = mode; ++dpatterns; } /* * Reads searching patterns from a file and adds them with add_pattern(). 
*/ static void read_patterns(const char *fn) { struct stat st; FILE *f; char *line; size_t len; ssize_t rlen; - if ((f = fopen(fn, "r")) == NULL) + if (strcmp(fn, "-") == 0) + f = stdin; + else if ((f = fopen(fn, "r")) == NULL) err(2, "%s", fn); if ((fstat(fileno(f), &st) == -1) || (S_ISDIR(st.st_mode))) { fclose(f); return; } len = 0; line = NULL; while ((rlen = getline(&line, &len, f)) != -1) { if (line[0] == '\0') continue; add_pattern(line, line[0] == '\n' ? 0 : (size_t)rlen); } free(line); if (ferror(f)) err(2, "%s", fn); - fclose(f); + if (strcmp(fn, "-") != 0) + fclose(f); } static inline const char * init_color(const char *d) { char *c; c = getenv("GREP_COLOR"); return (c != NULL && c[0] != '\0' ? c : d); } int main(int argc, char *argv[]) { char **aargv, **eargv, *eopts; char *ep; const char *pn; long long l; unsigned int aargc, eargc, i; int c, lastc, needpattern, newarg, prevoptind; + bool matched; setlocale(LC_ALL, ""); #ifndef WITHOUT_NLS catalog = catopen("grep", NL_CAT_LOCALE); #endif /* Check what is the program name of the binary. In this way we can have all the funcionalities in one binary without the need of scripting and using ugly hacks. 
*/ pn = getprogname(); if (pn[0] == 'b' && pn[1] == 'z') { filebehave = FILE_BZIP; pn += 2; } else if (pn[0] == 'x' && pn[1] == 'z') { filebehave = FILE_XZ; pn += 2; } else if (pn[0] == 'l' && pn[1] == 'z') { filebehave = FILE_LZMA; pn += 2; } else if (pn[0] == 'r') { dirbehave = DIR_RECURSE; Hflag = true; } else if (pn[0] == 'z') { filebehave = FILE_GZIP; pn += 1; } switch (pn[0]) { case 'e': grepbehave = GREP_EXTENDED; break; case 'f': grepbehave = GREP_FIXED; break; } lastc = '\0'; newarg = 1; prevoptind = 1; needpattern = 1; fileeol = '\n'; eopts = getenv("GREP_OPTIONS"); /* support for extra arguments in GREP_OPTIONS */ eargc = 0; if (eopts != NULL && eopts[0] != '\0') { char *str; /* make an estimation of how many extra arguments we have */ for (unsigned int j = 0; j < strlen(eopts); j++) if (eopts[j] == ' ') eargc++; eargv = (char **)grep_malloc(sizeof(char *) * (eargc + 1)); eargc = 0; /* parse extra arguments */ while ((str = strsep(&eopts, " ")) != NULL) if (str[0] != '\0') eargv[eargc++] = grep_strdup(str); aargv = (char **)grep_calloc(eargc + argc + 1, sizeof(char *)); aargv[0] = argv[0]; for (i = 0; i < eargc; i++) aargv[i + 1] = eargv[i]; for (int j = 1; j < argc; j++, i++) aargv[i + 1] = argv[j]; aargc = eargc + argc; } else { aargv = argv; aargc = argc; } while (((c = getopt_long(aargc, aargv, optstr, long_options, NULL)) != -1)) { switch (c) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': if (newarg || !isdigit(lastc)) Aflag = 0; else if (Aflag > LLONG_MAX / 10 - 1) { errno = ERANGE; err(2, NULL); } Aflag = Bflag = (Aflag * 10) + (c - '0'); break; case 'C': if (optarg == NULL) { Aflag = Bflag = 2; break; } /* FALLTHROUGH */ case 'A': /* FALLTHROUGH */ case 'B': errno = 0; l = strtoll(optarg, &ep, 10); if (errno == ERANGE || errno == EINVAL) err(2, NULL); else if (ep[0] != '\0') { errno = EINVAL; err(2, NULL); } else if (l < 0) { errno = EINVAL; err(2, "context argument must be non-negative"); } 
if (c == 'A') Aflag = l; else if (c == 'B') Bflag = l; else Aflag = Bflag = l; break; case 'a': binbehave = BINFILE_TEXT; break; case 'b': bflag = true; break; case 'c': cflag = true; break; case 'D': if (strcasecmp(optarg, "skip") == 0) devbehave = DEV_SKIP; else if (strcasecmp(optarg, "read") == 0) devbehave = DEV_READ; else errx(2, getstr(3), "--devices"); break; case 'd': if (strcasecmp("recurse", optarg) == 0) { Hflag = true; dirbehave = DIR_RECURSE; } else if (strcasecmp("skip", optarg) == 0) dirbehave = DIR_SKIP; else if (strcasecmp("read", optarg) == 0) dirbehave = DIR_READ; else errx(2, getstr(3), "--directories"); break; case 'E': grepbehave = GREP_EXTENDED; break; case 'e': { char *token; char *string = optarg; while ((token = strsep(&string, "\n")) != NULL) add_pattern(token, strlen(token)); } needpattern = 0; break; case 'F': grepbehave = GREP_FIXED; break; case 'f': read_patterns(optarg); needpattern = 0; break; case 'G': grepbehave = GREP_BASIC; break; case 'H': Hflag = true; break; case 'h': Hflag = false; hflag = true; break; case 'I': binbehave = BINFILE_SKIP; break; case 'i': case 'y': iflag = true; cflags |= REG_ICASE; break; case 'J': #ifdef WITHOUT_BZIP2 errno = EOPNOTSUPP; err(2, "bzip2 support was disabled at compile-time"); #endif filebehave = FILE_BZIP; break; case 'L': lflag = false; Lflag = true; break; case 'l': Lflag = false; lflag = true; break; case 'm': mflag = true; errno = 0; mlimit = mcount = strtoll(optarg, &ep, 10); if (((errno == ERANGE) && (mcount == LLONG_MAX)) || ((errno == EINVAL) && (mcount == 0))) err(2, NULL); else if (ep[0] != '\0') { errno = EINVAL; err(2, NULL); } break; case 'M': filebehave = FILE_LZMA; break; case 'n': nflag = true; break; case 'O': linkbehave = LINK_EXPLICIT; break; case 'o': oflag = true; cflags &= ~REG_NOSUB; break; case 'p': linkbehave = LINK_SKIP; break; case 'q': qflag = true; break; case 'S': linkbehave = LINK_READ; break; case 'R': case 'r': dirbehave = DIR_RECURSE; Hflag = true; break; 
case 's': sflag = true; break; case 'U': binbehave = BINFILE_BIN; break; case 'u': case MMAP_OPT: filebehave = FILE_MMAP; break; case 'V': #ifdef WITH_GNU printf(getstr(10), getprogname(), VERSION); #else printf(getstr(9), getprogname(), VERSION); #endif exit(0); case 'v': vflag = true; break; case 'w': wflag = true; cflags &= ~REG_NOSUB; break; case 'x': xflag = true; cflags &= ~REG_NOSUB; break; case 'X': filebehave = FILE_XZ; break; case 'z': fileeol = '\0'; break; case 'Z': filebehave = FILE_GZIP; break; case BIN_OPT: if (strcasecmp("binary", optarg) == 0) binbehave = BINFILE_BIN; else if (strcasecmp("without-match", optarg) == 0) binbehave = BINFILE_SKIP; else if (strcasecmp("text", optarg) == 0) binbehave = BINFILE_TEXT; else errx(2, getstr(3), "--binary-files"); break; case COLOR_OPT: color = NULL; if (optarg == NULL || strcasecmp("auto", optarg) == 0 || strcasecmp("tty", optarg) == 0 || strcasecmp("if-tty", optarg) == 0) { char *term; term = getenv("TERM"); if (isatty(STDOUT_FILENO) && term != NULL && strcasecmp(term, "dumb") != 0) color = init_color("01;31"); } else if (strcasecmp("always", optarg) == 0 || strcasecmp("yes", optarg) == 0 || strcasecmp("force", optarg) == 0) { color = init_color("01;31"); } else if (strcasecmp("never", optarg) != 0 && strcasecmp("none", optarg) != 0 && strcasecmp("no", optarg) != 0) errx(2, getstr(3), "--color"); cflags &= ~REG_NOSUB; break; case LABEL_OPT: label = optarg; break; case LINEBUF_OPT: lbflag = true; break; case NULL_OPT: nullflag = true; break; case R_INCLUDE_OPT: finclude = true; add_fpattern(optarg, INCL_PAT); break; case R_EXCLUDE_OPT: fexclude = true; add_fpattern(optarg, EXCL_PAT); break; case R_DINCLUDE_OPT: dinclude = true; add_dpattern(optarg, INCL_PAT); break; case R_DEXCLUDE_OPT: dexclude = true; add_dpattern(optarg, EXCL_PAT); break; case HELP_OPT: default: usage(); } lastc = c; newarg = optind != prevoptind; prevoptind = optind; } aargc -= optind; aargv += optind; /* Empty pattern file matches 
nothing */ - if (!needpattern && (patterns == 0)) + if (!needpattern && (patterns == 0) && !matchall) exit(1); /* Fail if we don't have any pattern */ if (aargc == 0 && needpattern) usage(); /* Process patterns from command line */ if (aargc != 0 && needpattern) { char *token; char *string = *aargv; while ((token = strsep(&string, "\n")) != NULL) add_pattern(token, strlen(token)); --aargc; ++aargv; } switch (grepbehave) { case GREP_BASIC: break; case GREP_FIXED: /* * regex(3) implementations that support fixed-string searches generally * define either REG_NOSPEC or REG_LITERAL. Set the appropriate flag * here. If neither are defined, GREP_FIXED later implies that the * internal literal matcher should be used. Other cflags that have * the same interpretation as REG_NOSPEC and REG_LITERAL should be * similarly added here, and grep.h should be amended to take this into * consideration when defining WITH_INTERNAL_NOSPEC. */ #if defined(REG_NOSPEC) cflags |= REG_NOSPEC; #elif defined(REG_LITERAL) cflags |= REG_LITERAL; #endif break; case GREP_EXTENDED: cflags |= REG_EXTENDED; break; default: /* NOTREACHED */ usage(); } #ifndef WITHOUT_FASTMATCH fg_pattern = grep_calloc(patterns, sizeof(*fg_pattern)); #endif r_pattern = grep_calloc(patterns, sizeof(*r_pattern)); - /* Don't process any patterns if we have a blank one */ #ifdef WITH_INTERNAL_NOSPEC - if (!matchall && grepbehave != GREP_FIXED) { + if (grepbehave != GREP_FIXED) { #else - if (!matchall) { + { #endif /* Check if cheating is allowed (always is for fgrep). */ for (i = 0; i < patterns; ++i) { #ifndef WITHOUT_FASTMATCH /* * Attempt compilation with fastmatch regex and * fallback to regex(3) if it fails. 
*/ if (fastncomp(&fg_pattern[i], pattern[i].pat, pattern[i].len, cflags) == 0) continue; #endif c = regcomp(&r_pattern[i], pattern[i].pat, cflags); if (c != 0) { regerror(c, &r_pattern[i], re_error, RE_ERROR_BUF); errx(2, "%s", re_error); } } } if (lbflag) setlinebuf(stdout); if ((aargc == 0 || aargc == 1) && !Hflag) hflag = true; if (aargc == 0 && dirbehave != DIR_RECURSE) exit(!procfile("-")); if (dirbehave == DIR_RECURSE) - c = grep_tree(aargv); + matched = grep_tree(aargv); else - for (c = 0; aargc--; ++aargv) { + for (matched = false; aargc--; ++aargv) { if ((finclude || fexclude) && !file_matching(*aargv)) continue; - c+= procfile(*aargv); + if (procfile(*aargv)) + matched = true; } #ifndef WITHOUT_NLS catclose(catalog); #endif /* Find out the correct return value according to the results and the command line option. */ - exit(c ? (file_err ? (qflag ? 0 : 2) : 0) : (file_err ? 2 : 1)); + if (Lflag) + matched = !matched; + + exit(matched ? (file_err ? (qflag ? 0 : 2) : 0) : (file_err ? 2 : 1)); } Index: stable/11/usr.bin/grep/grep.h =================================================================== --- stable/11/usr.bin/grep/grep.h (revision 354627) +++ stable/11/usr.bin/grep/grep.h (revision 354628) @@ -1,164 +1,179 @@ /* $NetBSD: grep.h,v 1.5 2011/02/27 17:33:37 joerg Exp $ */ /* $OpenBSD: grep.h,v 1.15 2010/04/05 03:03:55 tedu Exp $ */ /* $FreeBSD$ */ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav * Copyright (c) 2008-2009 Gabor Kovesdan * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include #include #include #include #include #include #ifndef WITHOUT_FASTMATCH #include "fastmatch.h" #endif #ifdef WITHOUT_NLS #define getstr(n) errstr[n] #else #include extern nl_catd catalog; #define getstr(n) catgets(catalog, 1, n, errstr[n]) #endif extern const char *errstr[]; #define VERSION "2.6.0-FreeBSD" #define GREP_FIXED 0 #define GREP_BASIC 1 #define GREP_EXTENDED 2 #if !defined(REG_NOSPEC) && !defined(REG_LITERAL) #define WITH_INTERNAL_NOSPEC #endif #define BINFILE_BIN 0 #define BINFILE_SKIP 1 #define BINFILE_TEXT 2 #define FILE_STDIO 0 #define FILE_MMAP 1 #define FILE_GZIP 2 #define FILE_BZIP 3 #define FILE_XZ 4 #define FILE_LZMA 5 #define DIR_READ 0 #define DIR_SKIP 1 #define DIR_RECURSE 2 #define DEV_READ 0 #define DEV_SKIP 1 #define LINK_READ 0 #define LINK_EXPLICIT 1 #define LINK_SKIP 2 #define EXCL_PAT 0 #define INCL_PAT 1 #define MAX_MATCHES 32 struct file { int fd; bool binary; }; struct str { off_t boff; off_t off; size_t len; char *dat; char *file; int line_no; }; struct pat { char *pat; 
int len; }; struct epat { char *pat; int mode; }; +/* + * Parsing context; used to hold things like matches made and + * other useful bits + */ +struct parsec { + regmatch_t matches[MAX_MATCHES]; /* Matches made */ + /* XXX TODO: This should be a chunk, not a line */ + struct str ln; /* Current line */ + size_t lnstart; /* Position in line */ + size_t matchidx; /* Latest match index */ + int printed; /* Metadata printed? */ + bool binary; /* Binary file? */ + bool cntlines; /* Count lines? */ +}; + /* Flags passed to regcomp() and regexec() */ extern int cflags, eflags; /* Command line flags */ extern bool Eflag, Fflag, Gflag, Hflag, Lflag, bflag, cflag, hflag, iflag, lflag, mflag, nflag, oflag, qflag, sflag, vflag, wflag, xflag; extern bool dexclude, dinclude, fexclude, finclude, lbflag, nullflag; extern long long Aflag, Bflag; extern long long mcount; extern long long mlimit; extern char fileeol; extern char *label; extern const char *color; extern int binbehave, devbehave, dirbehave, filebehave, grepbehave, linkbehave; extern bool file_err, matchall; extern unsigned int dpatterns, fpatterns, patterns; extern struct pat *pattern; extern struct epat *dpattern, *fpattern; extern regex_t *er_pattern, *r_pattern; #ifndef WITHOUT_FASTMATCH extern fastmatch_t *fg_pattern; #endif /* For regex errors */ #define RE_ERROR_BUF 512 extern char re_error[RE_ERROR_BUF + 1]; /* Seems big enough */ /* util.c */ bool file_matching(const char *fname); -int procfile(const char *fn); -int grep_tree(char **argv); +bool procfile(const char *fn); +bool grep_tree(char **argv); void *grep_malloc(size_t size); void *grep_calloc(size_t nmemb, size_t size); void *grep_realloc(void *ptr, size_t size); char *grep_strdup(const char *str); void grep_printline(struct str *line, int sep); /* queue.c */ bool enqueue(struct str *x); void printqueue(void); void clearqueue(void); /* file.c */ void grep_close(struct file *f); struct file *grep_open(const char *path); -char *grep_fgetln(struct file *f, 
size_t *len); +char *grep_fgetln(struct file *f, struct parsec *pc); Index: stable/11/usr.bin/grep/tests/grep_freebsd_test.sh =================================================================== --- stable/11/usr.bin/grep/tests/grep_freebsd_test.sh (revision 354627) +++ stable/11/usr.bin/grep/tests/grep_freebsd_test.sh (revision 354628) @@ -1,88 +1,114 @@ # # Copyright (c) 2017 Kyle Evans # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE # ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS # OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF # SUCH DAMAGE. # # $FreeBSD$ # What grep(1) are we working with? 
# - 0 : bsdgrep # - 1 : gnu grep 2.51 (base) # - 2 : gnu grep (ports) GREP_TYPE_BSD=0 GREP_TYPE_GNU_FREEBSD=1 GREP_TYPE_GNU=2 GREP_TYPE_UNKNOWN=3 grep_type() { local grep_version=$(grep --version) case "$grep_version" in *"BSD grep"*) return $GREP_TYPE_BSD ;; *"GNU grep"*) case "$grep_version" in *2.5.1-FreeBSD*) return $GREP_TYPE_GNU_FREEBSD ;; *) return $GREP_TYPE_GNU ;; esac ;; esac atf_fail "unknown grep type: $grep_version" } atf_test_case grep_r_implied grep_r_implied_body() { grep_type if [ $? -ne $GREP_TYPE_BSD ]; then atf_skip "this test only works with bsdgrep(1)" fi (cd "$(atf_get_srcdir)" && grep -r --exclude="*.out" -e "test" .) > d_grep_r_implied.out atf_check -s exit:0 -x \ "(cd $(atf_get_srcdir) && grep -r --exclude=\"*.out\" -e \"test\") | diff d_grep_r_implied.out -" } atf_test_case rgrep rgrep_head() { atf_set "require.progs" "rgrep" } rgrep_body() { atf_check -o save:d_grep_r_implied.out grep -r --exclude="*.out" -e "test" "$(atf_get_srcdir)" atf_check -o file:d_grep_r_implied.out rgrep --exclude="*.out" -e "test" "$(atf_get_srcdir)" } +atf_test_case gnuext +gnuext_body() +{ + grep_type + _type=$? 
+ if [ $_type -eq $GREP_TYPE_BSD ]; then + atf_expect_fail "this test requires GNU extensions in regex(3)" + elif [ $_type -eq $GREP_TYPE_GNU_FREEBSD ]; then + atf_expect_fail "\\s and \\S are known to be buggy in base gnugrep" + fi + + atf_check -o save:grep_alnum.out grep -o '[[:alnum:]]' /COPYRIGHT + atf_check -o file:grep_alnum.out grep -o '\w' /COPYRIGHT + + atf_check -o save:grep_nalnum.out grep -o '[^[:alnum:]]' /COPYRIGHT + atf_check -o file:grep_nalnum.out grep -o '\W' /COPYRIGHT + + atf_check -o save:grep_space.out grep -o '[[:space:]]' /COPYRIGHT + atf_check -o file:grep_space.out grep -o '\s' /COPYRIGHT + + atf_check -o save:grep_nspace.out grep -o '[^[:space:]]' /COPYRIGHT + atf_check -o file:grep_nspace.out grep -o '\S' /COPYRIGHT + +} + atf_init_test_cases() { atf_add_test_case grep_r_implied atf_add_test_case rgrep + atf_add_test_case gnuext } Index: stable/11/usr.bin/grep/util.c =================================================================== --- stable/11/usr.bin/grep/util.c (revision 354627) +++ stable/11/usr.bin/grep/util.c (revision 354628) @@ -1,741 +1,790 @@ /* $NetBSD: util.c,v 1.9 2011/02/27 17:33:37 joerg Exp $ */ /* $FreeBSD$ */ /* $OpenBSD: util.c,v 1.39 2010/07/02 22:18:03 tedu Exp $ */ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav * Copyright (C) 2008-2010 Gabor Kovesdan * Copyright (C) 2017 Kyle Evans * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifndef WITHOUT_FASTMATCH #include "fastmatch.h" #endif #include "grep.h" static bool first_match = true; /* - * Parsing context; used to hold things like matches made and - * other useful bits + * Match printing context */ -struct parsec { - regmatch_t matches[MAX_MATCHES]; /* Matches made */ - struct str ln; /* Current line */ - size_t lnstart; /* Position in line */ - size_t matchidx; /* Latest match index */ - int printed; /* Metadata printed? */ - bool binary; /* Binary file? */ +struct mprintc { + long long tail; /* Number of trailing lines to record */ + int last_outed; /* Number of lines since last output */ + bool doctx; /* Printing context? */ + bool printmatch; /* Printing matches? */ + bool same_file; /* Same file as previously printed? 
*/ }; +static void procmatch_match(struct mprintc *mc, struct parsec *pc); +static void procmatch_nomatch(struct mprintc *mc, struct parsec *pc); +static bool procmatches(struct mprintc *mc, struct parsec *pc, bool matched); #ifdef WITH_INTERNAL_NOSPEC static int litexec(const struct pat *pat, const char *string, size_t nmatch, regmatch_t pmatch[]); #endif -static int procline(struct parsec *pc); +static bool procline(struct parsec *pc); static void printline(struct parsec *pc, int sep); static void printline_metadata(struct str *line, int sep); bool file_matching(const char *fname) { char *fname_base, *fname_buf; bool ret; ret = finclude ? false : true; fname_buf = strdup(fname); if (fname_buf == NULL) err(2, "strdup"); fname_base = basename(fname_buf); for (unsigned int i = 0; i < fpatterns; ++i) { if (fnmatch(fpattern[i].pat, fname, 0) == 0 || - fnmatch(fpattern[i].pat, fname_base, 0) == 0) { - if (fpattern[i].mode == EXCL_PAT) { - ret = false; - break; - } else - ret = true; - } + fnmatch(fpattern[i].pat, fname_base, 0) == 0) + /* + * The last pattern matched wins exclusion/inclusion + * rights, so we can't reasonably bail out early here. + */ + ret = (fpattern[i].mode != EXCL_PAT); } free(fname_buf); return (ret); } static inline bool dir_matching(const char *dname) { bool ret; ret = dinclude ? false : true; for (unsigned int i = 0; i < dpatterns; ++i) { - if (dname != NULL && - fnmatch(dpattern[i].pat, dname, 0) == 0) { - if (dpattern[i].mode == EXCL_PAT) - return (false); - else - ret = true; - } + if (dname != NULL && fnmatch(dpattern[i].pat, dname, 0) == 0) + /* + * The last pattern matched wins exclusion/inclusion + * rights, so we can't reasonably bail out early here. + */ + ret = (dpattern[i].mode != EXCL_PAT); } return (ret); } /* * Processes a directory when a recursive search is performed with * the -R option. Each appropriate file is passed to procfile(). 
*/ -int +bool grep_tree(char **argv) { FTS *fts; FTSENT *p; - int c, fts_flags; - bool ok; + int fts_flags; + bool matched, ok; const char *wd[] = { ".", NULL }; - c = fts_flags = 0; + matched = false; + /* This switch effectively initializes 'fts_flags' */ switch(linkbehave) { case LINK_EXPLICIT: fts_flags = FTS_COMFOLLOW; break; case LINK_SKIP: fts_flags = FTS_PHYSICAL; break; default: fts_flags = FTS_LOGICAL; - } fts_flags |= FTS_NOSTAT | FTS_NOCHDIR; fts = fts_open((argv[0] == NULL) ? __DECONST(char * const *, wd) : argv, fts_flags, NULL); if (fts == NULL) err(2, "fts_open"); while ((p = fts_read(fts)) != NULL) { switch (p->fts_info) { case FTS_DNR: /* FALLTHROUGH */ case FTS_ERR: file_err = true; if(!sflag) warnx("%s: %s", p->fts_path, strerror(p->fts_errno)); break; case FTS_D: /* FALLTHROUGH */ case FTS_DP: if (dexclude || dinclude) if (!dir_matching(p->fts_name) || !dir_matching(p->fts_path)) fts_set(fts, p, FTS_SKIP); break; case FTS_DC: /* Print a warning for recursive directory loop */ warnx("warning: %s: recursive directory loop", - p->fts_path); + p->fts_path); break; default: /* Check for file exclusion/inclusion */ ok = true; if (fexclude || finclude) ok &= file_matching(p->fts_path); - if (ok) - c += procfile(p->fts_path); + if (ok && procfile(p->fts_path)) + matched = true; break; } } fts_close(fts); - return (c); + return (matched); } +static void +procmatch_match(struct mprintc *mc, struct parsec *pc) +{ + + if (mc->doctx) { + if (!first_match && (!mc->same_file || mc->last_outed > 0)) + printf("--\n"); + if (Bflag > 0) + printqueue(); + mc->tail = Aflag; + } + + /* Print the matching line, but only if not quiet/binary */ + if (mc->printmatch) { + printline(pc, ':'); + while (pc->matchidx >= MAX_MATCHES) { + /* Reset matchidx and try again */ + pc->matchidx = 0; + if (procline(pc) == !vflag) + printline(pc, ':'); + else + break; + } + first_match = false; + mc->same_file = true; + mc->last_outed = 0; + } +} + +static void 
+procmatch_nomatch(struct mprintc *mc, struct parsec *pc) +{ + + /* Deal with any -A context as needed */ + if (mc->tail > 0) { + grep_printline(&pc->ln, '-'); + mc->tail--; + if (Bflag > 0) + clearqueue(); + } else if (Bflag == 0 || (Bflag > 0 && enqueue(&pc->ln))) + /* + * Enqueue non-matching lines for -B context. If we're not + * actually doing -B context or if the enqueue resulted in a + * line being rotated out, then go ahead and increment + * last_outed to signify a gap between context/match. + */ + ++mc->last_outed; +} + /* + * Process any matches in the current parsing context, return a boolean + * indicating whether we should halt any further processing or not. 'true' to + * continue processing, 'false' to halt. + */ +static bool +procmatches(struct mprintc *mc, struct parsec *pc, bool matched) +{ + + /* + * XXX TODO: This should loop over pc->matches and handle things on a + * line-by-line basis, setting up a `struct str` as needed. + */ + /* Deal with any -B context or context separators */ + if (matched) { + procmatch_match(mc, pc); + + /* Count the matches if we have a match limit */ + if (mflag) { + /* XXX TODO: Decrement by number of matched lines */ + mcount -= 1; + if (mcount <= 0) + return (false); + } + } else if (mc->doctx) + procmatch_nomatch(mc, pc); + + return (true); +} + +/* * Opens a file and processes it. Each file is processed line-by-line * passing the lines to procline(). */ -int +bool procfile(const char *fn) { struct parsec pc; - long long tail; + struct mprintc mc; struct file *f; struct stat sb; - struct str *ln; mode_t s; - int c, last_outed, t; - bool doctx, printmatch, same_file; + int lines; + bool line_matched; if (strcmp(fn, "-") == 0) { fn = label != NULL ? 
label : getstr(1); f = grep_open(NULL); } else { - if (!stat(fn, &sb)) { + if (stat(fn, &sb) == 0) { /* Check if we need to process the file */ s = sb.st_mode & S_IFMT; - if (s == S_IFDIR && dirbehave == DIR_SKIP) - return (0); - if ((s == S_IFIFO || s == S_IFCHR || s == S_IFBLK - || s == S_IFSOCK) && devbehave == DEV_SKIP) - return (0); + if (dirbehave == DIR_SKIP && s == S_IFDIR) + return (false); + if (devbehave == DEV_SKIP && (s == S_IFIFO || + s == S_IFCHR || s == S_IFBLK || s == S_IFSOCK)) + return (false); } f = grep_open(fn); } if (f == NULL) { file_err = true; if (!sflag) warn("%s", fn); - return (0); + return (false); } - /* Convenience */ - ln = &pc.ln; - pc.ln.file = grep_malloc(strlen(fn) + 1); - strcpy(pc.ln.file, fn); + pc.ln.file = grep_strdup(fn); pc.ln.line_no = 0; pc.ln.len = 0; pc.ln.boff = 0; pc.ln.off = -1; pc.binary = f->binary; - pc.printed = 0; - tail = 0; - last_outed = 0; - same_file = false; - doctx = false; - printmatch = true; + pc.cntlines = false; + memset(&mc, 0, sizeof(mc)); + mc.printmatch = true; if ((pc.binary && binbehave == BINFILE_BIN) || cflag || qflag || lflag || Lflag) - printmatch = false; - if (printmatch && (Aflag != 0 || Bflag != 0)) - doctx = true; + mc.printmatch = false; + if (mc.printmatch && (Aflag != 0 || Bflag != 0)) + mc.doctx = true; + if (mc.printmatch && (Aflag != 0 || Bflag != 0 || mflag || nflag)) + pc.cntlines = true; mcount = mlimit; - for (c = 0; c == 0 || !(lflag || qflag); ) { + for (lines = 0; lines == 0 || !(lflag || qflag); ) { + /* + * XXX TODO: We need to revisit this in a chunking world. We're + * not going to be doing per-line statistics because of the + * overhead involved. procmatches can figure that stuff out as + * needed. 
*/ /* Reset per-line statistics */ pc.printed = 0; pc.matchidx = 0; pc.lnstart = 0; pc.ln.boff = 0; pc.ln.off += pc.ln.len + 1; - if ((pc.ln.dat = grep_fgetln(f, &pc.ln.len)) == NULL || + /* XXX TODO: Grab a chunk */ + if ((pc.ln.dat = grep_fgetln(f, &pc)) == NULL || pc.ln.len == 0) break; if (pc.ln.len > 0 && pc.ln.dat[pc.ln.len - 1] == fileeol) --pc.ln.len; pc.ln.line_no++; /* Return if we need to skip a binary file */ if (pc.binary && binbehave == BINFILE_SKIP) { grep_close(f); free(pc.ln.file); free(f); return (0); } - if ((t = procline(&pc)) == 0) - ++c; + line_matched = procline(&pc) == !vflag; + if (line_matched) + ++lines; - /* Deal with any -B context or context separators */ - if (t == 0 && doctx) { - if (!first_match && (!same_file || last_outed > 0)) - printf("--\n"); - if (Bflag > 0) - printqueue(); - tail = Aflag; - } - /* Print the matching line, but only if not quiet/binary */ - if (t == 0 && printmatch) { - printline(&pc, ':'); - while (pc.matchidx >= MAX_MATCHES) { - /* Reset matchidx and try again */ - pc.matchidx = 0; - if (procline(&pc) == 0) - printline(&pc, ':'); - else - break; - } - first_match = false; - same_file = true; - last_outed = 0; - } - if (t != 0 && doctx) { - /* Deal with any -A context */ - if (tail > 0) { - grep_printline(&pc.ln, '-'); - tail--; - if (Bflag > 0) - clearqueue(); - } else { - /* - * Enqueue non-matching lines for -B context. - * If we're not actually doing -B context or if - * the enqueue resulted in a line being rotated - * out, then go ahead and increment last_outed - * to signify a gap between context/match. 
- */ - if (Bflag == 0 || (Bflag > 0 && enqueue(ln))) - ++last_outed; - } - } - - /* Count the matches if we have a match limit */ - if (t == 0 && mflag) { - --mcount; - if (mflag && mcount <= 0) - break; - } - + /* Halt processing if we hit our match limit */ + if (!procmatches(&mc, &pc, line_matched)) + break; } if (Bflag > 0) clearqueue(); grep_close(f); if (cflag) { if (!hflag) printf("%s:", pc.ln.file); - printf("%u\n", c); + printf("%u\n", lines); } - if (lflag && !qflag && c != 0) + if (lflag && !qflag && lines != 0) printf("%s%c", fn, nullflag ? 0 : '\n'); - if (Lflag && !qflag && c == 0) + if (Lflag && !qflag && lines == 0) printf("%s%c", fn, nullflag ? 0 : '\n'); - if (c && !cflag && !lflag && !Lflag && + if (lines != 0 && !cflag && !lflag && !Lflag && binbehave == BINFILE_BIN && f->binary && !qflag) printf(getstr(8), fn); free(pc.ln.file); free(f); - return (c); + return (lines != 0); } #ifdef WITH_INTERNAL_NOSPEC /* * Internal implementation of literal string search within a string, modeled * after regexec(3), for use when the regex(3) implementation doesn't offer * either REG_NOSPEC or REG_LITERAL. This does not apply in the default FreeBSD * config, but in other scenarios such as building against libgnuregex or on * some non-FreeBSD OSes. 
*/ static int litexec(const struct pat *pat, const char *string, size_t nmatch, regmatch_t pmatch[]) { char *(*strstr_fn)(const char *, const char *); char *sub, *subject; const char *search; size_t idx, n, ofs, stringlen; if (cflags & REG_ICASE) strstr_fn = strcasestr; else strstr_fn = strstr; idx = 0; ofs = pmatch[0].rm_so; stringlen = pmatch[0].rm_eo; if (ofs >= stringlen) return (REG_NOMATCH); subject = strndup(string, stringlen); if (subject == NULL) return (REG_ESPACE); for (n = 0; ofs < stringlen;) { search = (subject + ofs); if ((unsigned long)pat->len > strlen(search)) break; sub = strstr_fn(search, pat->pat); /* * Ignoring the empty string possibility due to context: grep optimizes * for empty patterns and will never reach this point. */ if (sub == NULL) break; ++n; /* Fill in pmatch if necessary */ if (nmatch > 0) { pmatch[idx].rm_so = ofs + (sub - search); pmatch[idx].rm_eo = pmatch[idx].rm_so + pat->len; if (++idx == nmatch) break; ofs = pmatch[idx].rm_so + 1; } else /* We only needed to know if we match or not */ break; } free(subject); if (n > 0 && nmatch > 0) for (n = idx; n < nmatch; ++n) pmatch[n].rm_so = pmatch[n].rm_eo = -1; return (n > 0 ? 0 : REG_NOMATCH); } #endif /* WITH_INTERNAL_NOSPEC */ #define iswword(x) (iswalnum((x)) || (x) == L'_') /* * Processes a line comparing it with the specified patterns. Each pattern * is looped to be compared along with the full string, saving each and every * match, which is necessary to colorize the output and to count the * matches. The matching lines are passed to printline() to display the * appropriate output. 
*/ -static int +static bool procline(struct parsec *pc) { regmatch_t pmatch, lastmatch, chkmatch; wchar_t wbegin, wend; size_t st, nst; unsigned int i; - int c = 0, r = 0, lastmatches = 0, leflags = eflags; + int r = 0, leflags = eflags; size_t startm = 0, matchidx; unsigned int retry; + bool lastmatched, matched; matchidx = pc->matchidx; - /* Special case: empty pattern with -w flag, check first character */ - if (matchall && wflag) { + /* + * With matchall (empty pattern), we can try to take some shortcuts. + * Empty patterns trivially match every line except in the -w and -x + * cases. For -w (whole-word) cases, we only match if the first + * character isn't a word-character. For -x (whole-line) cases, we only + * match if the line is empty. + */ + if (matchall) { if (pc->ln.len == 0) - return (0); - wend = L' '; - if (sscanf(&pc->ln.dat[0], "%lc", &wend) != 1 || iswword(wend)) - return (1); - else - return (0); - } else if (matchall) - return (0); + return (true); + if (wflag) { + wend = L' '; + if (sscanf(&pc->ln.dat[0], "%lc", &wend) == 1 && + !iswword(wend)) + return (true); + } else if (!xflag) + return (true); + /* + * If we don't have any other patterns, we really don't match. + * If we do have other patterns, we must fall through and check + * them. + */ + if (patterns == 0) + return (false); + } + + matched = false; st = pc->lnstart; nst = 0; /* Initialize to avoid a false positive warning from GCC. 
*/ lastmatch.rm_so = lastmatch.rm_eo = 0; /* Loop to process the whole line */ while (st <= pc->ln.len) { - lastmatches = 0; + lastmatched = false; startm = matchidx; retry = 0; if (st > 0 && pc->ln.dat[st - 1] != fileeol) leflags |= REG_NOTBOL; /* Loop to compare with all the patterns */ for (i = 0; i < patterns; i++) { pmatch.rm_so = st; pmatch.rm_eo = pc->ln.len; #ifdef WITH_INTERNAL_NOSPEC if (grepbehave == GREP_FIXED) r = litexec(&pattern[i], pc->ln.dat, 1, &pmatch); else #endif #ifndef WITHOUT_FASTMATCH if (fg_pattern[i].pattern) r = fastexec(&fg_pattern[i], pc->ln.dat, 1, &pmatch, leflags); else #endif r = regexec(&r_pattern[i], pc->ln.dat, 1, &pmatch, leflags); if (r != 0) continue; /* Check for full match */ if (xflag && (pmatch.rm_so != 0 || (size_t)pmatch.rm_eo != pc->ln.len)) continue; /* Check for whole word match */ #ifndef WITHOUT_FASTMATCH if (wflag || fg_pattern[i].word) { #else if (wflag) { #endif wbegin = wend = L' '; if (pmatch.rm_so != 0 && sscanf(&pc->ln.dat[pmatch.rm_so - 1], "%lc", &wbegin) != 1) r = REG_NOMATCH; else if ((size_t)pmatch.rm_eo != pc->ln.len && sscanf(&pc->ln.dat[pmatch.rm_eo], "%lc", &wend) != 1) r = REG_NOMATCH; else if (iswword(wbegin) || iswword(wend)) r = REG_NOMATCH; /* * If we're doing whole word matching and we * matched once, then we should try the pattern * again after advancing just past the start of * the earliest match. This allows the pattern * to match later on in the line and possibly * still match a whole word. */ if (r == REG_NOMATCH && (retry == pc->lnstart || (unsigned int)pmatch.rm_so + 1 < retry)) retry = pmatch.rm_so + 1; if (r == REG_NOMATCH) continue; } - lastmatches++; + lastmatched = true; lastmatch = pmatch; if (matchidx == 0) - c++; + matched = true; /* * Replace previous match if the new one is earlier * and/or longer. This will lead to some amount of * extra work if -o/--color are specified, but it's * worth it from a correctness point of view. 
*/ if (matchidx > startm) { chkmatch = pc->matches[matchidx - 1]; if (pmatch.rm_so < chkmatch.rm_so || (pmatch.rm_so == chkmatch.rm_so && (pmatch.rm_eo - pmatch.rm_so) > (chkmatch.rm_eo - chkmatch.rm_so))) { pc->matches[matchidx - 1] = pmatch; nst = pmatch.rm_eo; } } else { /* Advance as normal if not */ pc->matches[matchidx++] = pmatch; nst = pmatch.rm_eo; } /* avoid excessive matching - skip further patterns */ if ((color == NULL && !oflag) || qflag || lflag || matchidx >= MAX_MATCHES) { pc->lnstart = nst; - lastmatches = 0; + lastmatched = false; break; } } /* * Advance to just past the start of the earliest match, try * again just in case we still have a chance to match later in * the string. */ - if (lastmatches == 0 && retry > pc->lnstart) { + if (!lastmatched && retry > pc->lnstart) { st = retry; continue; } + /* XXX TODO: We will need to keep going, since we're chunky */ /* One pass if we are not recording matches */ if (!wflag && ((color == NULL && !oflag) || qflag || lflag || Lflag)) break; /* If we didn't have any matches or REG_NOSUB set */ - if (lastmatches == 0 || (cflags & REG_NOSUB)) + if (!lastmatched || (cflags & REG_NOSUB)) nst = pc->ln.len; - if (lastmatches == 0) + if (!lastmatched) /* No matches */ break; else if (st == nst && lastmatch.rm_so == lastmatch.rm_eo) /* Zero-length match -- advance one more so we don't get stuck */ nst++; /* Advance st based on previous matches */ st = nst; pc->lnstart = st; } /* Reflect the new matchidx in the context */ pc->matchidx = matchidx; - if (vflag) - c = !c; - return (c ? 0 : 1); + return matched; } /* * Safe malloc() for internal use. */ void * grep_malloc(size_t size) { void *ptr; if ((ptr = malloc(size)) == NULL) err(2, "malloc"); return (ptr); } /* * Safe calloc() for internal use. */ void * grep_calloc(size_t nmemb, size_t size) { void *ptr; if ((ptr = calloc(nmemb, size)) == NULL) err(2, "calloc"); return (ptr); } /* * Safe realloc() for internal use. 
*/ void * grep_realloc(void *ptr, size_t size) { if ((ptr = realloc(ptr, size)) == NULL) err(2, "realloc"); return (ptr); } /* * Safe strdup() for internal use. */ char * grep_strdup(const char *str) { char *ret; if ((ret = strdup(str)) == NULL) err(2, "strdup"); return (ret); } /* * Print an entire line as-is, there are no inline matches to consider. This is * used for printing context. */ void grep_printline(struct str *line, int sep) { printline_metadata(line, sep); fwrite(line->dat, line->len, 1, stdout); putchar(fileeol); } static void printline_metadata(struct str *line, int sep) { bool printsep; printsep = false; if (!hflag) { if (!nullflag) { fputs(line->file, stdout); printsep = true; } else { printf("%s", line->file); putchar(0); } } if (nflag) { if (printsep) putchar(sep); printf("%d", line->line_no); printsep = true; } if (bflag) { if (printsep) putchar(sep); printf("%lld", (long long)(line->off + line->boff)); printsep = true; } if (printsep) putchar(sep); } /* * Prints a matching line according to the command line options. */ static void printline(struct parsec *pc, int sep) { size_t a = 0; size_t i, matchidx; regmatch_t match; /* If matchall, everything matches but don't actually print for -o */ if (oflag && matchall) return; matchidx = pc->matchidx; /* --color and -o */ if ((oflag || color) && matchidx > 0) { /* Only print metadata once per line if --color */ if (!oflag && pc->printed == 0) printline_metadata(&pc->ln, sep); for (i = 0; i < matchidx; i++) { match = pc->matches[i]; /* Don't output zero length matches */ if (match.rm_so == match.rm_eo) continue; /* * Metadata is printed on a per-line basis, so every * match gets file metadata with the -o flag. 
*/ if (oflag) { pc->ln.boff = match.rm_so; printline_metadata(&pc->ln, sep); } else fwrite(pc->ln.dat + a, match.rm_so - a, 1, stdout); if (color) fprintf(stdout, "\33[%sm\33[K", color); fwrite(pc->ln.dat + match.rm_so, match.rm_eo - match.rm_so, 1, stdout); if (color) fprintf(stdout, "\33[m\33[K"); a = match.rm_eo; if (oflag) putchar('\n'); } if (!oflag) { if (pc->ln.len - a > 0) fwrite(pc->ln.dat + a, pc->ln.len - a, 1, stdout); putchar('\n'); } } else grep_printline(&pc->ln, sep); pc->printed++; } Index: stable/11 =================================================================== --- stable/11 (revision 354627) +++ stable/11 (revision 354628) Property changes on: stable/11 ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head:r320414,328559,332805-332806,332809,332832,332850-332852,332856,332858,332876,333351,334803,334806-334809,334821,334837,334889,335188,351769,352691