#!/usr/bin/perl -w
#
# grepfields: do 'grep', but only within certain fields of each record.
#
# 2008-08-09: Written by Steven J. DeRose, sderose@acm.org.
# 2011-01-12ff: Integrate csvFormat.pm.
# 2011-02-19 sjd: Much cleaning. Organize options.
#
# To do:
#     Lots of testing.
#     Option like -o except to get whole fields.
#     Match-on-whole-fields option.
#     Field specs like cut: -f 1-3,12-15
#     Base for line-numbers and byte offsets.
#     with --only-matching, include all field delims? delims between?

use strict;
use Getopt::Long;
use csvFormat;

my $version = "2011-02-19";

# Options like regular 'grep' (named like the long grep options)
# These are listed in the same order as in the MacOS man page for 'grep'.
# They are named for the single-char name, "_", and the long name.
# Options that just set other options, merely have a comment here.
#
# Options marked with "###" are per-expr options. They are stored in the
# current toFind object; when an actual regex is set (say, via -e), it
# is also stored there, and a new toFind object is started, inheriting the
# option values then in effect (not including -e).
#
my $A_afterContext      = 0;        # num lines of following context to show
# a_text                            # (sets binaryFiles=text)
my $B_beforeContext     = 0;        # num lines of preceding context to show
# C_context                         # (sets afterContext and beforeContext)
my $b_byteOffset        = 0;        # print byte offset
my $binaryFiles         = "sniff";  # binary, without-match, text, (sniff)
my $color = (defined $ENV{"USE_COLOR"} && -t STDOUT) ? 1:0;
my $c_count             = 0;        # just print count
my $D_devices           = "read";   # read or skip (FIFO and socket files)
my $d_directories       = "read";   # read or skip or recurse
my $E_extendedRegexp    = 0;        ### (### means per-expr options)
my $e_regexp            = "";       ### The actual regex to look for
my $F_fixedStrings      = 0;        ###
my $f_file              = "";       ### Take patterns from a file
my $G_basicRexexp       = 1;        ###
my $H_withFilename      = 0;        # always show filenames
# h_noFilename                      # (sets H_withFilename)
# help                              # (special)
# I                                 # (sets binaryFiles=without-match)
my $i_ignoreCase        = 0;        ###
my $L_filesWithoutMatch = 0;        ###
my $l_filesWithMatches  = 0;        ###
my $m_maxCount          = 0;
# mmap                              # (not supported)
my $n_lineNumber        = 0;        # show line numbers
my $o_onlyMatching      = 0;        # show only matching portions
my $label               = "";       # 'filename' to use for STDIN.
# line-buffered                     # (not supported)
my $P_perlRegexp        = 1;        ### Perl-style regular expressions
my $q_quiet             = 0;
# R_r_recursive                     # (same as --directories=recurse)
my $include             = "";       # recurse only in dirs matching this expr
my $exclude             = "";       # don't recurse into dirs matching pattern
my $s_noMessages        = 0;        # suppress !-e messages
my $U_binary            = 0;        # treat all files as binary (else sniff)
my $u_unixByteOffsets   = 0;        # report as if Unix-style text (strip CR)
# V_version                         # (special)
my $v_invertMatch       = 0;        ###
my $w_wordRegexp        = 0;        ###
my $x_lineRegexp        = 0;        ###
# y                                 # (same as -i)
my $Z_null              = 0;        # Write NUL instead of the usual.

# A toFind object holds a regex, and the options in effect when it was
# specified. @exprs collects the completed ones; $curTF is the one that
# option processing is currently filling in.
#
my @exprs = ();                     # An array of toFind objects.

# Direct method-call syntax: indirect-object syntax ("new toFind()") relies on
# parser guesswork, which matters here because package toFind is declared
# further down the file.
my $curTF = toFind->new();

# Seed the first toFind with the per-expr defaults declared above.
# setRegexpType() takes exactly these eleven values, in this order.
$curTF->setRegexpType(
    $E_extendedRegexp, $F_fixedStrings, $f_file, $G_basicRexexp,
    $P_perlRegexp, $w_wordRegexp, $x_lineRegexp, $i_ignoreCase,
    $L_filesWithoutMatch, $l_filesWithMatches, $v_invertMatch
    );

# Options not from 'grep'
#
my $base          = 10;         # Base to show offsets and line numbers in
my $combine       = "all";      # Require all, or just any, exprs to match?
#my $excludeFrom  = "";         # Path to a file listing patterns for -exclude
my $fileSep       = "--\n";     # Put between non-contiguous context groups
my $oEncoding     = "";         # Output char encoding
my $onlyFields    = 0;          # Only return fields that matched.
my $verbose       = 0;          # More messages (repeatable)

# CSV format options
#
my $basicType     = "CSV";
my $delim         = "";         # Field separator (passed on as "fieldSep")
my $escape        = "";
my $expect        = 0;
my $header        = 0;
my @names         = ();
my $msQuoting     = 0;
my $quote         = "";
my $subfield      = "";
my $iencoding     = "";
my $ilineends     = "";

# What -fRange accepts as a number, both on the command line and in fields.
my $numberExpr = qr/^\s*[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?\s*$/;

# Name and record number of the input currently being scanned, kept at file
# level so isRecordAHit() can mention them in its warnings.
my $curFile   = "";
my $curRecnum = 0;


#############################################################################
# Process options
#
# Per-expr options are written straight into $curTF, using the same hash
# keys that package toFind declares (extendedRegexp, fixedStrings, file, ...).
#
Getopt::Long::Configure ("no_ignore_case");

my $result = GetOptions(
    # csvFormat (input) options
    "basicType=s"              => \$basicType,
    "delim=s"                  => \$delim,
    "escape=s"                 => \$escape,
    "expect=i"                 => \$expect,
    "header!"                  => \$header,
    "msQuoting|msquoting!"     => \$msQuoting,
    "names=s"                  => \@names,
    "quote=s"                  => \$quote,
    "iencoding=s"              => \$iencoding,
    "ilineends=s"              => \$ilineends,
    "unicode!"                 => sub { $iencoding = "utf8"; },

    # grep-like options
    "A|after-context=i"        => \$A_afterContext,
    "a|text"                   => sub { $binaryFiles = "text"; },
    "B|before-context=i"       => \$B_beforeContext,
    "C|context=i"              => sub {
        if ($_[1] > $A_afterContext)  { $A_afterContext  = $_[1]; }
        if ($_[1] > $B_beforeContext) { $B_beforeContext = $_[1]; }
    },
    "b|byte-offset!"           => \$b_byteOffset,
    "binary-files|binaryFiles=s" => \$binaryFiles,
    "color|colour=s"           => \$color,
    "c|count!"                 => \$c_count,
    "D|devices=s"              => \$D_devices,
    "d|directories=s"          => \$d_directories,
    "E|extended-regexp!"       => sub { $curTF->{"extendedRegexp"} = 1; },
    "e|expr|regexp=s"          => sub {
        ($verbose) && warn "New expr: '$_[0]' '$_[1]'\n";
        $curTF->{"expr"} = $_[1];
        push @exprs, $curTF;
        # Start the next toFind, inheriting the options now in effect.
        $curTF = toFind->new($curTF);
    },
    "F|fixed-strings!"         => sub { $curTF->{"fixedStrings"} = 1; },
    "f|file=s"                 => sub { $curTF->{"file"} = $_[1]; },
    "G|basic-regexp!"          => sub { $curTF->{"basicRegexp"} = 1; },
    "H|with-filename"          => \$H_withFilename,
    "h|no-filename"            => sub { $H_withFilename = 0; },
    "help|?"                   => sub { system("perldoc", $0); exit; },
    "I!"                       => sub { $binaryFiles = "without-match"; },
    "i|ignore-case|y!"         => sub { $curTF->{"ignoreCase"} = 1; },
    # -L and -l decide what is printed per file, so the file-level flags are
    # set too (doOneFile() reads those).
    "L|files-without-matches|files-without-match" => sub {
        $curTF->{"filesWithoutMatch"} = 1;
        $L_filesWithoutMatch = 1;
    },
    "l|files-with-matches"     => sub {
        $curTF->{"filesWithMatches"} = 1;
        $l_filesWithMatches = 1;
    },
    "m|max|max-count=i"        => \$m_maxCount,
    "mmap"                     => sub { die "-mmap is not supported.\n"; },
    "n|line-number!"           => \$n_lineNumber,
    "o|only-matching!"         => \$o_onlyMatching,
    "label=s"                  => \$label,
    "line-buffered"            => sub {
        die "-line-buffered is not supported.\n";
    },
    "P|perl-regexp!"           => sub { $curTF->{"perlRegexp"} = 1; },
    "q|quiet|silent!"          => \$q_quiet,
    "R|r|recursive!"           => sub { $d_directories = "recurse"; },
    "include=s"                => \$include,
    "exclude=s"                => \$exclude,
    "s|no-messages!"           => \$s_noMessages,
    "U|binary"                 => \$U_binary,
    "u|unix-byte-offsets"      => \$u_unixByteOffsets,
    "V|version"                => sub {
        die "Version of $version, by Steven J. DeRose.\n";
    },
    "v|invert-match!"          => sub { $curTF->{"invertMatch"} = 1; },
    "w|word-regexp|wordRegexp!" => sub { $curTF->{"wordRegexp"} = 1; },
    "x|line-regexp|field-regexp|lineRegexp!"
                               => sub { $curTF->{"lineRegexp"} = 1; },
    # "y" (see "i")
    "Z|null"                   => \$Z_null,

    # Now, our extra options
    "combine=s"                => \$combine,
    "fields=s"                 => sub { $curTF->{"fieldsToUse"} = $_[1]; },
    "fileSep=s"                => \$fileSep,
    "fRange|frange=s"          => sub {
        (my $fRange = $_[1]) =~ s/^\s+|\s+$//g;
        my @tokens = split(/\s*,\s*/, $fRange);
        (scalar @tokens == 2) ||
            die "Bad format for -fRange $fRange.\n";
        for my $tok (@tokens) {
            ($tok =~ $numberExpr) ||
                die "Bad number '$tok' in -fRange $fRange.\n";
        }
        my ($frMin, $frMax) = map { $_ + 0 } @tokens;
        ($frMin > $frMax) &&
            die "min/max for -fRange $fRange out of order.\n";
        $curTF->{frMin} = $frMin;
        $curTF->{frMax} = $frMax;
        $curTF->{gotfRange} = 1;
    },
    "oencoding=s"              => \$oEncoding,
    "verbose+"                 => \$verbose,
    );

($result) || die "Bad options.\n";

# Let -names be given either repeatedly or as one comma-separated list.
@names = split(/\s*,\s*/, join(",", @names));


###############################################################################
# Check options and prep expressions to search for
#
# Every -e already pushed its toFind. The trailing $curTF only counts when it
# carries a test of its own: a range, or (when nothing else was given) a
# pattern file.
if ((defined $curTF->{expr} && $curTF->{expr} ne "")
    || $curTF->{gotfRange}
    || (!@exprs && $curTF->{file})) {
    push @exprs, $curTF;
}
scalar(@exprs) || die "No expressions specified.\n";

($binaryFiles =~ m/^(binary|without-match|text|sniff)$/) ||
    die "Bad value '$binaryFiles' for -binaryFiles option.\n";

($combine =~ m/^(all|any)$/) ||
    die "Bad value '$combine' for -combine option (use 'all' or 'any').\n";

# Check validity of options for each expr, and precompute what the matching
# loop needs: {compiled} (list of qr// objects) and {fieldList} (sorted list
# of 1-based field numbers).
#
my $totalExprRecs = 0;
for my $tf (@exprs) {
    my @patterns = ();
    if (defined $tf->{expr} && $tf->{expr} ne "") {
        push @patterns, $tf->{expr};
    }

    if ($tf->{file}) {
        my $path = $tf->{file};
        open(my $efh, "<", $path) ||
            die "Can't find -f file '$path'.\n";
        my @fromFile = ();
        while (my $ex = <$efh>) {
            # ($ex =~ m/^#/) || next;
            chomp $ex;
            push @fromFile, $ex;
        }
        close $efh;
        ($verbose) &&
            warn "Loaded " . scalar(@fromFile) . " exprs from '$path'.\n";
        $totalExprRecs += scalar(@fromFile);
        # A fresh array each time: toFind objects copied from one another
        # would otherwise share (and keep appending to) the same list.
        $tf->{fileExprs} = [ @fromFile ];
        push @patterns, @fromFile;
    }

    if ($tf->{wordRegexp} && $tf->{lineRegexp}) {
        die "Can't specify both -word-regexp and -line-regexp.\n";
    }
    $tf->{compiled} = [ map { compilePattern($tf, $_) } @patterns ];

    my $spec = (defined $tf->{fieldsToUse}) ? $tf->{fieldsToUse} : "";
    ($spec =~ m/^(\d+|\d+-\d+)(,(\d+|\d+-\d+))*$/) ||
        die "Bad -fieldsToUse expression '$spec'.\n";
    $tf->{fieldsToUse} = $spec;

    # expandFieldsToUse() returns a sparse array indexed by field number, so
    # its last index (not its length) is the highest field referenced.
    my @fds = @{$tf->expandFieldsToUse()};
    $tf->{fieldList} = [ grep { defined $fds[$_] } (1..$#fds) ];
    if ($expect > 0 && $#fds > $expect) {
        warn "Reference to field " . $#fds .
            ", but -expect is $expect.\n";
    }
} # per expr objects


###############################################################################
# Other option checks
#
if ($oEncoding) {
    print "";
    binmode(STDOUT, ":encoding($oEncoding)");
}

#if ($excludeFrom) {
#    open(EX, "<$excludeFrom") || die
#        "Can't find -exclude-from file '$excludeFrom'.\n";
#    while (my $ex = <EX>) {
#        chomp $ex;
#        push @exclude, $ex;
#    }
#    close EX;
#}


###############################################################################
# Set up CSV handling. Actual headers are loaded per-file.
#
my $csvf = csvFormat->new();

$basicType = uc($basicType);
($basicType =~ m/^(CSV|XML|ARFF)$/) ||
    die "Unknown -basicType '$basicType'\n";

setOptions($csvf);

my @headerNames = ();           # Must update manually if you change names
my %headerNamesHash = ();       # ...

if (scalar(@names)>0) {
    ($header) && die "Can't use both -names and -header.\n";
}


###############################################################################
# Main: scan each path named on the command line.
#
my $nFiles         = 0;
my $nFound         = 0;
my $totLines       = 0;
my $nBadFieldCount = 0;
my $nBinaryFiles   = 0;
my $nDirectories   = 0;
my $nPlainFiles    = 0;

# 'defined' so that a file literally named "0" does not end the loop.
while (defined(my $arg = shift @ARGV)) {
    doOnePath($arg);
} # while shift

($q_quiet) ||
    warn "Done, $nFound hits, $totLines records, $nFiles files.\n";

# With -q the exit status is the only result, so report "no match" as 1.
exit(($q_quiet && !$nFound) ? 1 : 0);


###############################################################################
# Dispatch one path (from the command line or from a recursive directory
# walk) to the directory or file handler.
#
# Binary files (per Perl's -B heuristic) are skipped unless -a /
# --binary-files=text was given; they are counted unless the mode is
# without-match.
#
sub doOnePath {
    my ($path) = @_;

    if (! -e $path) {                               # Doesn't exist
        ($s_noMessages) || warn "Could not find file '$path'.\n";
        return;
    }
    if (-d $path) {                                 # directory
        doOneDir($path);
        return;
    }
    # -B is also true for empty files; those are harmless to scan, and -L
    # still needs to see them.
    if ($binaryFiles ne "text" && -s $path && -B $path) {   # binary file
        ($verbose) && warn "Binary file '$path'\n";
        ($binaryFiles eq "without-match") || $nBinaryFiles++;
        return;
    }
    # (handled devices here)
    ($verbose) && warn "Normal file '$path'\n";     # regular file
    $nFiles++;
    doOneFile($path);
} # doOnePath


###############################################################################
# Handle a directory according to --directories: skip it, read it as if it
# were a file, or recurse into it (entries starting with "." are ignored).
#
sub doOneDir {
    my ($d) = @_;
    ($verbose) && warn "Directory '$d'\n";
    $nDirectories++;

    if ($d_directories eq "skip") {
        return;
    }
    elsif ($d_directories eq "read") {
        doOneFile($d);
    }
    # An empty pattern must not be used as a regex: m// does not mean
    # "never matches".
    elsif ($exclude ne "" && $d =~ m/$exclude/) {
        return;
    }
    elsif ($d_directories eq "recurse") {
        my $dh;
        opendir($dh, $d) || die "Couldn't open '$d'\n";
        my @entries = sort grep { $_ !~ m/^\./ } readdir($dh);
        closedir $dh;
        # readdir() returns bare names; tests and opens need the full path.
        for my $entry (@entries) {
            doOnePath("$d/$entry");
        }
    }
    else {
        ($verbose) && warn "doOneDir: Not recursing on dir '$d'.\n"
    }
} # doOneDir


###############################################################################
# Scan one file record by record and print hits, context, counts, or the
# file name, according to the grep-like output options.
#
# Updates the file-level counters $nPlainFiles, $totLines and $nFound.
# Offsets are measured with length(), so with -iencoding they count
# characters rather than bytes.
#
sub doOneFile {
    my $f = $_[0];
    ($verbose) && warn "******* Starting file '$f'\n";
    $nPlainFiles++;

    if ($exclude ne "" && $f =~ m/$exclude/) {
        ($verbose>1) && warn "Excluding file '$f'.\n";
        return;
    }
    if ($include ne "" && $f !~ m/$include/) {
        ($verbose>1) && warn "Not including file '$f'.\n";
        return;
    }
    ($verbose>1) && warn "Not excluded or un-included: $f.\n";

    my $recnum             = 0;
    my @beforeContextLines = ();
    my $stillToPrint       = 0;     # after-context lines still owed
    my $lastShown          = 0;     # record number of last line printed
    my $hitsInThisFile     = 0;
    my $offset             = 0;     # offset of the start of the next record
    my $crCount            = 0;     # CRs seen before the current record

    my $fh;
    if (!open($fh, "<", $f)) {
        ($s_noMessages) || warn "Unable to open file '$f'.\n";
        return;
    }
    if ($iencoding) {
        binmode($fh, ":encoding($iencoding)");
    }
    ($verbose>1) && warn "Opened '$f'\n";

    $curFile   = $f;
    $curRecnum = 0;

    if ($header) {
        # Read ONE record in scalar context; <$fh> written directly in an
        # argument list would slurp the whole file.
        my $headerRec = <$fh>;
        if (defined $headerRec) {
            $offset  += length($headerRec);
            $crCount += ($headerRec =~ tr/\r//);
            my @labels = @{$csvf->parseHeader($headerRec)};
            if ($verbose) {
                warn "Read header, labels:\n";
                # NOTE(review): starts at 1, presumably because csvFormat
                # numbers fields from 1 -- confirm against parseHeader().
                for my $i (1..(scalar(@labels)-1)) {
                    warn sprintf("    %4d '%s'\n", $i, $labels[$i]);
                }
            }
        }
    }

    # Options that replace per-line output with something else.
    my $suppress = ($c_count || $q_quiet
        || $L_filesWithoutMatch || $l_filesWithMatches) ? 1:0;
    my $useContext = ($A_afterContext > 0 || $B_beforeContext > 0) ? 1:0;

    while (my $rec = <$fh>) {
        $recnum++;
        $totLines++;
        $curRecnum = $recnum;

        # Remember where this record starts before advancing the totals.
        my $recOffset = $offset;
        my $crBefore  = $crCount;
        $offset  += length($rec);
        $crCount += ($rec =~ tr/\r//);

        if (!$U_binary) { chomp $rec; }
        ($verbose>1) && warn "Checking rec $recnum: '$rec'\n";

        # Printable form: exactly one trailing newline.
        my $out = $rec;
        ($out =~ m/\n\z/) || ($out .= "\n");

        # inverted exprs are handled by isRecordAHit().
        my $hitText = isRecordAHit($rec);

        # Report or count hit(s)
        if (defined $hitText) {
            $nFound++;
            $hitsInThisFile++;
            if (!$suppress) {
                if ($useContext) {
                    # Separator only between groups that are not adjacent.
                    my $firstShown = $recnum - scalar(@beforeContextLines);
                    if ($lastShown > 0 && $firstShown > $lastShown + 1) {
                        print "$fileSep";
                    }
                    print @beforeContextLines;
                    @beforeContextLines = ();
                }
                my $prefix = "";
                if ($H_withFilename) {
                    $prefix .= $f . ($Z_null ? chr(0) : ":");
                }
                if ($n_lineNumber) {
                    $prefix .= "$recnum:";
                }
                if ($b_byteOffset) {
                    # -u only modifies what -b reports.
                    $prefix .= ($u_unixByteOffsets
                        ? ($recOffset - $crBefore) : $recOffset) . ":";
                }
                print $prefix . ($o_onlyMatching ? "$hitText\n" : $out);
                $lastShown = $recnum;
            }
            $stillToPrint = $A_afterContext;
            # -m is a per-file limit.
            if ($m_maxCount > 0 && $hitsInThisFile >= $m_maxCount) { last; }
        } # hitText
        elsif ($stillToPrint > 0) {
            $stillToPrint--;
            if (!$suppress) {
                print $out;
                $lastShown = $recnum;
            }
        }
        elsif ($B_beforeContext > 0 && !$suppress) {
            # Keep last n unprinted lines around in case we'll need them.
            push @beforeContextLines, $out;
            if (scalar(@beforeContextLines) > $B_beforeContext) {
                shift @beforeContextLines;
            }
        }
    } # EOF
    close($fh);

    if (($L_filesWithoutMatch && !$hitsInThisFile) ||
        ($l_filesWithMatches  &&  $hitsInThisFile)) {
        if (!$q_quiet) {
            if ($c_count) { print "$f:$hitsInThisFile\n"; }
            else          { print $f . ($Z_null ? chr(0) : "\n"); }
        }
    }
    elsif ($c_count && !$q_quiet
        && !$L_filesWithoutMatch && !$l_filesWithMatches) {
        print(($H_withFilename ? "$f:" : "") . "$hitsInThisFile\n");
    }
} # doOneFile


###############################################################################
# Test the record against all the expressions. With -combine all (default)
# it must satisfy every toFind -- matching the non-inverted ones and not
# matching the inverted ones; with -combine any, one is enough.
#
# Returns: undef if the record is not a hit. Otherwise the text that was
# matched by the first satisfied non-inverted expression, or the whole
# record when no such text exists (e.g. only inverted expressions).
#
sub isRecordAHit {
    my ($rec) = @_;

    # Parse the line into fields
    my @fieldContents = @{$csvf->parseRecord($rec)};
    if ($expect && scalar(@fieldContents)!=$expect) {
        warn "File $curFile, record $curRecnum: has " .
            scalar(@fieldContents) .
            " fields, not expected $expect.\n";
        $nBadFieldCount++;
    }

    # See which exprs the record matches.
    my $nMatches = 0;
    my $hitText;
    for my $tf (@exprs) {
        my $someFieldMatched = 0;
        my $matchedText;
        # (Field names are not supported yet; {fieldList} is all numeric.)
        for my $fNum (@{$tf->{fieldList}}) {
            ($fNum <= scalar @fieldContents) || next;   # record too short
            my $curField = $fieldContents[$fNum-1];
            (defined $curField) || next;
            ($verbose>2) && warn "  checking field #$fNum: '$curField'\n";
            my $text = fieldMatches($tf, $curField);
            if (defined $text) {
                $someFieldMatched = 1;
                $matchedText = $text;
                last;
            }
        } # per field

        my $inverted = ($tf->{"invertMatch"}) ? 1:0;
        if ($someFieldMatched != $inverted) {
            $nMatches++;
            if (!defined $hitText && defined $matchedText) {
                $hitText = $matchedText;
            }
        }
    } # per expr

    # Now combine the results of all the individual matches.
    my $isHit = 0;
    if ($combine eq "all") {
        $isHit = ($nMatches >= scalar(@exprs)) ? 1:0;
    }
    elsif ($combine eq "any") {
        $isHit = ($nMatches > 0) ? 1:0;
    }
    ($isHit) || return;
    return((defined $hitText) ? $hitText : $rec);
} # isRecordAHit


###############################################################################
# Test one field value against one toFind.
#
# The field must match at least one of the toFind's compiled patterns (if it
# has any) and, if -fRange was given, must also be a number within
# frMin..frMax inclusive. A toFind with neither patterns nor a range matches
# nothing.
#
# Returns: the matched portion of the field (the whole field for a
# range-only test), or nothing if the field does not match.
#
sub fieldMatches {
    my ($tf, $field) = @_;
    my @patterns = @{ $tf->{compiled} || [] };
    (@patterns || $tf->{gotfRange}) || return;

    my $text = $field;
    if (@patterns) {
        my $found = 0;
        for my $re (@patterns) {
            ($verbose>3) && warn "    vs expr /$re/\n";
            if ($field =~ $re) {
                $text = substr($field, $-[0], $+[0] - $-[0]);
                $found = 1;
                last;
            }
        }
        ($found) || return;
    }
    if ($tf->{gotfRange}) {
        ($field =~ $numberExpr) || return;      # not a number at all
        (my $num = $field) =~ s/^\s+|\s+$//g;
        $num += 0;
        ($num >= $tf->{frMin} && $num <= $tf->{frMax}) || return;
    }
    return($text);
} # fieldMatches


###############################################################################
# Turn one pattern string into a compiled regex, applying the toFind's
# per-expr options: -F (quote metacharacters), -w or -x (anchor), and -i.
# Dies with a message if the resulting expression is not a valid regex.
#
sub compilePattern {
    my ($tf, $pattern) = @_;
    if ($tf->{fixedStrings}) {
        $pattern = quotemeta($pattern);
    }
    if ($tf->{wordRegexp}) {
        $pattern = "\\b(?:$pattern)\\b";
    }
    elsif ($tf->{lineRegexp}) {
        $pattern = "^(?:$pattern)\$";
    }
    my $re = eval { ($tf->{ignoreCase}) ? qr/$pattern/i : qr/$pattern/ };
    (defined $re) || die "Bad regular expression '$pattern': $@";
    return($re);
} # compilePattern


###############################################################################
# Copy the command-line input-format options into the csvFormat object, and
# (with -verbose) report the resulting settings.
#
sub setOptions {
    my ($csvf) = @_;
    if ($verbose)   { $csvf->setVerbose(1); }
    if ($basicType) { $csvf->setFormatOption("basicType",  $basicType); }
    if ($delim)     { $csvf->setFormatOption("fieldSep",   $delim); }
    if ($escape)    { $csvf->setFormatOption("escape",     $escape); }
    if ($expect)    { $csvf->setFormatOption("minFields",  $expect); } # ???
    if ($header)    { $csvf->setFormatOption("header",     $header); }
    if ($quote)     { $csvf->setFormatOption("quote",      $quote); }
    if ($msQuoting) { $csvf->setFormatOption("msQuoting",  $msQuoting); }
    if ($subfield)  { $csvf->setFormatOption("subfieldSep",$subfield); }

    if (scalar(@names)>0) {
        # Field numbers count from 1; @names is an ordinary 0-based array.
        for my $i (1..(scalar(@names))) {
            $csvf->setFieldName($i,$names[$i-1]);
        }
    }

    ($verbose) || return;
    warn "******* Input Format Options:\n";
    showOption($csvf, "basicType");
    showOption($csvf, "delim");
    showOption($csvf, "escape");
    #showOption($csvf, "expect");
    showOption($csvf, "header");
    showOption($csvf, "quote");
    showOption($csvf, "msQuoting");
    #showOption($csvf, "subfield");
} # setOptions

# Warn with the current value of one csvFormat option. $opt is the
# command-line name; "delim" is looked up under csvFormat's name "fieldSep".
#
sub showOption {
    my ($csv, $opt) = @_;
    (my $csvOptName = $opt) =~ s/delim/fieldSep/;
    my $value = $csv->getFormatOption($csvOptName);
    (defined $value) || ($value = "");
    # showControls() is not defined in this file (presumably exported by
    # csvFormat -- TODO confirm); fall back to the raw value without it.
    my $shown = defined(&main::showControls)
        ? main::showControls($value) : $value;
    warn sprintf("  Option %-12s '%s'\n", $opt, $shown);
} # showOption


###############################################################################
###############################################################################
# This represents a single regex to search for, along with all the options
# in effect when it was specified. Option processing writes the relevant
# option values directly into a "current" toFind object (and creates a new
# toFind whenever it sees -expr).
#
package toFind;

# Constructor.
#   $copyThis (optional): an existing toFind whose option values are
#   inherited by the new object.
# The search targets themselves are never inherited: expr is always reset
# to "", and the -fRange state (gotfRange, frMin, frMax) is always cleared.
#
sub new {
    my ($class, $copyThis) = @_;
    my $self = {
        version           => "2011-02-19",
        fieldsToUse       => "",
        extendedRegexp    => 0,
        fixedStrings      => 0,
        file              => 0,
        basicRegexp       => 0,
        perlRegexp        => 1,
        wordRegexp        => 0,
        lineRegexp        => 0,
        ignoreCase        => 0,
        filesWithoutMatch => 0,
        filesWithMatches  => 0,
        maxCount          => 0,
        invertMatch       => 0,
        gotfRange         => 0,
        frMin             => 0.0,
        frMax             => 0.0,
    };
    if (defined $copyThis) {
        for my $k (keys %$copyThis) {
            my $v = $copyThis->{$k};
            # Copy list values one level deep so the two objects do not end
            # up appending to the same array.
            $self->{$k} = (ref($v) eq 'ARRAY') ? [ @$v ] : $v;
        }
    }
    $self->{expr} = "";
    $self->{gotfRange} = 0;
    $self->{frMin} = $self->{frMax} = 0;
    bless $self, $class;
    return($self);
}

# Used to copy in the current state of expr-specific options, so they're
# associated with each expr correctly.
#
# Takes exactly eleven values after the object, in this order:
#   extendedRegexp, fixedStrings, file, basicRegexp, perlRegexp, wordRegexp,
#   lineRegexp, ignoreCase, filesWithoutMatch, filesWithMatches, invertMatch
# maxCount is not part of the list and is left as it was.
# Dies if called with any other number of arguments.
#
sub setRegexpType {
    (scalar(@_) == 12) ||
        die "Wrong number of args to setRegexpType().\n";
    my $self = shift @_;
    ($self->{extendedRegexp},
     $self->{fixedStrings},
     $self->{file},
     $self->{basicRegexp},
     $self->{perlRegexp},
     $self->{wordRegexp},
     $self->{lineRegexp},
     $self->{ignoreCase},
     $self->{filesWithoutMatch},
     $self->{filesWithMatches},
     $self->{invertMatch}) = @_;
    return;
}

# Returns the version string stored in the object.
sub getVersion {
    my ($self) = @_;
    return($self->{version});
}

# Expand the fieldsToUse spec (comma-separated field numbers and
# from-to ranges, e.g. "1,3-5") into a sparse array.
#
# Returns: a reference to an array in which element N is defined (a count)
# exactly when field N was requested; all other elements are undef.
# Dies if a range is given with from > to. Tokens that are neither a number
# nor a range are ignored.
#
sub expandFieldsToUse {
    my ($self) = @_;
    my @theList = ();
    my $spec = (defined $self->{fieldsToUse}) ? $self->{fieldsToUse} : "";
    for my $t (split(/,/, $spec)) {
        $t =~ s/^\s+|\s+$//g;
        # Ranges must be tested first: a bare-number test would also accept
        # the leading number of "3-5".
        if ($t =~ m/^(\d+)-(\d+)$/) {
            my $from = $1;
            my $to = $2;
            ($from>$to) && die
                "Fields $from-$to out of order in '$self->{fieldsToUse}'.\n";
            for my $i ($from..$to) {
                $theList[$i]++;
            }
        }
        elsif ($t =~ m/^(\d+)$/) {
            $theList[$1]++;
        }
    } # $t
    return(\@theList);
}


###############################################################################
###############################################################################
#

=pod

=head1 Usage

    grepFields [options] [files]

Essentially the same as C<grep>, except it only looks within specified
fields of CSV-style records. Different expressions can be sought in
different fields.
The field(s) to be matched are specified with the L<"-fields"> option, which
applies to following matches until changed by another L<"-fields"> option.

Several options can be repeated in order to change their value for
following matches:

=over

I<-E> or I<--extended-regexp>,
I<-F> or I<--fixed-strings>,
I<-f> or I<--file>,
I<-G> or I<--basic-regexp>,
I<-i> or I<--ignore-case>,
I<-L> or I<--files-without-match>,
I<-l> or I<--files-with-matches>,
I<-P> or I<--perl-regexp>,
I<-v> or I<--invert-match>,
I<-w> or I<--word-regexp>,
I<-x> or I<--line-regexp>.

=back

=head1 Input formats

The delimiter and other input-format options can be set with the options
standard for my CSV-oriented scripts (which are built on C<csvFormat.pm>):

=over

=item * B<-delim> I<string>

=item * B<-escape> I<char>

=item * B<-expect> I<n>

=item * B<-msQuoting>

=item * B<-quote> I<char>

=item * B<-header>

=item * B<-iencoding> I<name>

=item * B<-ilineends> I<type>

=back

=head1 Options I<not> like those of C<grep>

=over

=item * B<-base> I<10|16>

Whether to show line numbers and/or file offsets in decimal (default) or hex.

=item * B<--excludeFrom> I<file>

Use I<file> as a list of patterns to treat as if they had been specified
for the I<-exclude> option.

=item * B<-expect> I<n>

Warn for any record that doesn't have exactly I<n> fields.

=item * B<-expr> I<regex>

B<Unlike> grep, you can specify multiple regular expressions to match for,
possibly in different fields and with different options (such as I<-i>).

=item * B<-fields> I<list>

Which fields to look in (repeatable, and applies to following matches
until replaced by another L<"-fields"> option).
Fields count from 1, not 0 (with L<"-header">, field names will eventually
be supported as well).

=item * B<-frange> I<min,max>

Search for a value of at least I<min> and at most I<max>, in any of the
currently-active fields. For example, '-frange 10,57' would require a number
between 10 and 57 inclusive. Ordinary decimal number formats (as for C's
scanf %g) are ok. This is in addition to any requirements specified by -expr.

=item * B<-oencoding> I<name>

Set the output encoding to I<name>.

=item * B<-only-fields>

Similar to the L<"-only-matching"> option, but causing only matching
I<fields> to be returned, rather than only the matching I<parts> of fields.

=back

=head1 Unsupported standard C<grep> options

=over

=item I<-D> or I<--devices=action> read, skip

=item I<-E> (extended regexes; I<-P> is the default, which is pretty 'extended')

=item I<-G> (basic regexes)

=item I<-T> or I<--initial-tab>

=item I<--line-buffered>

=item I<--mmap>

=item I<-z> or I<--null-data>

=back

=head1 Options like those of C<grep>

(from the usual C<grep> descriptions -- differences added in B<bold>).

=over

=item * B<-A> I<NUM> or B<--after-context=>I<NUM>

Print I<NUM> lines of trailing context after matching lines.
Places a line containing I<--> between contiguous groups of matches.

=item * B<-a> or B<--text>

Process a binary file as if it were text; this is equivalent
to the I<--binary-files=text> option.

=item * B<-B> I<NUM> or B<--before-context=>I<NUM>

Print I<NUM> lines of leading context before matching lines.
Places a line containing I<--> between contiguous groups of matches.

=item * B<-C> I<NUM> or B<--context=>I<NUM>

Print I<NUM> lines of output context. Places a line containing I<-->
between contiguous groups of matches.

=item * B<-b> or B<--byte-offset>

Print the byte offset within the input file before each line of output.

=item * B<--binary-files=>I<TYPE>

If the first few bytes of a file indicate that the file contains
binary data, assume that the file is of type I<TYPE>. By default,
I<TYPE> is binary, and grep normally outputs either a one-line message
saying that a binary file matches, or no message if there is
no match. If I<TYPE> is I<without-match>, grep assumes that a binary
file does not match; this is equivalent to the -I option. If
I<TYPE> is I<text>, grep processes a binary file as if it were text;
this is equivalent to the I<-a> option. I<Warning:> grep
I<--binary-files=text> might output binary garbage, which can have nasty
side effects if the output is a terminal and if the terminal driver
interprets some of it as commands.

=item * B<--colour[=>I<WHEN>B<]>, B<--color[=>I<WHEN>B<]>

Surround the matching string with the marker found in the C<GREP_COLOR>
environment variable. I<WHEN> may be I<never>, I<always>, or I<auto>.
B<Unlike> grep, defaults to true if the environment variable C<USE_COLOR>
is set and STDOUT is going to a terminal.

=item * B<-c> or B<--count>

Suppress normal output; instead print a count of matching lines
for each input file. With the I<-v> or B<--invert-match> option (see
below), count non-matching lines.

=item * B<-D> I<ACTION> or B<--devices=>I<ACTION>

If an input file is a device, FIFO or socket, use I<ACTION> to
process it. By default, I<ACTION> is I<read>, which means that devices
are read just as if they were ordinary files. If I<ACTION> is I<skip>,
devices are silently skipped.

=item * B<-d> I<ACTION> or B<--directories=>I<ACTION>

If an input file is a directory, use I<ACTION> to process it. By
default, I<ACTION> is I<read>, which means that directories are read
just as if they were ordinary files. If I<ACTION> is I<skip>,
directories are silently skipped. If I<ACTION> is I<recurse>, grep reads
all files under each directory, recursively; this is equivalent to
the I<-r> option.

=item * B<-E> or B<--extended-regexp>

Interpret I<PATTERN> as an extended regular expression (see below).

=item * B<-e> I<PATTERN> or B<--regexp=>I<PATTERN>

Use I<PATTERN> as the pattern; useful to protect patterns beginning
with I<->.

=item * B<-F> or B<--fixed-strings>

Interpret I<PATTERN> as a list of fixed strings, separated by
newlines, any of which is to be matched.

=item * B<-f> I<FILE> or B<--file=>I<FILE>

Obtain patterns from I<FILE>, one per line. The empty file
contains zero patterns, and therefore matches nothing.

=item * B<-G> or B<--basic-regexp>

Interpret I<PATTERN> as a basic regular expression (see below).
This is the default.

=item * B<-H> or B<--with-filename>

Print the filename for each match.

=item * B<-h> or B<--no-filename>

Suppress the prefixing of filenames on output when multiple
files are searched.

=item * B<--help>

Output a brief help message.

=item * B<-I>

Process a binary file as if it did not contain matching data;
this is equivalent to the I<--binary-files=without-match> option.

=item * B<-i> or B<--ignore-case>

Ignore case distinctions in both the I<PATTERN> and the input files.

=item * B<-L> or B<--files-without-match>

Suppress normal output; instead print the name of each input
file from which no output would normally have been printed. The
scanning will stop on the first match.

=item * B<-l> or B<--files-with-matches>

Suppress normal output; instead print the name of each input
file from which output would normally have been printed. The
scanning will stop on the first match.

=item * B<-m> I<NUM> or B<--max-count=>I<NUM>

Stop reading a file after I<NUM> matching lines. If the input is
standard input from a regular file, and I<NUM> matching lines are
output, grep ensures that the standard input is positioned to
just after the last matching line before exiting, regardless of
the presence of trailing context lines. This enables a calling
process to resume a search. When grep stops after I<NUM> matching
lines, it outputs any trailing context lines. When the I<-c> or
I<--count> option is also used, grep does not output a count
greater than I<NUM>. When the I<-v> or I<--invert-match> option is also
used, grep stops after outputting I<NUM> non-matching lines.

=item * B<--mmap>

If possible, use the mmap(2) system call to read input, instead
of the default read(2) system call. In some situations I<--mmap>
yields better performance. However, I<--mmap> can cause undefined
behavior (including core dumps) if an input file shrinks while
C<grep> is operating, or if an I/O error occurs.

=item * B<-n> or B<--line-number>

Prefix each line of output with the line number within its input file.

=item * B<-o> or B<--only-matching>

Show only the part of a matching line that matches I<PATTERN>.

=item * B<--label=>I<LABEL>

Displays input actually coming from standard input as input
coming from file I<LABEL>. This is especially useful for tools
like C<zcat>, e.g. C<zcat foo.gz | grep --label=foo something>.

=item * B<--line-buffered>

Turns on line buffering. However, this can be a performance penalty.
B<Unlike> grep, this is not supported.

=item * B<-P> or B<--perl-regexp>

Interpret I<PATTERN> as a Perl regular expression.
B<Unlike> grep, this is the default.

=item * B<-q> or B<--quiet> or B<--silent>

Quiet; do not write anything to standard output. Exit
immediately with zero status if any match is found, even if an
error was detected. Also see the I<-s> or I<--no-messages> option.

=item * B<-R, -r> or B<--recursive>

Read all files under each directory, recursively; this is
equivalent to the I<-d> recurse option.

=item * B<--include> I<PATTERN>

Recurse in directories only searching file matching I<PATTERN>.
(B<Unlike> grep, this is repeatable).

=item * B<--exclude> I<PATTERN>

Recurse in directories skip file matching I<PATTERN>.
(B<Unlike> grep, this is repeatable, and there is a related
L<"-excludeFrom"> option to take a list of exclude-patterns from a file.).

=item * B<-s> or B<--no-messages>

Suppress error messages about nonexistent or unreadable files.
Portability note: unlike GNU grep, traditional grep did not
conform to POSIX.2, because traditional grep lacked a I<-q> option
and its -s option behaved like GNU grep's I<-q> option. Shell
scripts intended to be portable to traditional grep should avoid
both I<-q> and I<-s> and should redirect output to C</dev/null> instead.

=item * B<-U> or B<--binary>

Treat the file(s) as binary. By default, under MS-DOS and
MSWindows, grep guesses the file type by looking at the contents
of the first 32KB read from the file. If grep decides the file
is a text file, it strips the CR characters from the original
file contents (to make regular expressions with C<^> and C<$> work
correctly). Specifying I<-U> overrules this guesswork, causing all
files to be read and passed to the matching mechanism verbatim;
if the file is a text file with CR/LF pairs at the end of each
line, this will cause some regular expressions to fail. This
option has no effect on platforms other than MS-DOS and MS-Windows.

=item * B<-u> or B<--unix-byte-offsets>

Report Unix-style byte offsets.
This switch causes grep to
report byte offsets as if the file were Unix-style text file,
i.e. with CR characters stripped off. This will produce results
identical to running grep on a Unix machine. This option has no
effect unless the I<-b> option is also used; it has no effect on
platforms other than MS-DOS and MS-Windows.

=item * B<-V> or B<--version>

Print the version number of grep to standard error. This
version number should be included in all bug reports (see below).

=item * B<-v> or B<--invert-match>

Invert the sense of matching, to select non-matching lines.

=item * B<-w> or B<--word-regexp>

Select only those lines containing matches that form whole
words. The test is that the matching substring must either be
at the beginning of the line, or preceded by a non-word
constituent character. Similarly, it must be either at the end
of the line or followed by a non-word constituent character.
Word-constituent characters are letters, digits, and the underscore.

=item * B<-x> or B<--line-regexp> or B<--field-regexp>

Select only those matches that exactly match the whole sequence of
fields currently being searched.

=item * B<-y>

Obsolete synonym for -i.

=item * B<-Z> or B<--null>

Output a zero byte (the ASCII NUL character) instead of the
character that normally follows a file name. For example, grep
-lZ outputs a zero byte after each file name instead of the
usual newline. This option makes the output unambiguous, even
in the presence of file names containing unusual characters like
newlines. This option can be used with commands like
find C<-print0>, C<perl -0>, C<sort -z>, and C<xargs -0>
to process arbitrary file names, even those that contain newline characters.

=back

=head1 Perl regexes

(see C<perldoc perlre> for details)

=head1 Related commands

grep, csvFormat.pm.

=head1 Known bugs and limitations

Many of the options have not had much testing (for example,
unix-byte-offsets and the exclude/include, binary, context, and directory
options).

=head1 Ownership

This work by Steven J.
DeRose is licensed under a Creative Commons
Attribution-Share Alike 3.0 Unported License. For further information on
this license, see http://creativecommons.org/licenses/by-sa/3.0/.

The author's present email is sderose at acm.org.

For the most recent version, see http://www.derose.net/steve/utilities/.

=cut