#!/usr/bin/perl -w
#
# hilite: colorize multiple regex matches in a stream.
#
# 2006-11-03: Written by Steven J. DeRose, sderoses@acm.org.
# 2008-03-23 sjd: Rewrite.
# 2008-04-03 sjd: Add -aname, -avalue, -file options.
# 2008-04-11 sjd: Fix doc.
# 2010-11-29 sjd: Cleanup. Finish -file.
#
# To do:
#     Postscript output so you can color-print? Linux 'enscript' supports
#     Option for filename-containing or filename-with-extension
#     -aname and -avalue should use lookbehind so only match target portion.
#     Option to hilite all occurrences of any words from file F.
#     Implement -man and -htmlstyle (accepted, but define no expressions).
#
use strict;
use warnings;
use Getopt::Long;
use Text::Tabs ();    # core module; replaces the external 'expand' command

my $version = "2010-11-29";

# Escape strings come from the external 'colorstring' command. Each color
# name is looked up at most once (see colorEscape() below).
#
my %colorCache         = ();    # color name => escape string
my $colorstringWarned  = 0;     # complain only once if colorstring won't run

my $cStart = colorEscape("red");
my $cEnd   = colorEscape("cancel");

# Options
#
my $curColor    = "red";
my %exprs       = ();   # expressions to hilite, and color for each
my $fixBars     = 0;    # try harder to detect real diff change-lines
my $ignorecase  = 1;    # ignore case for all -e exprs.
my $only        = 0;
my $quiet       = 0;
my $tabInterval = 4;
my $test        = 0;
my $untab       = 0;
my $verbose     = 0;
my $waitfor     = "";
my $wholelines  = 0;
my @loadedFiles = ();   # [ path, count ] for each -file, reported if -v

# Option flags for pre-defined expression sets
#
my $predefined_cvsstatus = 0;
my $predefined_diff      = 0;
my $predefined_entities  = 0;
my $predefined_err       = 0;
my $predefined_gi        = 0;
my $predefined_htmlstyle = 0;
my $predefined_man       = 0;
my $predefined_regex     = 0;
my $predefined_uri       = 0;
my $predefined_xml       = 0;
my $predefined_css       = 0;
my @predefined_aname     = ();
my @predefined_avalue    = ();
my @predefined_ns        = ();
my @predefined_tag       = ();


###############################################################################
# Process options
#
# Note: Getopt::Long catches a die() raised inside an option callback, warns
# with its message, and makes GetOptions() return false. Callbacks that are
# meant to end the program successfully therefore use exit, not die.
#
Getopt::Long::Configure ("ignore_case");
my $result = GetOptions(
    # Predefined target expressions
    "aname=s"    => \@predefined_aname,
    "avalue=s"   => \@predefined_avalue,
    "cvsstatus"  => \$predefined_cvsstatus,
    "diff"       => \$predefined_diff,
    "entities"   => \$predefined_entities,
    "err|error"  => \$predefined_err,
    "gi"         => \$predefined_gi,
    "htmlstyle"  => \$predefined_htmlstyle,
    "man"        => \$predefined_man,
    "ns=s"       => \@predefined_ns,
    "regex"      => \$predefined_regex,
    "tag=s"      => \@predefined_tag,
    "uri"        => \$predefined_uri,
    "xml"        => \$predefined_xml,
    "css"        => \$predefined_css,

    # Other options
    "c=s"        => sub {
        $curColor = $_[1];
        my $try = colorEscape($curColor);
        if (!$try) {
            warn "Unknown color '$curColor'..\n";
        }
        else {
            $cStart = $try;
        }
    },
    "e=s"        => sub { $exprs{$_[1]} = $cStart; },
    # "f" is listed explicitly: as a mere abbreviation it would be ambiguous
    # between -file and -fixBars.
    "f|file=s"   => sub {
        my $nf = loadExpressionFile($_[1]);
        push @loadedFiles, [ $_[1], $nf ];
    },
    "fixBars!"   => \$fixBars,
    # -F makes perldoc treat the argument as a file name, so this works even
    # if the script is not installed under the name 'hilite'.
    "h|help|?"   => sub { system("perldoc", "-F", $0); exit; },
    "i!"         => \$ignorecase,
    "l!"         => \$wholelines,
    "o!"         => \$only,
    "q|quiet!"   => \$quiet,
    # Without an explicit "r", -r would silently abbreviate -regex.
    "r"          => sub { $ignorecase = 0; },
    # Asking for a tab interval implies asking for tab expansion.
    "t=n"        => sub { $tabInterval = $_[1]; $untab = 1; },
    "test!"      => \$test,
    "u!"         => \$untab,
    "v!"         => \$verbose,
    "verbose+"   => \$verbose,
    "version"    => sub {
        print "Version of $version, by Steven J. DeRose.\n";
        exit;
    },
    "waitfor=s"  => \$waitfor,
);


###############################################################################
# Validate and default options
#
($result) || die "hilite: Bad options.\n";

# Test the argument count, not the truth of $ARGV[0], so that a file
# named "0" is still processed.
my @files = (@ARGV) ? @ARGV : ("--stdin--");

($tabInterval > 1) ||
    die "hilite: Bad tab interval '$tabInterval'.\n";
$Text::Tabs::tabstop = $tabInterval;

if ($verbose) {
    for my $loaded (@loadedFiles) {
        warn "Loaded $loaded->[1] expressions from file '$loaded->[0]'.\n";
    }
}

if ($predefined_man || $predefined_htmlstyle) {
    ($quiet) ||
        warn "hilite: -man and -htmlstyle are not implemented yet.\n";
}


###############################################################################
# Implement predefined target expressions
#
foreach my $aname (@predefined_aname) {
    addExprs("red",
        "<\\w[-_:.\\w\\d]*[^>]*\\s*(" . $aname . ")\\s*=\\s*\"[^\"]*\"",
        "<\\w[-_:.\\w\\d]*[^>]*\\s*(" . $aname . ")\\s*=\\s*'[^']*'");
}

foreach my $avalue (@predefined_avalue) {
    addExprs("red",
        "<\\w[-_:.\\w\\d]*[^>]*=\\s*\"(" . $avalue . ")\"",
        "<\\w[-_:.\\w\\d]*[^>]*=\\s*'(" . $avalue . ")'");
}

if ($predefined_cvsstatus) {
    $wholelines = 1;
    addExprs("green",  "Up-to-date", "Locally Added");
    addExprs("cyan",   "Locally Modified");
    addExprs("yellow", "Needs Patch", "Needs Checkout", "Unknown");
    addExprs("red",    "Needs Merge", "had conflicts", "Unresolved Conflict");
    addExprs("bg_red", "Invalid|Error");
    # Following is added by ~deroses/bin/mods...
    addExprs("bg_red", "but repository has");
}

# Require 2 spaces before "|" so we don't catch it in content.
if ($predefined_diff) {
    $wholelines = 1;
    addExprs("green",     "^\\s+>\\s");
    addExprs("bold_cyan", "\\s<\\s+\$");
    if ($fixBars) {
        warn "hilite: -fixBars may not work due to tabs.\n";
        # COLUMNS is frequently not exported to child processes.
        my $w = $ENV{COLUMNS};
        if (!defined $w || $w !~ m/\A\d+\z/ || $w < 1) { $w = 80; }
        my $min = int($w/2) - 2;
        if ($min < 0) { $min = 0; }
        my $max = $min + 4;             # look for "|" here
        my $barExpr = "^.{" . $min . "," . $max . "}\\|";
        ($verbose) &&
            warn "***** min $min, max $max, expr '$barExpr'.\n";
        addExprs("yellow", $barExpr);
    }
    else {
        addExprs("yellow", "\\s\\s\\|\\s");
        warn "Consider -fixBars to improve '|' detection.\n";
    }
}

if ($predefined_entities) {
    $exprs{"&[^;]*;"} = $cStart;
}

if ($predefined_err) {
    $ignorecase = 1;
    addExprs("red",
        "(syntax )?ERROR( occurred at|s)?",
        "(element|attribute)? *('\\w+')? *(is)? *(INVALID|NOT VALID)",
        "CANNOT|CAN'T",
        "MUST NOT|MAY NOT",
        "UNKNOWN( file| command| variable| directory)?(, )?",
        "FATAL",
        "TERMINAT(e|ed|ing)",
        "( \\w+)?EXCEPTION",
        "FAIL(ed)?");
    # would be nice to exclude "(0 failed)":
    # $exprs{"([1-9]+0\s+|[^0]\s+)FAIL(ed)?"}

    addExprs("yellow",
        "WARNINGs? *[:=]? *[0-9]*",
        "SHOULD NOT",
        "<<<= check the source here");

    addExprs("cyan",
        "INFO",
        "(XSLT )?MESSAGE:",
        "processed *(in *[0-9.]+ *sec\\.?)?");

    addExprs("green",
        "SUCCESS(ful)?(ly)?",
        "SUCCEED(ed|ing)?",
        "START(ed|ing) *(file|document)*",
        "RUNNING:? *(file|document)*",
        "FINISH(ed|ing)",
        "COMPLET(e|ed|ing)?",
        "DONE");

    addExprs("bold",
        "(/[-\\w]*\\.x.l,)? *line [0-9]+((, )(column|offset) [0-9]+)");
} # -err

if ($predefined_gi) {
    $exprs{"<[-\\w]+\\s"} = $cStart;
    # NOTE(review): the end-tag expression reached this copy of the file
    # truncated to "]*>"; the form below is a reconstruction -- confirm
    # against an intact copy.
    $exprs{"</[-\\w]+[^>]*>"} = $cStart;
}

# Tags whose name has one of the given namespace prefixes; same shape as
# the xsl: rule under -xml.
foreach my $prefix (@predefined_ns) {
    ($prefix =~ m/\A\w[-.\w]*\z/) ||
        die "hilite: Invalid -ns prefix '$prefix'.\n";
    $exprs{"<\\/?" . quotemeta($prefix) . ":[^>]*(>|\$)"} = $cStart;
}

foreach my $t (@predefined_tag) {
    ($t =~ m/\A\w[-:\w]*\z/) ||
        die "hilite: Invalid -tag type '$t'.\n";
    $exprs{"<\\/?" . $t . "(\\s[^>]*|\\/)?>"} = $cStart;
}

if ($predefined_uri) {
    # RFC 1738
    # $e = "https?:[-_\$.+!*,()&a-zA-Z0-9]*"; # RFC 1738
    $exprs{"http"} = $cStart;
}

if ($predefined_xml) {
    addExprs("magenta", "<[^!][^>]*(>|\$)");
    # NOTE(review): the declaration/comment expression reached this copy of
    # the file truncated to "]*(>|\$)"; the form below is a reconstruction
    # -- confirm against an intact copy.
    addExprs("yellow",  "<![^>]*(>|\$)");
    addExprs("cyan",    "<\\/?xsl:[^>]*(>|\$)");
    addExprs("green",   "&[^;]*;");
}

if ($predefined_regex) {
    addExprs("magenta", "[{}]");
    addExprs("red",     "[^\\\\][()?*+]");
    addExprs("yellow",  "\\\\");
    addExprs("green",   "[\\[\\]][-^]");
}

if ($predefined_css) {
    my @names = split(' ', cssPropertyList());
    my $nprops = scalar @names;
    ($quiet) || warn "hilite: Scanning for $nprops css property names.\n";
    # ($verbose) && print "Properties: @names.\n";
    for my $name (@names) {
        addExprs("yellow", "[^-\\w]" . $name . "[^-\\w]");
    }
}


###############################################################################
# Report the setup (*** fix to show colors, too ***)
#
((scalar keys %exprs) > 0) || ($test) ||
    die "hilite: No regexes specified.\n";

# Sorted, so that the order in which expressions are tried does not change
# from run to run with Perl's hash ordering.
my @ekeys = sort keys %exprs;

if ($verbose || $test) {
    warn "hilite: The " . scalar(@ekeys) . " expression(s) to match:\n";
    my $i = 0;
    for my $expr (@ekeys) {
        warn sprintf(" %2d:\t %s\n", ++$i, $expr);
    }
    warn "hilite: Case will be "
        . (($ignorecase) ? "ignored":"regarded") . ".\n";
    ($test) && exit;
}

# Compile every expression once, up front. A bad regex is then reported
# cleanly before any output, instead of dying part-way through the input.
my @rules = ();     # each entry: [ compiled regex, color escape ]
for my $expr (@ekeys) {
    if ($expr eq "") {
        warn "hilite: Ignoring empty expression.\n";
        next;
    }
    my $re = eval { ($ignorecase) ? qr/$expr/i : qr/$expr/ };
    defined($re) || die "hilite: Bad regex '$expr': $@";
    push @rules, [ $re, $exprs{$expr} ];
}

my $waitRe = undef;
if ($waitfor ne "" && $waitfor ne "*") {
    $waitRe = eval { qr/$waitfor/ };
    defined($waitRe) || die "hilite: Bad -waitfor regex '$waitfor': $@";
}


###############################################################################
# Crank
#
($verbose) && warn "Files to do: " . scalar(@files) . ".\n";

for my $f (@files) {
    ($verbose) && warn "hilite: Starting file '$f'\n";
    my $fh;
    if ($f eq "--stdin--") {
        ($verbose) && warn "Opening STDIN...\n";
        $fh = \*STDIN;
    }
    elsif (!open($fh, "<", $f)) {
        warn "hilite: Can't open '$f': $!\n";
        next;
    }

    my $doneWaitingFor = ($waitfor ne "") ? 0:1;

    while (my $line = <$fh>) {
        if ($untab) { $line = Text::Tabs::expand($line); }
        my $raw = $line;    # uncolored copy, for -waitfor
        my $gotit = 0;

        if ($wholelines) {  # stop at first match per line
            for my $rule (@rules) {
                next unless ($line =~ $rule->[0]);
                $gotit = 1;
                # Match against the full line (some expressions rely on the
                # newline as trailing whitespace), but end the color before
                # the line terminator rather than after it.
                my $eol = ($line =~ s/(\r?\n)\z//) ? $1 : "";
                $line = $rule->[1] . $line . $cEnd . $eol;
                last;
            }
        }
        else {              # find all matches per line
            # screwy case is where match contains $cEnd already....
            for my $rule (@rules) {
                my ($re, $color) = @$rule;
                # The substitution count says whether anything matched, and
                # honors the same case-sensitivity as the highlighting.
                my $nhits = ($line =~ s/($re)/$color$1$cEnd/g);
                if ($nhits) { $gotit = 1; }
            }
        }

        if (!$doneWaitingFor) {
            if ($waitfor eq "*") {
                next unless ($gotit);
                $doneWaitingFor = 1;
            }
            elsif ($raw =~ $waitRe) {
                $doneWaitingFor = 1;
            }
            else {
                next;
            }
        }

        if ($only == 0 || $gotit) {
            print $line;
        }
    } # while

    if ($f ne "--stdin--") { close $fh; }
} # for

exit;


###############################################################################
# Return the terminal escape string for a color name, as produced by the
# external 'colorstring' command. Returns "" if the command cannot be run or
# prints nothing (e.g. for an unknown color name).
#
# The command is run without a shell, so the name is never interpreted as
# shell syntax; results are cached so each name costs at most one process.
#
sub colorEscape {
    my ($name) = @_;
    if (!exists $colorCache{$name}) {
        my $esc = "";
        if (open(my $ph, "-|", "colorstring", $name)) {
            $esc = do { local $/; <$ph> };
            defined($esc) || ($esc = "");
            close $ph;
        }
        elsif (!$colorstringWarned++) {
            warn "hilite: Can't run 'colorstring': $!\n";
        }
        $colorCache{$name} = $esc;
    }
    return $colorCache{$name};
}


###############################################################################
# Register each of the given expressions, to be shown in the named color.
#
sub addExprs {
    my ($colorName, @newExprs) = @_;
    my $esc = colorEscape($colorName);
    for my $expr (@newExprs) {
        $exprs{$expr} = $esc;
    }
    return;
}


###############################################################################
# Load expressions from a file, one per line, to be shown in red.
# Line terminators are removed and blank lines are skipped.
# Returns the number of expressions loaded; dies if the file is unusable.
#
# Lines are read as raw bytes, the same way the input being searched is.
#
sub loadExpressionFile {
    my ($path) = @_;
    (-f $path) || die "Can't find file for -f $path.\n";
    open(my $xfh, "<", $path) ||
        die "Could not open expression file '$path': $!\n";
    my $esc = colorEscape("red");
    my $nf = 0;
    while (my $x = <$xfh>) {
        $x =~ s/\r?\n\z//;
        next if ($x eq "");
        $exprs{$x} = $esc;
        $nf++;
    }
    close $xfh;
    return $nf;
}


###############################################################################
# Return the CSS property names scanned for by -css, as one
# whitespace-separated string.
#
sub cssPropertyList {
    return join(" ", qw(
        accelerator azimuth background background-attachment
        background-color background-image background-position
        background-position-x background-position-y background-repeat
        behavior border border-bottom border-bottom-color
        border-bottom-style border-bottom-width border-collapse
        border-color border-left border-left-color border-left-style
        border-left-width border-right border-right-color
        border-right-style border-right-width border-spacing border-style
        border-top border-top-color border-top-style border-top-width
        border-width bottom caption-side clear clip color content
        counter-increment counter-reset cue cue-after cue-before cursor
        direction display elevation empty-cells filter float font
        font-family font-size font-size-adjust font-stretch font-style
        font-variant font-weight height ime-mode include-source
        layer-background-color layer-background-image layout-flow
        layout-grid layout-grid-char layout-grid-char-spacing
        layout-grid-line layout-grid-mode layout-grid-type left
        letter-spacing line-break line-height list-style list-style-image
        list-style-position list-style-type margin margin-bottom
        margin-left margin-right margin-top marker-offset marks max-height
        max-width min-height min-width orphans outline outline-color
        outline-style outline-width overflow overflow-X overflow-Y padding
        padding-bottom padding-left padding-right padding-top page
        page-break-after page-break-before page-break-inside pause
        pause-after pause-before pitch pitch-range play-during position
        quotes richness right ruby-align ruby-overhang ruby-position
        scrollbar-3d-light-color scrollbar-arrow-color scrollbar-base-color
        scrollbar-dark-shadow-color scrollbar-face-color
        scrollbar-highlight-color scrollbar-shadow-color
        scrollbar-track-color size speak speak-header speak-numeral
        speak-punctuation speech-rate stress table-layout text-align
        text-align-last text-autospace text-decoration text-indent
        text-justify text-kashida-space text-overflow text-shadow
        text-transform text-underline-position top unicode-bidi
        vertical-align visibility voice-family volume white-space widows
        width word-break word-spacing word-wrap writing-mode z-index zoom
    ));
}


###############################################################################
# what else can there be? padding?
#
# Return HTML presentational attribute names, as one whitespace-separated
# string. Not used yet: -htmlstyle is accepted but not implemented.
#
sub htmlStyleAttributeList {
    return join(" ", qw(
        align axis background bgcolor border cellpadding cellspacing clear
        color face frameborder height hspace marginheight marginwidth
        nowrap rules size style valign vspace width
    ));
}


###############################################################################
#

=pod

=head1 Usage

    hilite [options] [files]

Prints the file(s) (or stdin if no files are specified), colorizing
matches to any regex(es) specified (see -e).


=head1 General Options

=over

=item * B<-c> I<color>

A color name to use for following matches (default = red).
Terminal colors are discussed in the documentation for I<colorstring>
(see L</Related commands>).
The basic colors are: 'black', 'red', 'green', 'yellow', 'blue',
'magenta', 'cyan', 'lightgray', and 'default'.

=item * B<-e> I<regex>

A (Perl-style) regex to highlight matches of. Repeatable.
Matches will use the latest value for I<-c>. For example:

    hilite -c red -e 'error' -c yellow -e 'warning'

=item * B<-f> I<file> (or B<-file> I<file>)

Load expressions from a file, one regex per line. Blank lines are
ignored. These expressions are shown in red.

=item * B<-h>

Display this documentation and exit.

=item * B<-i>

Ignore case (all regexes, not just following ones).
This is the default; see I<-r>.

=item * B<-l>

Highlight whole lines, not just the matched part(s).

=item * B<-o>

Only show lines that have highlighting.

=item * B<-q>

Suppress most messages.

=item * B<-r>

Regard case ('-i' is the default).

=item * B<-t> I<n>

Expand tabs to space, assuming tabs every n spaces.

=item * B<-u>

Untabify: same as '-t 4'.

=item * B<-v>

Report the expressions in use and other progress messages (on stderr).

=item * B<-version>

Display version info and exit.

=item * B<-waitfor> I<regex>

Suppress all output until regex I<regex> is seen.
If I<regex> is '*', waits for the first highlightable match.

=back


=head1 Options that turn on predefined regex sets:

You can add to these with I<-e>, but can't delete expressions from them.
The colors set for these are best with a black background.

=over

=item * B<-aname> I<e>

XML attribute with (entire) name matching e.

=item * B<-avalue> I<e>

XML attribute with (entire) value matching e.

=item * B<-css>

CSS style property names.

=item * B<-cvsstatus>

Output from cvs status, highlighting problems.

=item * B<-diff>

Show lines (I<-l>) with differences found by I<diff> (side-by-side).

=item * B<-fixBars>

Use before I<-diff> to try to do better detection of '|' flag.

=item * B<-entities>

XML entity references.

=item * B<-error>

'error' in red, 'warning' in yellow, 'info' in cyan, etc.
(this one is really nice!)

=item * B<-gi>

Start and end XML tags but only the element type name.

=item * B<-htmlstyle>

CSS style attributes in HTML (not implemented yet).

=item * B<-man>

Man pages (experimental; not implemented yet).

=item * B<-ns> I<prefix>

XML tags with explicit namespace prefix I<prefix> (repeatable).

=item * B<-regex>

Special characters in regular expressions (experimental).

=item * B<-tag> I<name>

A particular XML tag (repeatable).

=item * B<-test>

Show expressions to match, but do nothing.

=item * B<-uri>

All http uris (not finished).

=item * B<-xml>

XML tags, XSL tags, comments, declarations, etc.

=back


=head1 Related commands

I<colorstring>: provides terminal color control strings, etc.
B<hilite> runs it to obtain every color it uses.

Linux I<enscript>: can highlight program syntax, or I<diff> output;
this program may eventually support color printing via I<enscript>.

Another Linux command (its name has been lost from this copy of the
documentation) seems to do something similar.


=head1 Known bugs and limitations

Predefined expressions are not necessarily applied in the order specified.

Multi-line tags or comments are not fully highlighted with I<-xml>.

Overlapping matches may produce interesting highlighting.

I<-diff> highlights some lines with ' | ' in them that aren't diffs.

I<-man> and I<-htmlstyle> are accepted, but do not define any expressions.

To see highlighting with I<less> on some systems, use I<less -R>.


=head1 Ownership

This work by Steven J. DeRose is licensed under a Creative Commons
Attribution-Share Alike 3.0 Unported License. For further information on
this license, see http://creativecommons.org/licenses/by-sa/3.0/.

The author's present email is sderose at acm.org.

For the most recent version, see http://www.derose.net/steve/utilities/.

=cut