#!/usr/bin/perl -w
#
# hilite: colorize multiple regex matches in a stream.
#
# 2006-11-03: Written by Steven J. DeRose, sderoses@acm.org.
# 2008-03-23 sjd: Rewrite.
# 2008-04-03 sjd: Add -aname, -avalue, -file options.
# 2008-04-11 sjd: Fix doc.
#
# To do:
#     Postscript output so you can color-print? Linux 'enscript' supports
#     Option for filename-containing or filename-with-extension
#     -aname and -avalue should use lookbehind so only match target portion.
#
# Overview:
#     1. Parse options; -e/-file and the predefined sets fill %exprs, which
#        maps each regex (as a string) to the terminal control string that
#        starts its color.
#     2. Compile every regex once (honoring -i / -r).
#     3. Copy each input line to STDOUT, wrapping matches (or whole lines,
#        with -l) in the color start string and the 'cancel' string.
#
# Color control strings come from the external 'colorstring' command, which
# must be on PATH.
#
use strict;
use Getopt::Long;
use Text::Tabs;

my $version = "2008-04-11";

# Cache of color name -> control string, filled by colorCode() so that
# 'colorstring' is run once per color rather than once per expression.
my %colorCache;

my $cStart = colorCode("red");      # color applied to subsequent -e exprs
my $cEnd   = colorCode("cancel");   # string that ends any coloring

# Options
my $curColor    = "red";
my %exprs       = ();   # expressions to hilite, and color for each
my $file        = "";   # load expressions from a file
my $fixBars     = 0;    # try harder to detect real --diff change-lines
my $help        = 0;
my $ignorecase  = 1;    # ignore case for all -e exprs.
my $only        = 0;
my $quiet       = 0;
my $tabInterval = 4;
my $test        = 0;
my $untab       = 0;
my $verbose     = 0;
my $waitfor     = "";
my $wholelines  = 0;

# Option flags for pre-defined expression sets
my $predefined_cvsstatus = 0;
my $predefined_diff      = 0;
my $predefined_entities  = 0;
my $predefined_err       = 0;
my $predefined_gi        = 0;
my $predefined_htmlstyle = 0;
my $predefined_man       = 0;
my $predefined_ns        = 0;
my $predefined_regex     = 0;
my $predefined_uri       = 0;
my $predefined_xml       = 0;
my $predefined_css       = 0;
my @predefined_tag       = ();
my @predefined_aname     = ();
my @predefined_avalue    = ();

# Process options
Getopt::Long::Configure ("ignore_case");
my $result = GetOptions(
    # Predefined target expressions
    "aname=s"    => \@predefined_aname,
    "avalue=s"   => \@predefined_avalue,
    "cvsstatus"  => \$predefined_cvsstatus,
    "diff"       => \$predefined_diff,
    "entities"   => \$predefined_entities,
    "err|error"  => \$predefined_err,
    "gi"         => \$predefined_gi,
    "htmlstyle"  => \$predefined_htmlstyle,
    "man"        => \$predefined_man,
    "ns=s"       => \$predefined_ns,
    "regex"      => \$predefined_regex,
    "tag=s"      => \@predefined_tag,       # repeatable, as documented
    "uri"        => \$predefined_uri,
    "xml"        => \$predefined_xml,
    "css"        => \$predefined_css,

    # General options
    "c=s"        => sub {
        $curColor = $_[1];
        my $try = colorCode($curColor);
        if (!$try) {
            print "Unknown color '$curColor'..\n";
        }
        else {
            $cStart = $try;
        }
    },
    "e=s"        => sub { $exprs{$_[1]} = $cStart; },
    # 'f' is listed explicitly: as a bare abbreviation it would be
    # ambiguous between -file and -fixBars.
    "f|file=s"   => sub {
        my $path = $_[1];
        (-f $path) || die "Can't find file for -f $path.\n";
        open(my $in, "<", $path)
            || die "hilite: Can't open '$path': $!\n";
        my $nf = 0;
        while (my $expr = <$in>) {
            chomp $expr;            # else every regex would end in "\n"
            next if ($expr eq "");  # an empty regex would match everything
            $nf++;
            $exprs{$expr} = colorCode("red");
        }
        close $in;
        ($verbose) && warn "Loaded $nf expressions from file '$path'.\n";
    },
    "fixBars!"   => \$fixBars,
    "h|help|?"   => \$help,
    "i!"         => \$ignorecase,
    "l!"         => \$wholelines,
    "o!"         => \$only,
    "q|quiet!"   => \$quiet,
    # Documented in the usage text; without an explicit entry '-r' would be
    # taken as an abbreviation of --regex.
    "r"          => sub { $ignorecase = 0; },
    # Giving a tab interval implies that tabs should be expanded.
    "t=n"        => sub { $tabInterval = $_[1]; $untab = 1; },
    "test!"      => \$test,
    "u!"         => \$untab,
    "v!"         => \$verbose,
    "verbose+"   => \$verbose,
    "version"    => sub {
        die "hilite: Version of $version, by Steven J. DeRose, sderose\@acm.org.\n";
    },
    "waitfor=s"  => \$waitfor
    );

# Validate and default options
if ($help) {
    showUsage();
    exit;
}
($result) || die "hilite: Bad options.\n";

# Test the argument count, not the truth of $ARGV[0]: a file may be named "0".
my @files = ();
if (scalar(@ARGV) > 0) {
    @files = @ARGV;
}
else {
    $files[0] = "--stdin--";
}

($tabInterval > 1) || die "hilite: Bad tab interval '$tabInterval'.\n";

# These options are accepted but nothing below acts on them; say so rather
# than silently ignoring them.
for my $todo (["--htmlstyle", $predefined_htmlstyle],
              ["--man",       $predefined_man],
              ["--ns",        $predefined_ns]) {
    ($todo->[1]) && warn "hilite: $todo->[0] is not implemented yet; ignored.\n";
}


###############################################################################
# Implement predefined target expressions
#
# Start tag containing an attribute whose name matches; one expression for
# each quoting style of the value.
foreach my $aname (@predefined_aname) {
    my $dq = "<\\w[-_:.\\w\\d]*[^>]*\\s*(" . $aname . ")\\s*=\\s*\"[^\"]*\"";
    $exprs{$dq} = colorCode("red");
    my $sq = "<\\w[-_:.\\w\\d]*[^>]*\\s*(" . $aname . ")\\s*=\\s*'[^']*'";
    $exprs{$sq} = colorCode("red");
}

# Start tag containing an attribute whose (entire) value matches.
foreach my $avalue (@predefined_avalue) {
    my $dq = "<\\w[-_:.\\w\\d]*[^>]*=\\s*\"(" . $avalue . ")\"";
    $exprs{$dq} = colorCode("red");
    my $sq = "<\\w[-_:.\\w\\d]*[^>]*=\\s*'(" . $avalue . ")'";
    $exprs{$sq} = colorCode("red");
}

if ($predefined_cvsstatus) {
    $wholelines = 1;
    $exprs{"Up-to-date"}          = colorCode("green");
    $exprs{"Locally Added"}       = colorCode("green");
    $exprs{"Locally Modified"}    = colorCode("cyan");
    $exprs{"Needs Patch"}         = colorCode("yellow");
    $exprs{"Needs Checkout"}      = colorCode("yellow");
    $exprs{"Unknown"}             = colorCode("yellow");
    $exprs{"Needs Merge"}         = colorCode("red");
    $exprs{"had conflicts"}       = colorCode("red");
    $exprs{"Unresolved Conflict"} = colorCode("red");
    $exprs{"Invalid|Error"}       = colorCode("bg_red");
    # Following is added by ~deroses/bin/mods...
    $exprs{"but repository has"}  = colorCode("bg_red");
}

# Require 2 spaces before "|" so we don't catch it in content.
if ($predefined_diff) {
    $wholelines = 1;
    $exprs{"^\\s+>\\s"}  = colorCode("green");
    $exprs{"\\s<\\s+\$"} = colorCode("fg2_cyan");
    if ($fixBars) {
        warn "hilite: -fixBars may not work due to tabs.\n";
        # COLUMNS is often not exported; fall back to 80 when it is unset,
        # empty, or not a plain number.
        my $w = $ENV{COLUMNS};
        if (!defined($w) || $w !~ m/^\d+$/) { $w = 80; }
        my $min = int($w/2) - 2;
        if ($min < 0) { $min = 0; }     # keep the {min,max} quantifier valid
        my $max = $min + 4;             # look for "|" here
        my $bar = "^.{$min,$max}\\|";
        ($verbose) && warn "***** min $min, max $max, expr '$bar'.\n";
        $exprs{$bar} = colorCode("yellow");
    }
    else {
        $exprs{"\\s\\s\\|\\s"} = colorCode("yellow");
        warn "Consider -fixBars to improve '|' detection.\n";
    }
}

if ($predefined_entities) {
    $exprs{"&[^;]*;"} = $cStart;
}

if ($predefined_err) {
    # The expressions below are written in mixed case, so they rely on
    # case-insensitive matching.
    $ignorecase = 1;

    for my $expr (
        "(syntax )?ERROR( occurred at|s)?",
        "(element|attribute)? *('\\w+')? *(is)? *(INVALID|NOT VALID)",
        "CANNOT|CAN'T",
        "MUST NOT|MAY NOT",
        "UNKNOWN( file| command| variable| directory)?(, )?",
        "FATAL",
        "TERMINAT(e|ed|ing)",
        "( \\w+)?EXCEPTION",
        "FAIL(ed)?",
        ) {
        $exprs{$expr} = colorCode("red");
    }
    # would be nice to exclude "(0 failed)":
    # $exprs{"([1-9]+0\s+|[^0]\s+)FAIL(ed)?"}

    for my $expr (
        "WARNINGs? *[:=]? *[0-9]*",
        "SHOULD NOT",
        "<<<= check the source here",
        ) {
        $exprs{$expr} = colorCode("yellow");
    }

    for my $expr (
        "INFO",
        "(XSLT )?MESSAGE:",
        "processed *(in *[0-9.]+ *sec\\.?)?",
        ) {
        $exprs{$expr} = colorCode("cyan");
    }

    for my $expr (
        "SUCCESS(ful)?(ly)?",
        "SUCCEED(ed|ing)?",
        "START(ed|ing) *(file|document)*",
        "RUNNING:? *(file|document)*",
        "FINISH(ed|ing)",
        "COMPLET(e|ed|ing)?",
        "DONE",
        ) {
        $exprs{$expr} = colorCode("green");
    }

    $exprs{"(/[-\\w]*\\.x.l,)? *line [0-9]+((, )(column|offset) [0-9]+)"} =
        colorCode("bold");
} # -err

if ($predefined_gi) {
    $exprs{"<[-\\w]+\\s"} = $cStart;
    # NOTE(review): this pattern reached us truncated to "]*>" (markup was
    # stripped from the source); reconstructed as an end-tag matcher.
    # Confirm against the author's copy.
    $exprs{"</[^>]*>"} = $cStart;
}

foreach my $t (@predefined_tag) {
    ($t =~ m/^\w[-:\w]*$/) || die "hilite: Invalid -tag type '$t'.\n";
    $exprs{"<\\/?$t(\\s[^>]*|\\/)?>"} = $cStart;
}

if ($predefined_uri) {
    # Unfinished: only the scheme prefix is matched for now.
    # $e = "https?:[-_\$.+!*,()&a-zA-Z0-9]*"; # RFC 1738
    $exprs{"http"} = $cStart;   # RFC 1738
}

if ($predefined_xml) {
    $exprs{"<[^!][^>]*(>|\$)"}     = colorCode("magenta");
    # NOTE(review): this pattern reached us truncated to "]*(>|\$)" (markup
    # was stripped from the source); reconstructed as the matcher for
    # comments and declarations. Confirm against the author's copy.
    $exprs{"<![^>]*(>|\$)"}        = colorCode("yellow");
    $exprs{"<\\/?xsl:[^>]*(>|\$)"} = colorCode("cyan");
    $exprs{"&[^;]*;"}              = colorCode("green");
}

if ($predefined_regex) {
    $exprs{"[{}]"}             = colorCode("magenta");
    $exprs{"[^\\\\][()?*+]"}   = colorCode("red");
    $exprs{"\\\\"}             = colorCode("yellow");
    $exprs{"[\\[\\]][-^]"}     = colorCode("green");
}

if ($predefined_css) {
    my @names = split(' ', cssPropertyList());
    my $nprops = scalar @names;
    ($quiet) || warn "hilite: Scanning for $nprops css property names.\n";
    # ($verbose) && print "Properties: @names.\n";
    for my $prop (@names) {
        # Bracket the name so 'border' does not match inside 'border-top'.
        $exprs{"[^-\\w]$prop" . "[^-\\w]"} = colorCode("yellow");
    }
}

# Report the setup (*** fix to show colors, too ***)
((scalar keys %exprs) > 0) || ($test) ||
    die "hilite: No regexes specified.\n";

# Hash order differs from run to run; a sorted list makes the choice of
# color for a line that matches several expressions reproducible.
my @ekeys = sort keys %exprs;

if ($verbose || $test) {
    my $nexprs = scalar @ekeys;
    warn "hilite: The $nexprs expression(s) to match:\n";
    my $i = 1;
    for my $expr (@ekeys) {
        warn sprintf(" %2d:\t %s\n", $i, $expr);
        $i++;
    }
    warn "hilite: Case will be " .
        (($ignorecase) ? "ignored":"regarded") . ".\n";
    ($test) && exit;
}

# Compile each expression once, and report a bad regex by name instead of
# dying in the middle of the output.
my %compiled = ();
for my $expr (@ekeys) {
    my $re = eval { ($ignorecase) ? qr/$expr/i : qr/$expr/ };
    defined($re) || die "hilite: Bad regex '$expr': $@";
    $compiled{$expr} = $re;
}

# '*' is not a regex here: it means "wait for the first highlighted line".
my $waitRe;
if ($waitfor ne "" && $waitfor ne "*") {
    $waitRe = eval { qr/$waitfor/ };
    defined($waitRe) || die "hilite: Bad -waitfor regex '$waitfor': $@";
}

$Text::Tabs::tabstop = $tabInterval;


###############################################################################
# Crank
($verbose) && warn "Files to do: " . scalar(@files) . ".\n";

for my $f (@files) {
    ($verbose) && warn "hilite: Starting file '$f'\n";

    my $fh;
    my $isStdin = ($f eq "--stdin--");
    if ($isStdin) {
        ($verbose) && warn "Opening STDIN...\n";
        $fh = \*STDIN;
    }
    elsif (!open($fh, "<", $f)) {
        warn "hilite: Can't open '$f': $!\n";
        next;
    }

    # Output is suppressed until -waitfor is satisfied (per input file).
    my $doneWaitingFor = ($waitfor ne "") ? 0:1;

    while (my $line = <$fh>) {
        # Tabs are expanded in-process, so no temp file or shell command
        # is involved and STDIN is handled the same way as named files.
        if ($untab) { $line = expand($line); }

        my $raw   = $line;  # uncolored copy, for -waitfor and -l matching
        my $gotit = 0;

        if ($wholelines) {
            # The first matching expression colors the entire line.
            foreach my $expr (@ekeys) {
                my $re = $compiled{$expr};
                if ($raw =~ m/$re/) {
                    $gotit = 1;
                    $line = "$exprs{$expr}$line$cEnd";
                    last;
                }
            }
        }
        else {
            foreach my $expr (@ekeys) {
                # screwy case is where match contains $cEnd already....
                my $re    = $compiled{$expr};
                my $color = $exprs{$expr};
                # The substitution count doubles as the "matched" test, so
                # it obeys the same case setting as the highlighting.
                if ($line =~ s/($re)/$color$1$cEnd/g) { $gotit = 1; }
            }
        }

        if (!$doneWaitingFor) {
            my $seen = ($waitfor eq "*") ? $gotit : ($raw =~ m/$waitRe/);
            ($seen) || next;
            $doneWaitingFor = 1;
        }

        if ($only == 0 || $gotit) {
            print $line;
        }
    } # while

    if (!$isStdin) { close $fh; }
} # for

exit;


###############################################################################
# colorCode(name): return the output of 'colorstring name', i.e. the terminal
# control string for that color. Each name is looked up only once; the result
# (whatever the command printed, possibly nothing) is cached in %colorCache.
#
sub colorCode {
    my ($name) = @_;
    if (!exists $colorCache{$name}) {
        $colorCache{$name} = `colorstring $name`;
    }
    return $colorCache{$name};
}


###############################################################################
# cssPropertyList(): return the CSS property names that --css looks for, as
# a single space-separated string.
#
sub cssPropertyList {
    return join(" ", qw(
        accelerator azimuth
        background background-attachment background-color background-image
        background-position background-position-x background-position-y
        background-repeat behavior
        border border-bottom border-bottom-color border-bottom-style
        border-bottom-width border-collapse border-color
        border-left border-left-color border-left-style border-left-width
        border-right border-right-color border-right-style border-right-width
        border-spacing border-style
        border-top border-top-color border-top-style border-top-width
        border-width bottom
        caption-side clear clip color content
        counter-increment counter-reset
        cue cue-after cue-before cursor
        direction display elevation empty-cells filter float
        font font-family font-size font-size-adjust font-stretch
        font-style font-variant font-weight
        height ime-mode include-source
        layer-background-color layer-background-image
        layout-flow layout-grid layout-grid-char layout-grid-char-spacing
        layout-grid-line layout-grid-mode layout-grid-type
        left letter-spacing line-break line-height
        list-style list-style-image list-style-position list-style-type
        margin margin-bottom margin-left margin-right margin-top
        marker-offset marks
        max-height max-width min-height min-width
        orphans outline outline-color outline-style outline-width
        overflow overflow-X overflow-Y
        padding padding-bottom padding-left padding-right padding-top
        page page-break-after page-break-before page-break-inside
        pause pause-after pause-before pitch pitch-range play-during
        position quotes richness right
        ruby-align ruby-overhang ruby-position
        scrollbar-3d-light-color scrollbar-arrow-color scrollbar-base-color
        scrollbar-dark-shadow-color scrollbar-face-color
        scrollbar-highlight-color scrollbar-shadow-color
        scrollbar-track-color
        size speak speak-header speak-numeral speak-punctuation
        speech-rate stress table-layout
        text-align text-align-last text-autospace text-decoration
        text-indent text-justify text-kashida-space text-overflow
        text-shadow text-transform text-underline-position
        top unicode-bidi vertical-align visibility voice-family volume
        white-space widows width word-break word-spacing word-wrap
        writing-mode z-index zoom
    ));
}

# htmlStyleAttributeList(): return HTML presentational attribute names as a
# single space-separated string. Nothing in this file calls it yet.
# what else can there be? padding?
sub htmlStyleAttributeList {
    return join(" ", qw(
        align axis background bgcolor border cellpadding cellspacing clear
        color face frameborder height hspace marginheight marginwidth nowrap
        rules size style valign vspace width
    ));
}


###############################################################################
# showUsage(): print the help text to STDOUT.
#
sub showUsage {
    print <<"EOF";
Usage:
    hilite [options] [files]

Dumps the file(s) (or stdin if no files are specified) to the terminal,
but highlights matches to any regex(es) specified (see -e).

General Options:
    -c color      A color name to use for following matches (default = red).
                  Terminal colors are discussed under 'info terminfo'.
    -e regex      A (Perl-style) regex to highlight matches of. Repeatable.
                  Matches will use the latest value for -c. For example:
                      hilite -c red -e 'error' -c yellow -e 'warning'
    -f file       Load expressions from a file (one per line).
    -i            Ignore case (applies to all regexes, not just following ones).
    -l            Highlight whole lines, not just the matched part(s).
    -o            Only show lines that have highlighting.
    -q            Suppress most messages.
    -r            Regard case ('-i' is the default).
    -t n          Expand tabs to space, assuming tabs every n spaces.
    -u            Untabify: same as '-t 4'.
    -version      Display version info and exit ($version, sjd).
    -waitfor pat  Suppress all output until (Perl) regex /pat/ is seen.
                  If pat is '*', waits for the first highlightable match.

Options that turn on predefined regex sets:
    (you can add to these with -e, but can't delete expressions from them)
    (the colors set for these are best with a black background)
    --aname e     XML attribute with (entire) name matching e.
    --avalue e    XML attribute with (entire) value matching e.
    --css         CSS style property names.
    --cvsstatus   output from cvs status, highlighting problems.
    --diff        show lines (-l) with differences in 'diff -y' (side-by-side).
    --fixBars     Use before --diff to try to do better detection of '|' flag.
    --entities    XML entity references.
    --error       'error' in red, 'warning' in yellow, 'info' in cyan, etc.
                  (this one is really nice!)
    --gi          Start and end tags but only up through the element type name.
    --htmlstyle   CSS style attributes in HTML.
    --man         Man pages (experimental).
    --ns name     XML tags with explicit namespace prefix 'name' (repeatable).
    --regex       Special characters in regular expressions (experimental).
    --tag name    A particular XML tag (repeatable).
    --test        Show expressions to match, but do nothing.
    --xml         XML tags, XSL tags, comments, declarations, entity references.
    --uri         All http uris (not finished).

Related commands:
    colorstring: provides terminal color control strings.
    Linux 'enscript' can highlight program syntax, or diff output;
    this program may also be upgraded to do color printing via enscript.

Known bugs and limitations:
    Predefined expressions are not necessarily applied in the order specified.
    Multi-line tags or comments are not fully highlighted with --xml.
    Overlapping matches may produce interesting highlighting.
    --diff highlights some lines with ' | ' in them that aren't diffs.
    To see highlighting with 'more' on some systems, use 'more -raw'.
EOF
    return;
}