#!/usr/bin/perl -w
#
# splitat: Put a line break before each occurrence of a string.
#
# 2007-02-02: Written by Steven J. DeRose, sderose@acm.org.
# 2008-08-12 sjd: Doc.
#
# To do:
#
use strict;
use warnings;
use Getopt::Long;

my $version   = "2008-08-12";
my $dft_expr  = "<[\\w!?]";                 # Start of an XML tag, PI, comment...
my $uglyExpr  = "(\\]\\]|\\\?|--|/)?>";     # For breaking inside XML tags

my $after                = 0;
my $expr                 = $dft_expr;
my $help                 = 0;
my $indent               = 0;
my $keepOriginalNewlines = 0;
my $outLineends          = "U";
my $quiet                = 0;   # Accepted on the command line; nothing below is gated on it.
my $replace              = 0;
my $useUgly              = 0;
my $verbose              = 0;

# Process options
Getopt::Long::Configure ("ignore_case");
my $result = GetOptions(
    "a|after!"                 => \$after,
    "e=s"                      => \$expr,
    "h|help|?"                 => \$help,
    "i|indent=n"               => \$indent,
    "keep!"                    => \$keepOriginalNewlines,
    "outlinends|outlineends=s" => \$outLineends,
    "q|quiet!"                 => \$quiet,
    "r|replace"                => \$replace,
    "u!"                       => \$useUgly,
    "v|verbose+"               => \$verbose,
    "version"                  => sub {
        die "Version of $version, by Steven J. DeRose, sderose\@acm.org.\n";
    }
);

if ($help) {
    showUsage();
    exit;
}
($result) || die "Bad options.\n";

# Validate and default options
($after && $replace) && die "Can't have both -a and -r.\n";

if ($useUgly) {
    $expr = $uglyExpr;
}

# Only the first letter of the line-end type matters (M, D, or U).
$outLineends = uc(substr($outLineends."U",0,1));
my $newline = "\n";
if ($outLineends eq "M") {
    $newline = chr(13);
}
elsif ($outLineends eq "D") {
    $newline = chr(13).chr(10);
}
elsif ($outLineends eq "U") {
    $newline = chr(10);
}
else {
    die "Unknown output line-end type '$outLineends'.\n";
}

($verbose) && warn "Expression is '$expr', -after is $after.\n";

# Compile the pattern once, wrapped in a capture group so the matched text is
# available as $1. Doing it here reports a bad -e expression up front instead
# of failing inside the read loop.
my $matcher = eval { qr/($expr)/ };
defined $matcher or die "Bad regex '$expr': $@";

################################################################################
# The text inserted at each break: a line end plus the requested indentation.
my $istring = $newline . (" " x $indent);

if (scalar @ARGV) {
    foreach my $f (@ARGV) {
        # Two-arg open used to treat "-" as stdin; keep that meaning explicitly.
        if ($f eq '-') {
            insertBreaks(\*STDIN);
            next;
        }
        my $fh;
        if (!open($fh, '<', $f)) {
            warn "Cannot open file '$f': $!\n";
            next;    # Do not try to read from a handle that never opened.
        }
        insertBreaks($fh);
        close $fh;
    }
}
else { # stdin
    insertBreaks(\*STDIN);
}

# Terminate the output using the same line-end style as the inserted breaks.
print $newline;

exit;


################################################################################
# insertBreaks: Copy every line of a filehandle to stdout with breaks inserted.
#
# Parameters:
#     $fh  An open, readable filehandle.
#
# Each line is chomped, then every match of the compiled expression gets the
# break string placed after it (-after), in place of it (-replace), or before
# it (default). The original line end is re-emitted only under -keep.
#
sub insertBreaks {
    my ($fh) = @_;
    while (my $line = readline $fh) {
        chomp $line;
        if ($after) {
            $line =~ s/$matcher/$1$istring/g;
        }
        elsif ($replace) {
            $line =~ s/$matcher/$istring/g;
        }
        else {
            $line =~ s/$matcher/$istring$1/g;
        }
        print $line;
        print $newline if $keepOriginalNewlines;
    }
    return;
}


################################################################################
# showUsage: Write the help text to stderr (via warn).
#
# Interpolates the default expression, the -u expression, and the version
# string so the help always reflects the values defined at the top of the file.
#
sub showUsage {
    warn "
Usage: splitat [options] [-e 'regex'] [files]

Insert a line-break before (or after or instead of) each match of the regex.
Can do one or many files, or take input from stdin.

Options:
    -after         Put breaks *after* instead of *before* matches.
    -e 'regex'     A Perl regex to say what to split before
                   (default '$dft_expr', for XML markup).
    -indent n      Indent broken lines by n spaces.
    -keep          Keep original newlines as well
    -outlineends [type] Write M(ac), D(OS), or default U(nix) breaks.
    -replace       Replace the match instead of breaking before or after.
    -u             Shorthand for breaking just inside the end of XML tags,
                   PIs, comments, and CDATA sections. Same as
                   -e '$uglyExpr'
    -q             Suppress most messages.
    -v             Add more detailed messages.
    -version       Display version info and exit ($version, sjd).

Note: If there is already a newline before the match, another is still added.

Related commands:
    fold           Break lines at certain colum width.
    normalizeXML   Much richer formatting features.
";
}