#!/usr/bin/perl -w
#
# globalChange
#
# 2010-08-25: Written by Steven J. DeRose.
# 2010-09-12 sjd: Cleanup. Use cp's backup options.
# 2011-03-11 sjd: Improve doc.
# 2011-10-04 sjd: Add package ChangeInfo, unify all the change-spec options.
# 2011-11-07 sjd: Debugging last set of changes.... Implement -optPersistent.
# 2012-01-04 sjd: Improve -test and -verbose. sjdUtils. Drop -color.
# 2012-03-14ff sjd: Add -color back, I miss it. vMsg.
#     Add -interactive. Ditch optX args in favor of usual regex syntax.
#     Drop -from/-to. Start -fieldChoice. Add makeDiffLine(). Use eval() to
#     support regex modifiers and RHS backrefs.
# 2012-03-22 sjd: Ditch -bdir, -bext, just use 'cp -b'. Do changes outbound
#     instead of copy out and change back. Don't touch unchanged files at all.
#     Add ReadKey for -interactive. Add -zero.
# 2012-04-04 sjd: Copy and truncate the temp file, so permissions copy.
# 2012-05-30 sjd: Add -recursive, report more detailed stats.
# 2012-07-09 sjd: Sync with TabularFormats.
# 2012-10-01 sjd: Support STDIN. Simplify ChangeInfo use and discard several
#     functions. Clean up regex apply, pre-compile, add -compileRegex option.
#     Add -global. Finish hooking up TabularFormats. Rest of modifiers.
# 2012-10-08 sjd: Better problem reporting. Off-by-one in makeDiffLine.
#     Make /g work (not compilable with qr//). \n in RHS. Add -perfile.
#     Fix/clean stats logic and names.
# 2012-12-03 sjd: Update color handling for sjdUtils changes. Add -iString.
# 2013-02-06 sjd: Group persistent options into %exprOptions.
#     Let -f files set options via lines starting with "-". Get a few messages
#     to STDERR where they belong. Add output buffering via outputALine().
# 2013-05-13ff: Fix various bugs in -r. Support backslashed delims in -e.
# 2013-06-27f: Add finalize() to write buffered output lines at EOF.
#     Get rid of spurious output. Fix $ofh. Make apply() take \@fields.
#     Ditch $istr in favor of vpush/vpop. Work on -pre.
# 
# To do:
#     Writes "ARRAY(...)" at top of STDOUT??????
#     Finish -pre, -post, -span (span x fields)?
#     Test -fieldChoice, and make it accept field *names*.
#     Test piping in and out.
#     Report # files each change affected (not just total changes).
#     Document how to do special chars in input exprs (-d, -e, -fileOfExprs)
#     Document how to refer to filename/path, and ser number, in exprs?
#     Integrate ReadAny.
#     Use Perl (?adlupimsc)... to turn modifiers on/off in mid-pattern?
#
use strict;
use Getopt::Long;
use Encode;
use Term::ReadKey;

use TabularFormats;
use sjdUtils;

our $VERSION = "2013-06-28";


###############################################################################
# Options
#
my $backup           = 1;
my $color            = ($ENV{USE_COLOR} && -t STDERR) ? 1:0;
my $compileRegex     = 1;
my @exprs            = ();
my $fileOfExprs      = "";
my $force            = 0;
my $iencoding        = "";
my $ilineends        = "";
my $interactive      = 0;
my $iString          = "  ";
my $list             = 0;
my $oencoding        = "";
my $olineends        = "";
my $perFile          = 0;
my $quiet            = 0;
my $recursive        = 0;
my $showChanges      = 0;
my $test             = 0;
my $tickInterval     = 10000;
my $verbose          = 0;
my $zero             = 0;

my %exprOptions = ( # Options copied into each changeExpr
    "fieldChoice"    => 0,          # Which field to look in?
    "global"         => 0,          # Change all occurrences in field?
    "ignoreCase"     => 0,
    "pre"            => "",         # Regex that must be in preceding context
    "preLines"       => 1,          # How many lines in that context
    "post"           => "",         # Regex that must be in following context
    "postLines"      => 1,          # How many lines in that context
    "span"           => 1,          # How many lines the 'from' regex may span
);

my $tmpFile = "/tmp/globalChange.out";

(my $tfmt = new TabularFormats()) ||
    dieCleanly("Could not construct TabularFormats instance.");


###############################################################################
#
my %getoptHash = (
    "backup!"             => \$backup,
    "color!"              => \$color,
    "compileRegex!"       => \$compileRegex,
    "force!"              => \$force,
    "h|help"              => sub { system "perldoc $0"; exit; },
    "iencoding=s"         => \$iencoding,
    "ilineends=s"         => \$ilineends,
    "interactive!"        => \$interactive,
    "iString=s"           => \$iString,
    "list!"               => \$list,
    "listEncodings"       => sub {
        warn "\nEncodings available:\n";
        my $last = ""; my $buf = "";
        for my $k (Encode->encodings(":all")) {
            my $cur = substr($k,0,2);
            if ($cur ne $last) {
                warn "$buf\n";
                $last = $cur; $buf = "";
            }
            $buf .= "$k ";
        }
        warn "$buf\n";
        exit;
    },
    "oencoding=s"         => \$oencoding,
    "olineends=s"         => \$olineends,
    "perFile!"            => \$perFile,
    "q!"                  => \$quiet,
    "r|recursive!"        => \$recursive,
    "reset"               => sub {
        Term::ReadKey::ReadMode 1;
    },
    "showChanges!"        => \$showChanges,
    "test!"               => \$test,
    "tickInterval=i"      => \$tickInterval,
    "unicode!"            => sub { $iencoding = "utf8"; },    
    "v|verbose"           => sub {
        $verbose++;
        sjdUtils::setVerbose($verbose);
    },
    "version"             => sub { 
        dieCleanly("Version of $VERSION, by Steven J. DeRose\n");
    },
    "zero!"               => \$zero,

    # Options that are stored with individual change exprs:
    "fieldChoice=i"       => \$exprOptions{"fieldChoice"},
    "global!"             => \$exprOptions{"global"},
    "i|ignoreCase!"       => \$exprOptions{"ignoreCase"},
    "pre=s"               => \$exprOptions{"pre"},
    "preLines=i"          => \$exprOptions{"preLines"},
    "post=s"              => \$exprOptions{"post"},
    "postLines=i"         => \$exprOptions{"postLines"},
    "span=i"              => \$exprOptions{"span"},

    # Options for specifying individual changes
    "d=s"                 => sub {      # delete matches
        saveChange($_[1], "", "");
    },
    "e=s"                 => sub {      # sed-like -e
        saveChangeFromExpr($_[1]);
    },
    "f|fileOfExprs=s"     => sub {      # file listing changes
        $fileOfExprs = $_[1];
        open(F, $fileOfExprs) ||
            dieCleanly("-f file '$fileOfExprs' could not be opened.\n");
        my $fcount = 0;
        while (my $x=<F>) {
            chomp $x;
            if ($x =~ m/^\s*#/) {  # comments
                next;
            }
            elsif ($x =~ m/^-(\w+)\s+(.*)/) {
                my $opt = $1; my $val = $2;
                if (defined exprOptions{$1}) {
                    $exprOptions{$1} = $2;
                }
                else {
                    dieCleanly("Unknown option in -f file $fileOfExprs:\n$x\n");
                }
            }
            vMsg(2,"Adding change from file: '$x'");
            saveChangeFromExpr($x);
            $fcount++;
        } # EOF
        close F;
        vMsg(1,"Read $fcount expressions from file $fileOfExprs.");
    },
    );

$tfmt->addOptionsToGetoptLongArg(\%getoptHash);
Getopt::Long::Configure ("ignore_case");
GetOptions(%getoptHash) || dieCleanly("Bad options.\n");


###############################################################################
# Set implied options, validate option values...
#
if ($color) {
    sjdUtils::setColors(1);
    sjdUtils::defineMessageType("none", "black");    # "No changes" messages
    sjdUtils::defineMessageType("some", "bg_green"); # "<n> changes" messages
}

# Check on the changes, and see if there's any -fieldChoice going on.
# If so, figure the highest field-number in use.
#
(scalar(@exprs)>0) ||
    dieCleanly("No change(s) specified. Try the '-help' option.\n");

my $maxFieldChoice = 0;
for my $ex (@exprs) {
    if ($ex->{fieldChoice} > $maxFieldChoice) {
        $maxFieldChoice = $ex->{fieldChoice};
    }
}

if ($verbose || $list) {
    hMsg(0,"List of changes in effect:");
    vPush();
    for (my $i=0; $i<scalar(@exprs); $i++) {
        vMsg(0, $exprs[$i]->toString($i));
    }
    vPop();
    vMsg(0,"\n");
}

$ilineends = uc(substr($ilineends."U",0,1));
if    ($ilineends eq "M") { $/ = chr(13); }
elsif ($ilineends eq "D") { $/ = chr(13).chr(10); }
else { }

$olineends = uc(substr($olineends."U",0,1));
if    ($olineends eq "M") { $\ = chr(13); }
elsif ($olineends eq "D") { $\ = chr(13).chr(10); }
else { }


###############################################################################
###############################################################################
# Main
#
my $totalItems         = 0;
my $dirs               = 0;
my $filesRegular       = 0;
my $filesBackup        = 0;
my $filesMissing       = 0;
my $totalFilesChanged  = 0;
my $totalChangedLines  = 0;
my $totalChanges       = 0;
my @totalChangesByExpr = ();
my $totalRecs          = 0;

my $failedOpen         = 0;
my $failedTempOpen     = 0;
my $failedChanges      = 0;
my $failedCopy         = 0;

# Use might want to refer to these in changes....
#
my $recnum             = 0;
my $curFileName        = "";

if ($interactive) {
    Term::ReadKey::ReadMode 3;
}

if (scalar(@ARGV) > 0) {
    while (my $file = shift) {
        doOneItem($file, 1);
    }
}
else {
    open(my $src, "<-") || dieCleanly("Unable to open STDIN.\n");
    if ($iencoding) {
        binmode($src, ":encoding($iencoding)");
    }
    open(my $tgt, ">--") || dieCleanly("Unable to open STDOUT.\n");
    if ($oencoding) {
        binmode($tgt, ":encoding($oencoding)");
    }
    my $nc = doOneFile("-", $src, $tgt);
    close $src;
    close $tgt;
}

Term::ReadKey::ReadMode 1;

($quiet) || report();

exit;


###############################################################################
###############################################################################
# Parse a regex change into from, to, and modifiers, and save it.
#
sub saveChangeFromExpr {
    my ($expr) = @_;
    ($expr =~ m/^s(\W)/) || main::dieCleanly(
        "Expression must start with 's' and a delimiter:\n    $expr\n");
    my $delim = $1;
    if ($delim ne "/") { 
        sjdUtils::vMsg(0,"Delim '$delim' (not '/') is experimental.");
        #$delim = "\/";
    }

    # Parse and slightly validate. 'split(/$delim/,$expr)' not good enough.
    my $exprCopy = $expr;
    my @parts = ();
    while ($exprCopy =~ s/^(.*?[^\\])?$delim//) {
        push @parts, $1;
    }
    if ($exprCopy ne "") { 
        push @parts, $exprCopy;
    }

    my ($theS, $from, $to, $mods, $bogus) = @parts;
    my $prob = (scalar(@parts)<3 || scalar(@parts)>4 ||
                ($mods && $mods =~ m/([^igmsexadlu])/)) ? 1:0;
    if ($prob || $verbose) {
        hMsg(0,"Expression parse:\n" .
            "    expr:  '$expr\n" .
            "    delim: '$delim'\n    '" .
            join("'\n    '", @parts) . "'");
        if ($prob) { dieCleanly("  Could not parse.\n"); }
    }
    saveChange($from, $to, $mods);
} # saveChangeFromExpr

# Save a new change object to the list (in @exprs).
#
sub saveChange {
    my ($from, $to, $mods) = @_;
    if (!$mods) { $mods = ""; }
    my $chg = ChangeInfo::new ChangeInfo($from, $to, $mods);
    push @exprs, $chg;
}


###############################################################################
###############################################################################
# Handle one node (file or directory).
#
sub doOneItem {
    my ($file, $depth) = @_;
    $totalItems++;

    if (!$force && $file =~ m/(\.bak|~|#)$/) {    # Types to ignore
        vMsg(1, "Backup file skipped: '$file'");
        $filesBackup++;
        return;
    }

    if (!-e $file) {                              # Can't find
        eMsg(0, "Can't find file '$file'.");
        $filesMissing++;
        return;
    }

    if (-d $file) {                               # Directory
        $dirs++;
        if (!$recursive) {
            vMsg(1, "Directory '$file' skipped.");
            return;
        }
        vMsg(1, "Recursing into directory '$file'");
        my $theDir;
        opendir($theDir, $file);
        vPush();
        while (my $subitem = readdir($theDir)) {
            ($subitem =~ m/^\./) && next;
            doOneItem("$file/$subitem", $depth+1);
        }
        vPop();
        closedir($theDir);
        return;
    }

    # Could check for specials (cf grep options?)
    # -w File is writable by effective uid/gid.
    # -l File is a symbolic link.
    # -p File is a named pipe (FIFO), or Filehandle is a pipe.
    # -S File is a socket.
    # -b File is a block special file.
    # -c File is a character special file.
    # -u File has setuid bit set.
    # -g File has setgid bit set.
    # -B File is a "binary" file (opposite of -T).

    my $ifh;
    if (!open($ifh, "<$file")) {                  # Can't open
        eMsg(0, "Can't open input file '$file'.");
        $failedOpen++;
        return;
    }
    if ($iencoding) {
        binmode($ifh, ":encoding($iencoding)");
    }

    # Copy, truncate, and append -- so the permissions get set right.
    my $cmd = "cp -P $file $tmpFile";
    system $cmd || eMsg(0, "Command failed: ", $cmd);
    $cmd = "truncate -s 0 $tmpFile";
    system $cmd || eMsg(0, "Command failed: ", $cmd);

    my $ofh;
    if (!open($ofh, ">>$tmpFile")) {
        eMsg(0, "Can't open temp output file '$tmpFile'.");
        $failedTempOpen++;
        next;
    }
    if ($oencoding) {
        binmode($ofh, ":encoding($oencoding)");
    }

    # Do the changes, putting results in the temp file
    #
    my $nc = doOneFile($file, $ifh, $ofh);
    close $ifh;
    close $ofh;

    # Report and finalize (unless -test)
    #
    if ($nc < 0) {
        vMsg(0, "  Changes failed for file '$file'");
        $failedChanges++;
    }
    elsif ($nc == 0) {
        ($zero) && sjdUtils::Msg(
            "none", ">>> No change(s) in file '$file'");
    }
    elsif ($nc > 0) {
        $totalChanges += $nc;
        $totalFilesChanged++;
        if ($test) {
            sjdUtils::Msg(
                "some", ">>> $nc change(s) in file '$file'\n");
        }
        else {
            my $cmd = "cp --preserve" .
                ($backup ? " -b":"") ." '$tmpFile' '$file'";
            if (system "$cmd") {
                eMsg(0, "Copy of '$tmpFile' back to '$file' failed.");
                $failedCopy++;
            }
        }
    }
} # doOneItem


###############################################################################
###############################################################################
# Crank through a single file doing all the changes.
# Could load the whole file and run as one mongo change, I suppose....
#
sub doOneFile {
    my ($file, $ifh, $ofh) = @_;
    ($verbose || $test) && hMsg(2, "Starting file '$file'");
    $filesRegular++;
    $recnum = 0;
    $curFileName = $file;

    if ($maxFieldChoice > 0) {
        $tfmt->attach($ifh) || eMsg(0, "Couldn't attach '$file'.");
    }

    my $headRec = "";
    my @headNames = undef;
    if ($tfmt->getOption("header")) {
        $headRec = $tfmt->readHeader();
        @headNames = @{$tfmt->parseHeader($headRec)};
    }

    my $force = ($interactive) ? 0:1;
    my $nChangesInFile = 0;
    my $quit = 0;
        sjdUtils::vPush();
    while (!$quit && defined (my $rec=getARecord($ifh))) {  # Per line...
        chomp $rec;
        my $original = $rec; # Save a copy for later diffing
        my $fields = $tfmt->parseRecord($rec);              # Parse
        vMsg(1, "Record #$recnum (" . scalar(@{$fields}) . " fields): ", $rec);
        
        my $nExprsInRecord = 0;
        my $nChangesInRecord = 0;
        sjdUtils::vPush();
        for (my $exprNum=0; $exprNum<scalar(@exprs) && !$quit; $exprNum++) {
            vMsg(2, "Applying expr: ", $exprs[$exprNum]->toString($exprNum));
            my ($rec2, $nChangesForExpr) =                  # Try the expr
                $exprs[$exprNum]->apply($fields);

            next unless ($nChangesForExpr);                 # Did it match?

            if ($test || $verbose) {
                vMsg(0, "  Change #$exprNum ($nChangesForExpr matches), line " .
                  "$recnum in '$file':\n", makeDiffLine($rec,$rec2));
            }

            my %actions = ($force || !$interactive) ?       # Interact?
                ("doIt"=>1) : %{askUser($rec, $rec2)};
            while (1) {
                if ($actions{"doIt"}) {
                    $nExprsInRecord++;
                    $nChangesInRecord += $nChangesForExpr;
                    $totalChangesByExpr[$exprNum] += $nChangesForExpr;
                    $rec = $rec2;                           # Apply
                }
                if ($actions{"showPre"})  { warn "<$rec"; }
                if ($actions{"showPost"}) { warn ">$rec2"; }
                if ($actions{"force"})    { $force = 1; }
                if ($actions{"quit"})     { $quit = 1; last; }
                if ($actions{"reAsk"})    { %actions = %{askUser($rec,$rec2)}; }
                else { last; }
            }
        } # for each expr
        vMsg(2, "*EOF*");
        sjdUtils::vPop();

        if ($nChangesInRecord > 0) {                        # Report
            $nChangesInFile += $nChangesInRecord;
            $totalChangedLines++;
            ($verbose || $showChanges) && vMsg(
                3,"  Record #$recnum: $nChangesInRecord changes, " .
                " (via $nExprsInRecord expressions):\n",
                makeDiffLine($original,$rec));
        }
        my $orec = $tfmt->assembleRecord(\@fields);
        outputALine($ofh, $orec);                           # Output the record
    } # while not EOF
    finalize($ofh);                                         # Buffered lines!
    sjdUtils::vPop();
    $totalRecs += $recnum;

    if ($perFile) {                                         # File summary
        vMsg(0, sprintf("%6d changed lines in $file", $nChangesInFile));
    }
    return($nChangesInFile);
} # doOneFile

sub getARecord {
    my ($ifh) = @_;
    my $rec = ($maxFieldChoice > 0) ? $tfmt->readRecord() : <$ifh>;
    if (!defined $rec) { return(undef); }
    #chomp $rec;
    $recnum++;
    if ($tickInterval>0 && $recnum % $tickInterval == 0) {
        vMsg(0,"At line $recnum.");
    }
    return($rec);
}

# Get a command, then make a hash of the individual actions for caller to do.
# Actions are: doIt, quit, force, showPre, showPost, reAsk (and help).
#
sub askUser {
    my ($rec, $rec2) = @_;
    vMsg(0,"Change?\n  <$rec\n  >$rec2");
    my $cmd = <>;
    $cmd = uc(substr($cmd."Y",0,1));

    my %acts = ();
    if    ($cmd eq "Y" || $cmd eq " ") {
        $acts{"doIt"} = 1;
    }
    elsif ($cmd eq "N" || $cmd eq "\r") {
        $acts{"doIt"} = 0;
    }
    elsif ($cmd eq "Q" || $cmd eq "\xFF") {
        $acts{"quit"} = 1;
    }
    elsif ($cmd eq "!") { # do this and all following (all files? ***)
        $acts{"doIt"} = 1;
        $acts{"force"} = 1;
    }
    elsif ($cmd eq ",") { # show result, ask again
        $acts{"showPost"} = 1;
        $acts{"reAsk"} = 1;
    }
    elsif ($cmd eq ".") { # replace then stop
        $acts{"doIt"} = 1;
        $acts{"quit"} = 1;
    }
    elsif ($cmd eq "^L") { # redisplay screen and ask again
        $acts{"showPre"} = 1;
        $acts{"reAsk"} = 1;
    }
    elsif ($cmd eq "^H") { # help and ask again
        warn qq@
Help for query-replace in 'globalChange':
    Y   Replace and continue asking
    N   Don't replace, but  continue asking
    Q   Don't replace, and stop
    .   Replace, then stop
    !   Replace all remaining (no more asking)
    ,   Show result then ask again
    ^L  Redisplay (before change) and ask again
    ^   Back to previous change (not supported)
    ^R  Edit this occurrence (not supported)
    ^W  Delete this occurrence then edit (not supported)
    e   Edit this occurrence if minibuffer (not supported)
        @;
        $acts{"reAsk"} = 1;
    }

    #elsif ($cmd eq "^") { # go back
    #}
    #elsif ($cmd eq "^R") { # edit this occurrence
    #}
    #elsif ($cmd eq "^W") { # delete and edit
    #}
    #elsif ($cmd eq "e") { # edit in minibuffer
    #}
    else {
        warn "Unknown response ('h' for help)\n";
        $acts{"reAsk"} = 1;
    }
    return(\%acts);
} # askUser


# Colorize the non-matching center portion of two strings.
# That is, everything from the first differing column to the last.
#
sub makeDiffLine {
    my ($r1, $r2) = @_;
    my $r1c = my $r2c = "";
    if (!$color || ($r1 eq $r2)) {
        $r1c = $r1;
        $r2c = $r2;
    }
    else {
        my $r1len = length($r1);
        my $r2len = length($r2);
        my $minlen = ($r1len < $r2len) ? $r1len:$r2len;
        my $firstDiff = my $lastDiff = 0;
        while ($firstDiff<$minlen &&
               substr($r1,$firstDiff,1) eq substr($r2,$firstDiff,1)) {
            $firstDiff++;
        }
        while ($lastDiff<$minlen &&
               substr($r1,$r1len-$lastDiff-1,1) eq
               substr($r2,$r2len-$lastDiff-1,1)) {
            $lastDiff++;
        }
        $r1c =
            substr($r1,0,$firstDiff) .
            colorize("red",substr($r1,$firstDiff,$r1len-$firstDiff-$lastDiff)) .
            substr($r1,$r1len-$lastDiff);
        $r2c =
            substr($r2,0,$firstDiff) .
            colorize("red",substr($r2,$firstDiff,$r2len-$firstDiff-$lastDiff)) .
            substr($r2,$r2len-$lastDiff);
    }
    return("  <$r1c\n  >$r2c");
} # makeDiffLine


###############################################################################
# Maintain an output buffer so we can look back at preceding lines.
# Mainly for -span and -pre.
# NOTE: The output is going to a temp file that was copied from the source
#       file and then truncated (that way permissions are right). If we
#       are in -test mode, the file gets discarded; else moved to source.
#
BEGIN {
    my @outBuf = [];
    my $outSize = 1;

    sub getPreLine {
        my ($n) = @_;
        return($outBuf[-$n]);
    }

    sub outputALine {
        my ($ofh, $s) = @_;
        while (scalar(@outBuf) > $outSize) {
            my $buf = shift @outBuf;
            print $ofh "$buf\n";
        }
        push @outBuf, $s;
    }
    sub finalize {
        my ($ofh) = @_;
        while (scalar(@outBuf) > 0) {
            my $buf = shift @outBuf;
            print $ofh "$buf\n";
        }
    }
} # END

###############################################################################
#
sub report {
    my $failed = $failedOpen + $failedTempOpen + $failedChanges + $failedCopy;

    ($quiet) || vMsg(0,
          "\nDone" . ($test ? " (test only)":"") . ".\n" .
          "$totalItems total items: $filesRegular files, $dirs directories, " .
          colorIf($filesBackup,  "$filesBackup backups, ") .
          colorIf($filesMissing, "$filesMissing not found.") . "\n" .
          sjdUtils::lpadc($totalRecs) . " total records processed.\n" .
          "$totalFilesChanged files changed, " .
          sjdUtils::lpadc($totalChangedLines) . " lines changed, " .
          sjdUtils::lpadc($totalChanges) . " total changes.");

    if ($failed>0) {
        vMsg(0, "Failed: open: $failedOpen, temp open: $failedTempOpen, " .
              "changes: $failedChanges, finalize: $failedCopy.");
    }

    my $nc = scalar(@exprs);
    ($nc <= 1) && return;
    vMsg(0,"Number of applications of each change:");
    for (my $i=0; $i<$nc; $i++) {
        vMsg(0, sprintf("  Change %2d of %2d: %6d occurrences",
                     $i, $nc, $totalChangesByExpr[$i] || 0));
    }
} # report

sub colorIf {
    my ($cond, $msg) = @_;
    if ($color && $cond) {
        return(sjdUtils::colorize("red", $msg));
    }
    return($msg);
}

sub dieCleanly {
    my ($msg) = @_;
    Term::ReadKey::ReadMode 1;
    chomp $msg;
    die "$msg\n";
}


###############################################################################
###############################################################################
###############################################################################
# A ChangeInfo object stores one s/// command and can apply it.
#
# Modifiers for regex change commands, per PERL:
#     http://perldoc.perl.org/perlre.html#Modifiers
#     i  ignore case
#     g  global
#     m  multi-line
#     s  single-line
#     e  eval the RHS
#     x  allow whitespace
# Character set modifiers:
#     a  ascii
#     d  platform default unless evidence to contrary
#     l  locale
#     u  unicode
#
package ChangeInfo;

sub new {
    my ($class,
        $f,              # 'from' regex
        $t,              # 'to' value
        $m,              # Perl-like modifiers
        ) = @_;
    (scalar(@_) == 4) ||
        main::dieCleanly("Wrong # args (" . scalar(@_) . 
                         ", not 4) to ChangeInfo constructor.\n");
    sjdUtils::vMsg(3, "Constructing a new change ('/' will be escaped):",
          sjdUtils::showInvisibles("s/$f/$t/$m"));

    my $self = {
        from         => $f,
        cfrom        => undef,
        to           => $t,
        mods         => $m ||
            ( ($exprOptions{"ignoreCase"} ? "i":"") .
              ($exprOptions{"global"} ? "g":"") ),
        fieldChoice  => $exprOptions{"fieldChoice"},
        pre          => $exprOptions{"pre"},
        postLines    => $exprOptions{"postLines"},
        post         => $exprOptions{"post"},
        preLines     => $exprOptions{"preLines"},
        span         => $exprOptions{"span"},
    };

    if ($compileRegex) {
        # Must use eval(), or qr// can't handle variable modifiers!
        $f =~ s|/|\\/|g; # ???
        # Not all modifiers can be compiled.... Like 'g'.
        (my $mCompilable = $m) =~ s/[^imosx]//g; 
        $self->{cfrom} = eval("qr/$f/$mCompilable");
        if ($@) {
            main::dieCleanly("eval() failed compiling qr/$f/$m\n");
        }
    }

    bless $self, $class;
    return($self);
} # new ChangeInfo

# Carry out the change represented by this object, on a record.
# Also: return the number of changes made?
#
sub apply {
    my ($self, $fields) = @_;
    my $data = $fields->[$self->{fieldChoice}];

    ($self->preIsSatisfied()) || return($data,0);

    my $cmd = "\$data =~ s/";
    if ($compileRegex) {
        $cmd .= "\$self->{cfrom}";
    }
    else {
        $cmd .= $self->{from};
    }
    # Include the mods even if compiled, since some like 'g' can't compile.
    $cmd .= "/" . $self->{to} . "/" . $self->{mods};
    my $nchanges;
    BEGIN {
        no warnings 'all'; # Esp. for "\1 better written as $1..."
        $nchanges = eval($cmd);
        if ($@ ne "") {
            main::dieCleanly("eval() failed for:\n    s/" . $self->{from} .
                             "/\n      " . $self->{to} . "/" . $self->{mods} .
                             "\nMessage: $@");
        }
    } # END
    $fields->[$self->{fieldChoice}] = $data;
    return($data, $nchanges);
} # apply

sub preIsSatisfied {
    my ($self) = @_;
    if (!defined $self->{pre}) { return(1); } # No -pre condition
    for (my $i=1; $i<=$self->{preLines}; $i++) {
        if (getPreLine($i) =~ $self->{pre}) {
            return(1);
        }
    }
    return(0);
}

sub toString {
    my ($self, $i) = @_;
    my $msg = sprintf(
        "  %2d: s|%s|%s|%s",
        $i, $self->{from}, $self->{to}, $self->{mods});
    return($msg);
}

# End of ChangeInfo package.


###############################################################################
###############################################################################
###############################################################################
#

=pod

=head1 Usage

globalChange [options] [files]

Do a global-change(s) on the named file(s), in place. 

To be queried for each change, see I<-interactive>.
To see what changes I<would> be made, but not actually do them, see I<-test>.

You can specify the change(s) using Perl regex features (see below), 
in the following forms (all repeatable).
Expressions are applied in the order specified.

B<Note>: Remember to use the C<g> modifier
if you want all occurrences on each line changed (see also I<-global>).

=over

=item * -f I<file> (I<file> contains a list of changes, one per line).

=item * -e I<s/regex/subst/modifiers>

=item * -d I<regexToDelete>

=back

Files already ending in C<.bak> or C<#> or C<~> are not changed.
Changed files replace the originals. 
The original files are backed up using C<cp -b> (but see I<-backup>).

At the end, a summary of the number of files and records changed
will be displayed.
If more than one regex change was being applied, then the
number of successful applications of each one will also be displayed.
To suppress these reports, use I<-quiet>.

=head2 Why globalchange instead of sed?

This script does not have script-like capabilities like
C<sed>, or have "hold" and "pattern" spaces (though some similar effects
can be achieved with I<-pre> and I<-post>).
This script also does not do transliteration,
or have "addressing" of changes.

It is, insteaad, meant just to do regex changes, but do them really well.
For example, it

=over

=item * Supports Unicode and many other character encodings
(see I<-listEncodings>)

=item * Has I<-recursive> (still experimental)

=item * Provides interactive query-replace (interface like C<emacs>,
except that you can't go back to 'undo' previous changes)

=item * Has I<-test> mode, and
can colorize what changed in the before and after forms of lines

=item * Can limit changes to certain fields (experimental)

=item * Supports full Perl-style regular expressions (as of this writing,
C<sed> lacks a I<-P> option like C<grep> has)

=item * Permits arbitrary Perl code on right hand side (experimental)

=item * Does the equivalent of C<sed>'s I<--separate>
and I<--in-place=extension> by default

=item * Simple multi-line matching is in development, but not yet
finished. See I<-spanLines>, I<-preLines>, I<-postLines>,
I<-pre>, and I<-post>, below.

=back


=head1 Options

(prefix 'no' to negate where applicable)

=over

=item * B<-backup>

Make backups like the *nix C<cp -b> command.
Default: on (turn off with I<-nobackup>).

=item * B<-color>

Colorize STDERR output. Defaults to on if environment variable C<USE_COLOR>
is set and STDERR is going to a terminal. This is especially useful with
I<-test>, since you then see the before/after versions of each changed line,
with the changed portion colorized.

=item * B<-compileRegex>

Pre-compile the left-hand sides of regexes to apply, once each, rather
than interpreting them once per line. Default: on.
B<Note>: The /g modifier on searches will prevent compilation (this appears
to be a Perl bug/feature).

=item * B<-d> I<regex>

Shorthand for deleting text matched by I<regex> as you would with I<-e>.
Repeatable.

=item * B<-e> I<s/from/to/modifiers>

A Perl-style regex change expression to do.
You can use a delimiter other than "/" if desired.
The delimiter can also be backslashed if desired.
Repeatable (changes are done in the order specified).
To see how your expression(s) were interpreted, use I<-v>.

Each change expression (whether specified via I<-e>, I<-d>, or I<-f>,
gets a copy of several options, as they are set when the change expression
is specified. 
The options that "stick" to each change expression are:
I<-ignoreCase>, I<-global>, I<-fieldChoice>, 
I<-post>, I<-postLines>, I<-pre>, I<-preLines>, I<-spanLines>.
Those options remain in effect for any later change expressions, unless
they are explicitly re-set.

=item * B<-f> I<path> or B<-fileOfExprs> I<path>

Read regex changes to do from this file, one per line in
C<s/from/to/modifiers> form, and treats each one as if specified via I<-e>.
Lines beginning with (optional whitespace and then) "#" are ignored.
Repeatable.

B<Note>: There is no way (yet) to specify a I<-fieldChoice> value for lines
of such a file; the value in effect when this option is specified applies
to all.

=item * B<-fieldChoice> I<n>

Only apply the changes within field I<n> (counting from 1).
By default, input records are treated entire, and not parsed into fields
(this behavior can be explicitly chosen by setting I<-fieldChoice 0>).
This option applies to all following changes, until itself changed
(in other words, only one value is in effect at a time).
You can only specify one field for each change-expression (I<-e>) to apply to,
but separate expressions can apply to different fields.

See L<"Input file format options">, and the help for
C<TabularFormats.pm>, for details on field-parsing options.

=item * B<-force>

Also operate on apparent backup files, such as those
ending in '~', '#', or '.bak' (by default they are skipped).

=item * B<-global>

Apply following changes to all matches in each record, rather than just
the first (turn off again with I<-noglobal>). For I<individual> regexes,
you can append the modifier C<g> instead (for example, C<s/foo/bar/g>).

=item * B<-iencoding> I<t>

Character set to use for input. See also I<-listEncodings>.

=item * B<-ignoreCase>

Ignore case distinctions for following matches (turn off again with 
I<-noignoreCase>). For I<individual> regexes,
you can append the modifier C<i> (for example, C<s/foo/bar/i>).

=item * B<-ilineends> I<t>

Assume Unix, Dos, or Mac line-breaks for input.

=item * B<-interactive>

(experimental)
Ask the user before doing each change. The commands recognized are
based largely on C<emacs>
(L<http://www.gnu.org/software/emacs/manual/html_node/emacs/Query-Replace.html>):

=over

=item B<SPACE> or B<Y> -- replace and find next

=item B<DEL> or B<N> -- skip

=item B<CR> or B<q> -- stop replacing (exit)

=item B<!>  -- replace this and all following (*** To EOF or all files? ***)

=item B<.>  -- replace this instance then exit

=item B<^L> -- redisplay screen and ask again

=item B<,>  -- show result of replacement, then ask again

=item B<^H> -- help and ask again

=item B<^>  -- go back to previous occurance (not supported)

=item B<^R> -- edit this occurrence (not supported)

=item B<^W> -- delete and edit (not supported)

=item B<e>  -- edit in minibuffer (not supported)

=item

=back

=item * B<-iString> I<s>

Use I<s> instead of the default "  " as the string to repeat in order to
indent progress reports (this is only really useful with I<-r>).

=item * B<-listEncodings>

Show all the encodings supported by I<-iencoding> and I<-oencoding>, and exit.

=item * B<-oencoding> I<t>

Character set to use for output. See also I<-listEncodings>.

=item * B<-olineends> I<t>

Write Unix, Dos, or Mac line-breaks for output.

=item * B<-perFile>

Report the number of changes in each individual file, not just the grand total.

=item * B<-q>

Suppress most messages, including the summary report (see also I<-v>).

=item * B<-recursive>

Modify files in a whole subtree (experimental). See also I<-iString>.

=item * B<-showChanges>

Display all lines that actually change (see also I<-v> and I<-test>).

=item * B<-test>

Show the specific changes that would be made, but don't actually I<do> them.
Files that would not change are not reported at all unless you use I<-zero>.
I<-test> also turns up reporting, similar to I<-v>.

=item * B<-tickInterval> I<n>

Print a message after every I<n> input records (C<0> to turn off).

=item * B<-unicode>

Synonym for I<-iencoding utf8>.

=item * B<-v>

Add more messages (repeatable). The first I<-v> will report the original
and final form of each changed record; an additional I<-v> will also 
show each particular change.

=item * B<-version>

Show version/license info and exit.

=item * B<-zero>

With I<-test>, report each file even if it would have no changes.

=back


=head2 Unfinished options

=over

=item B<-post> I<regex>

Just like I<-pre>, but places a constraint via I<following> lines rather
than preceding lines.
Incomplete and experimental.

=item B<-postLines> I<n>

Just like I<-preLines>, but sets the scope of I<-post> rather than I<-pre>.
Incomplete and experimental. Default: 1.

=item B<-pre> I<regex>

Like I<ignoreCase> and some other options, this applies to following
change expressions, until overridden (by setting it to 0 or 1).
It constrains expressions so that they can only match if the I<regex>
specified on I<-pre> is I<also> matched, within some number of lines of
prior context (see I<-prelines>). For example,

    -pre '/^<h1>/' -e 's/<li\b/<ul>\n<li/'

would insert an HTML unordered-list start-tag before a list-item start-tag,
but I<only> if the preceding line began with a heading-1 start-tag.
I<-pre> constraints are not affected by I<-fieldChoice>.
Incomplete and experimental.

=item B<-preLines> I<n>

Sets the number of lines of preceding context within which a I<-pre>
constraint must be matched. Default: 1, meaning that the
1 preceding line is eligible. Has no effect if I<-pre> is not used after it.
Remains effective until explicitly changed. Default: 1.
Incomplete and experimental.

=item B<-span> I<n>

Like I<ignoreCase> and some other options, this applies to following
change expressions, until overridden (by setting it to 0 or 1). 
It allows change expressions to match even if split across multiple lines.
It implies the Perl regex modifier "s", and 
it causes I<-fieldChoice> (if set) to be ignored (though not cleared).
Of course, it causes at least I<n> lines to be buffered at once, so
multi-line matching is possible.
Incomplete and experimental.

=back


=for nobody ===================================================================

=head2 Input file format options 

These are only useful with I<-fieldChoice>.
See C<TabularFormats.pm> for the available options, which are numerous.
The most important are probably I<-basictype> (to select CSV, JSON, XSV, SEXP,
etc.), and I<-fieldSep> (to select a CSV-style delimiter such as TAB or comma).


=for nobody ===================================================================

=head1 Known bugs and limitations

Be careful to escape '$' on I<both> sides of expressions, lest you 
interpolate some Perl variable (of course, maybe you want to do that,
for example with C<$recnum> or C<$curFileName>).

How to escape metacharacters in expressions is not necessarily obvious.
If you encounter problems, try I<-v>, and I<-test> so you see what the
regexes parse to, and what they do on actual data.

Modifiers and several options are little tested. 
The /g modifier won't compile, so will be slow.

You can only specify one field for each change-expression to apply to.

It would be nice to be able to change options via special lines in a
I<-f> file.


=for nobody ===================================================================

=head1 Related commands

C<emacs> and many other editors can also do a nice job of this. For example:

  M-x find-grep-dired RET whatToFind RET
  % m pathFilter RET
  Q whatToFind RET whatToChangeTo RET

Followed by essentially the same commands used with I<-interactive> here.
C<emacs> can also go backwards.

C<sed> -- similar to this script, but lacks character set, PCRE, 
and query-replace, and has a variety of other options instead.

C<grepData> -- similar Perl-based searching and TabularFormats support,
but no changes.

C<TabularFormats.pm> -- parsing for the I<-fieldChoice> option.

C<rename>, C<renameFiles> -- Similar changes, to filenames rather than content.


=for nobody ===================================================================

=head1 Regex syntax summary (Perl, or essentially PCRE)

This summary is not complete. See also L<http://perldoc.perl.org/perlre.html>.

=over

=item * B<.> -- any character (except newline, unless Regex Modifier /s is on).

=item * B<[ace-m]> -- any of the listed characters.
'^' at the start negates the list. 
'-' for ranges (to get hyphen, put it first (except for '^')).

=item * B<[:posixclass:]> -- named sets of characters (this goes
*inside* the normal [], for example: C<[[:alpha:]]>.
Class names: 
I<alpha, alnum, ascii, blank, cntrl, digit, graph, lower,
print, punct, space, upper, word, xdigit>.

=item * B<()> -- grouping, and saves matched text for use in the 'to' part
(right hand side) of the change. Groups are numbered in order of their "(".
You can also name groups, like I<< (?<name>...) >>, and refer to them
via I<\g{name}>.

Unlike with C<sed> and C<emacs>, 
you B<do not backslash grouping parentheses> to get this effect;
instead, you backslash when you want a literal parenthesis.
To refer to the n-th parenthesized match, use '$n' with the group number I<n>,
or '\g{n}' with I<n> either a group number or group name.
Do not use \n as with the C<sed> command).

=item * B<|> -- match either the pattern on the right or the one on the left.

=item * B<*, +, ?, {min,max}> -- repeatability of prior item or group.
Normally these do a "greedy" match, matching as many repetitions as possible.
Appending a '?', as in '[aeiou]*?', makes a non-greedy match, matching
the least number possible. For example, '<.*?>' will match from a less-than
sign, up to the I<first> following greater-than sign rather than all the way
to the I<last> following greater-than sign as with '<.*>'.

=item * B<^, $> -- start, end of string/line. The exact meaning is
affected by the various L<Regex Modifiers>.


=item * Backslashed character classes (capitalize to invert a class).
See also the Posix character-class names, above.

=over

=item B<\b>: word-boundary (includes line-breaks and start/end of string),

=item B<\w>: word character (includes letters, digits, underscore, and certain
other Unicode connector punctuation characters),

=item B<\d>: digit (may include Unicode numerics other than (0-9),

=item B<\s>: whitespace

=back


=item * Backslashed special-character codes: 

=over

=item  B<\a>: bell,

=item  B<\e>: escape,

=item  B<\f>: formfeed, 

=item  B<\n>: newline, 

=item  B<\r>: carriage return,

=item  B<\t>: tab, 

=item  B<\C>: any octet (byte), even if part of a larger character,

=item  B<\cX>: control character X,

=item  B<\xFF> or B<\x{FF}>: char with hex code point FF, 

=item  B<\N{name}> or B<\N{U+FFFF}>: Unicode character by name or code point,

=item  B<\o{77}> char with octal code point 77.

=item

=back


=item * Special \-codes for the right-hand-side:

=over

=item  B<\l>: lowercase the next character,

=item  B<\u>: uppercase the next character,

=item  B<\L>: lowercase on,

=item  B<\U>: uppercase on,

=item  B<\Q>: metacharacters off,

=item  B<\E>: end B<\L> or B<\U> or B<\Q>.

=back

=back


=head3 Look-Around Assertions

Look-around assertions are zero-width patterns which match a specific
pattern without including it in $& . Positive assertions match when
their subpattern matches, negative when it fails.

=over

=item B<< (?=pattern) >>

A zero-width positive look-ahead assertion. For example,
C</\w+(?=\t)/> matches a word followed by a tab, 
without including the tab in $& .  

=item B<< (?!pattern) >>

A zero-width negative look-ahead assertion. For example
C</foo(?!bar)/> matches any "foo" that isn't followed by "bar". 
You cannot use this for look-behind.

If you are looking for a "bar" that isn't preceded by a "foo",
C</(?!foo)bar/> will not do what you want. That's because the (?!foo)
is just saying that the next thing cannot be "foo"--and it's not,
it's a "bar", so "foobar" will match. Use look-behind instead (see below).

=item B<< (?<=pattern) \K >>

A zero-width positive look-behind assertion. For example,
C</(?<=\t)\w+/> matches a word that follows a tab, without including
the tab in $& . Works only for fixed-width look-behind.

A special form, \K , "keeps" everything it had matched prior
to the \K and does not include it in $& . This effectively provides
variable-length look-behind.

=item B<< (?<!pattern) >>

A zero-width negative look-behind assertion. For example
C</(?<!bar)foo/> matches any "foo" that does not follow
"bar". Works only for fixed-width look-behind.

=back

(there are additional regex operators)


=head2 B<Regex Modifiers>

(see also L<http://perldoc.perl.org/perlre.html#Modifiers>)

=over

=item * B<i> ignore case distinctions for matching.

=item * B<g> global change, instead of just the first instance per line.
B<Note>: This is not the default.

=item * B<e> add another layer of evaluation (using Perl eval()).

=item * B<x> allow ignorable whitespace in the expression.

=item * B<s> allow '.' (the wildcard) to include newlines (this won't do much
for you here, unless you're also using non-*nix -ilineends, or say you are).

=item * B<m> allows '^' and '$' to match next to newlines, not just at the
beginning and end of the entire string.

=item * B<a, d, l, u> -- choose which of 4 sets of character-set rules
to apply (ASCII, Default, Locale-based, or Unicode).

=back


=for nobody ===================================================================

=head1 Ownership

This work by Steven J. DeRose is licensed under a Creative Commons 
Attribution-Share Alike 3.0 Unported License. For further information on
this license, see L<http://creativecommons.org/licenses/by-sa/3.0/>.

The author's present email is sderose at acm.org.

For the most recent version, see L<http://www.derose.net/steve/utilities/>.

=cut