#!/usr/bin/perl -w
#
# ord: By Steven J. DeRose, 2007-10, sderose@acm.org.
#
# 2007-11-22 sjd:  Accept control-char mnemonics as input. Getopt.
#     Add binary and long-name output.
# 2008-02-14 sjd: Multiple input chars. setupCharacterNames(). Unify $fmt.
#     Add longNames for G0 and G1. Add -go, -g1. perl -w.
# 2008-09-03 sjd: Move to BSD. Improve doc.
# 2008-09-16 sjd: Better handling of Unicode input.
# 2010-01-06 sjd: Use 'charnames' to know Unicode names. Add -binary.
#     Make print utf-8 and actual Unicode character. Format binary better.
# 2010-05-03 sjd: perldoc. Unify formatting. Add Unix Jargon names, rest of
#     short names. Make user use "_" in.
# 2011-08-23 sjd: Add options to control each display form separately.
#     Start -cp1252.
# 2011-12-11 sjd: Add utf-8 output. Opt out of longNames (lists of
#     char names -- now using viacode instead).
# 2012-01-10 sjd: Cleanup. Lose internal 'longNames' lists.
# 2012-08-15 sjd: sjdUtils, and use getUTF8().
# 2013-06-17ff sjd: Add -entities, esp. HTML named ones. Add showUnicodeInfo().
#
# To do:
#     Way to print remaining Unicode char properties
#     Maybe integrate w/ chr?
#     Recognize partial char names? Search for all matches?
#     Switch remaining data into xmlTuples.
#
use strict;
use Getopt::Long;
use charnames ':full';
use Unicode::UCD 'charscript';
use Unicode::UCD 'charblock';
use HTML::Entities;
use Encode;

use sjdUtils;

our $VERSION = "2013-06-19";

# Short-name tables, indexed by offset from the start of each range.
# setupShortCharacterNames() assigns @C0names, @C1names and @G0names;
# no code visible in this file assigns @G1names.
my (@C0names, @G0names, @C1names, @G1names) = ();

# Unix Jargon File nicknames, keyed by the literal character.
my %uj = ();

setupShortCharacterNames();
setupUnixJargon();

# Output fields that are shown unless switched off with -noXXX.
my $decimal     = 1;
my $entities    = 1;
my $hex         = 1;
my $jargon      = 1;
my $literal     = 1;
my $long        = 1;
my $octal       = 1;
my $utf8        = 1;

# Output fields and modes that are off unless requested.
my $binary      = 0;
my $cp1252      = 0;
my $short       = 0;

# Chart selection: everything, or a single range.
my $chart       = 0;
my ($C0, $C1)   = (0, 0);
my ($G0, $G1)   = (0, 0);

# Messaging levels.
my $quiet       = 0;
my $verbose     = 0;


###############################################################################
# Process options
#
# Option names are matched case-insensitively. A spec ending in "!" also
# accepts a "no" prefix (e.g. -nohex); a spec ending in "+" counts repeats.
Getopt::Long::Configure ("ignore_case");
my $result = GetOptions(
    "binary!"             => \$binary,
    "c|chart"             => \$chart,
    "cp1252!"             => \$cp1252,
    "c0"                  => \$C0,
    "c1"                  => \$C1,
    "decimal!"            => \$decimal,
    "entities!"           => \$entities,
    "g0"                  => \$G0,
    "g1"                  => \$G1,
    "h|help|?"            => sub {
        # List form of system(): no shell is involved, so a script path
        # containing spaces or shell metacharacters reaches perldoc intact.
        system("perldoc", $0);
        # Stop here, as -version does, rather than going on to process
        # whatever else was on the command line.
        exit;
    },
    "hex!"                => \$hex,
    "jargon!"             => \$jargon,
    "literal!"            => \$literal,
    "long!"               => \$long,
    "octal!"              => \$octal,
    "q|quiet!"            => \$quiet,
    "short!"              => \$short,
    "utf8!"               => \$utf8,
    "v|verbose+"          => \$verbose,
    "version"             => sub {
        warn "Version of $VERSION, by Steven J. DeRose.\n";
        exit;
    },
    );

# GetOptions() returns false when it met an unknown or malformed option.
($result) || die "Bad options.\n";


###############################################################################
#
# Chart requests: print the requested range(s), then exit without looking
# at any remaining arguments. -chart selects all four ranges; otherwise
# every individually requested range is shown, in code-point order.
my @chartSections = ();
if ($chart) {
    @chartSections = ("c0", "g0", "c1", "g1");
}
else {
    if ($C0) { push(@chartSections, "c0"); }
    if ($G0) { push(@chartSections, "g0"); }
    if ($C1) { push(@chartSections, "c1"); }
    if ($G1) { push(@chartSections, "g1"); }
}
if (scalar(@chartSections) > 0) {
    # The charts print literal characters above 127, so STDOUT must encode
    # them as UTF-8 just as the non-chart output path does.
    binmode(STDOUT, ":utf8");
    for my $section (@chartSections) {
        showChart($section);
    }
    exit;
}


###############################################################################
# (Data also available in tupleSets/cp1252.xsv)
#
# Windows code page 1252 bytes 0x80-0x9F mapped to the Unicode code points
# they stand for. Bytes 0x81, 0x8D, 0x8F, 0x90 and 0x9D have no entry.
#
# The table is filled inside BEGIN so that it exists before any run-time
# statement executes: the chart options above call showOne() and exit
# before a plain run-time assignment at this point would ever be reached.
my %cp1252;
BEGIN {
    %cp1252 = (
        0x80 => 0x20AC,   # EURO SIGN
        0x82 => 0x201A,   # SINGLE LOW-9 QUOTATION MARK
        0x83 => 0x0192,   # LATIN SMALL LETTER F WITH HOOK
        0x84 => 0x201E,   # DOUBLE LOW-9 QUOTATION MARK
        0x85 => 0x2026,   # HORIZONTAL ELLIPSIS
        0x86 => 0x2020,   # DAGGER
        0x87 => 0x2021,   # DOUBLE DAGGER
        0x88 => 0x02C6,   # MODIFIER LETTER CIRCUMFLEX ACCENT
        0x89 => 0x2030,   # PER MILLE SIGN
        0x8A => 0x0160,   # LATIN CAPITAL LETTER S WITH CARON
        0x8B => 0x2039,   # SINGLE LEFT-POINTING ANGLE QUOTATION MARK
        0x8C => 0x0152,   # LATIN CAPITAL LIGATURE OE
        0x8E => 0x017D,   # LATIN CAPITAL LETTER Z WITH CARON
        0x91 => 0x2018,   # LEFT SINGLE QUOTATION MARK
        0x92 => 0x2019,   # RIGHT SINGLE QUOTATION MARK
        0x93 => 0x201C,   # LEFT DOUBLE QUOTATION MARK
        0x94 => 0x201D,   # RIGHT DOUBLE QUOTATION MARK
        0x95 => 0x2022,   # BULLET
        0x96 => 0x2013,   # EN DASH
        0x97 => 0x2014,   # EM DASH
        0x98 => 0x02DC,   # SMALL TILDE
        0x99 => 0x2122,   # TRADE MARK SIGN
        0x9A => 0x0161,   # LATIN SMALL LETTER S WITH CARON
        0x9B => 0x203A,   # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
        0x9C => 0x0153,   # LATIN SMALL LIGATURE OE
        0x9E => 0x017E,   # LATIN SMALL LETTER Z WITH CARON
        0x9F => 0x0178,   # LATIN CAPITAL LETTER Y WITH DIAERESIS
    );
}


###############################################################################
#
# The output includes the literal character, which may be non-ASCII.
binmode(STDOUT,":utf8");

# Each remaining argument is either one literal character or a name/number
# identifying one. "defined" keeps the argument "0" from ending the loop.
while (defined(my $name = shift @ARGV)) {
    (length($name)) ||
      die "No character or control-character mnemonic found (see -h for"
        . " information on hard-to-type characters).\n";

    # Arguments arrive as bytes. If this one is exactly one UTF-8 encoded
    # character, decode it so it is treated as a literal character rather
    # than a multi-letter name. Anything else is left untouched.
    if (length($name) > 1 && !utf8::is_utf8($name)) {
        my $decoded = eval {
            Encode::decode("UTF-8", $name,
                Encode::FB_CROAK() | Encode::LEAVE_SRC());
        };
        if (defined $decoded && length($decoded) == 1) {
            $name = $decoded;
        }
    }

    # undef means "not found yet". 0 cannot serve as the sentinel because
    # it is a valid code point (NUL).
    my $n = undef;

    if (length($name) > 1) { # Search for the name
        my $key = uc($name);

        for my $i (0 .. $#C0names) {            # C0 name?
            if ($C0names[$i] eq $key) { $n = $i; last; }
        }
        if (!defined $n) {                      # C1 name?
            for my $i (0 .. $#C1names) {
                if ($C1names[$i] eq $key) { $n = $i+128; last; }
            }
        }
        if (!defined $n) {                      # Long Unicode name
            (my $tname = $key) =~ s/_+/ /g;
            $n = charnames::vianame($tname);
        }
        if (!defined $n) {                      # Unix Jargon name?
            # Sorted keys make the first match repeatable; \Q..\E stops
            # regex metacharacters in the argument from being interpreted.
            for my $k (sort keys %uj) {
                if ($uj{$k} =~ m/\b\Q$name\E\b/i) { $n = ord($k); last; }
            }
        }
        if (!defined $n) {                      # Number per se?
            if ($name =~ m/^0?x([0-9a-f]+)$/i) { $n = hex($1); }
            elsif ($name =~ m/^0[0-7]+$/)       { $n = oct($name); }
            elsif ($name =~ m/^[0-9]+$/)        { $n = $name + 0; }
        }
        if (!defined $n) {
            warn "Character mnemonic '$name' (length "
                . length($name) . ") not found.\n";
            next;
        }
    }
    else { # A single character, so just convert it.
        $n = ord($name);
    }
    showOne($n);
} # while

exit;


###############################################################################
#
sub showChart {
    # Print a chart header, then showOne() for every code point in the
    # named range. $sectionName is one of "c0", "g0", "c1", "g1"
    # (lower case); any other value prints only the header line.
    my ($sectionName) = @_;
    ($verbose) && warn "Arg to showChart: $sectionName.\n";
    print "Character mnemonics and names:\n";

    # Heading, first code point, last code point (inclusive).
    # C0 stops at 31: code point 32 belongs to (and is shown by) G0.
    # G1 runs through 255 inclusive.
    my %ranges = (
        "c0" => [ "C0 range:\n",     0,  31 ],
        "g0" => [ "G0 range:\n",    32, 127 ],
        "c1" => [ "\nC1 range:\n", 128, 159 ],
        "g1" => [ "\nG1 range:\n", 160, 255 ],
    );

    my $range = $ranges{$sectionName};
    if (!defined $range) { return; }

    my ($heading, $first, $last) = @{$range};
    print $heading;
    for my $n ($first .. $last) {
        showOne($n);
    }
} # showChart


###############################################################################
#
sub getBinary {
    # Format a non-negative integer in binary, one byte per group, written
    # as two 4-bit halves joined by "_", groups separated by a space, most
    # significant byte first. Example: 0x2022 -> "0010_0000 0010_0010".
    # Zero yields a single all-zero byte rather than an empty string.
    my ($n) = @_;
    my @bytes = ();
    while ($n > 0) {
        my $low  = $n & 0x0F;
        my $high = ($n >> 4) & 0x0F;
        unshift(@bytes, sprintf("%04b_%04b", $high, $low));
        $n = $n >> 8;
    }
    if (scalar(@bytes) == 0) {
        @bytes = ("0000_0000");
    }
    return(join(" ", @bytes));
}


###############################################################################
# Display just one character, with unified output formatting.
#
# Note: The widths aren't right for big unicode stuff.
#
sub showOne {
    # Print every enabled output line for code point $n.
    # With -cp1252, a value in 128..159 is looked up in the cp1252 table;
    # when a mapping exists, the short name and the "Unicode:" line
    # describe the mapped code point. The remaining lines use $n as given.
    my ($n) = @_;

    my $mapped = 0;
    if ($cp1252 && $n>=128 && $n<160) {
        $mapped = cp1252ToUnicode($n);      # undef when the byte is unmapped
    }
    my $cp = $mapped ? $mapped : $n;

    if ($short) {
        pline(sprintf(" %6s", "'" . getShortName($cp) . "'"));
    }

    my $bases = "";
    if ($binary)  { $bases .= getBinary($n) . " ";    }
    if ($octal)   { $bases .= sprintf("o%04o ",$n);    }
    if ($decimal) { $bases .= sprintf("d%04d ",$n);    }
    if ($hex)     { $bases .= sprintf("x%04x ",$n);    }
    if ($bases) {
        pline("Bases:", $bases);
    }

    if ($utf8) {
        # getUTF8() comes from sjdUtils; presumably it returns the UTF-8
        # bytes of the code point, each prefixed by the second argument.
        my $utf = sjdUtils::getUTF8($cp, "\\x");
        # Report the same code point whose UTF-8 bytes are shown.
        pline("Unicode:", sprintf("U+%04x, utf8 %s", $cp, $utf));
    }

    if ($entities) {
        my $entName = HTML::Entities::encode(chr($n));
        # encode() yields "&name;" only when a named entity exists. It gives
        # a numeric reference, or the unchanged character (e.g. "A"), when
        # there is none; neither is an entity name.
        if ($entName !~ m/^&[A-Za-z][A-Za-z0-9]*;$/) {
            $entName = "-NO HTML NAMED ENTITY-";
        }
        pline("Entities:", sprintf("&#%d; &#x%x; %s", $n, $n, $entName));
    }

    if ($long) {
        showUnicodeInfo($n);
    }

    if ($literal) {
        pline("Literal:", chr($n));
    }

    if ($jargon && defined $uj{chr($n)}) {
        pline("Unix jargon:", $uj{chr($n)});
    }
} # showOne


sub pline {
    # Print one indented output line: a label left-justified in 16
    # columns, a space, then the data. A missing $data prints as empty;
    # any defined value, including "0", is printed as given.
    my ($label, $data) = @_;
    if (!defined $data) { $data = ""; }
    printf("    %-16s %s\n", $label, $data);
}

sub showUnicodeInfo {
    # Print the Unicode name, script, block and plane for code point $n,
    # or a warning when $n is not a Unicode code point.
    # isUnicodeCodePoint() is not defined in this file; presumably it is
    # exported by sjdUtils -- confirm.
    my ($n) = @_;
    if (!isUnicodeCodePoint($n)) {
        pline("WARNING:", "Not a Unicode code point");
    }
    else {
        pline("Unicode Name:", charnames::viacode($n) || "-NOT FOUND-");
        pline("Unicode Script: ", charscript(sprintf("U+%04x", $n)));
        pline("Unicode Block:  ",  charblock(sprintf("U+%04x", $n)));
    }

    # The plane number is the code point divided by 0x10000.
    my %planeNames = (
         0 => "Basic Multilingual",
         1 => "Supplementary Multilingual",
         2 => "Supplementary Ideographic",
        14 => "Supplementary Special-purpose",
        15 => "Supplementary Private Use Area A",
        16 => "Supplementary Private Use Area B",
    );
    my $pnum = $n >> 16;
    my $pname = $planeNames{$pnum};
    if (!defined $pname) {
        # Planes 3..13 are reported as unassigned; anything past plane 16
        # is outside the table altogether.
        $pname = ($pnum >= 3 && $pnum <= 13) ? "Unassigned" : "-UNKNOWN-";
    }
    pline("Unicode Plane:" , $pnum . ": " . $pname);

    # 0xEFBFBD is the byte sequence EF BF BD (UTF-8 for U+FFFD) read as a
    # single number, which hints at a mis-decoded input.
    if ($n == 0xEFBFBD) {
        pline("WARNING:", "UTF8 of U+FFFD (Replacement Character)?");
    }
} # showUnicodeInfo


###############################################################################
#
sub getShortName {
    # Return a short name for code point $n.
    # Below 256 the local tables are consulted first (C0: 0-31,
    # G0: 32-127, C1: 128-159, G1: 160-255). When a table has no entry,
    # and for everything from 256 up, the Unicode name from
    # charnames::viacode() is used. "???" is returned if nothing is known.
    my ($n) = @_;
    my $name = undef;

    if    ($n <  32) { $name = $C0names[$n];     }
    elsif ($n < 128) { $name = $G0names[$n-32];  }
    elsif ($n < 160) { $name = $C1names[$n-128]; }
    elsif ($n < 256) { $name = $G1names[$n-160]; }

    # Covers code points >= 256 and any gap in the local tables (no code in
    # this file fills @G1names), so the caller never receives undef.
    if (!defined $name || $name eq "") {
        $name = charnames::viacode($n);
    }
    if (!$name) { $name = "???"; }
    return($name);
}


###############################################################################
# Define local names, in case we want shorter forms than viacode gives.
#
sub setupShortCharacterNames {
    # Fill the file-level short-name tables @C0names, @C1names and
    # @G0names. Each table is indexed by offset from the first code point
    # of its range.

    # C0 controls 0-31, followed by "SP" (32) so it works as a mnemonic.
    @C0names = qw(
        NUL SOH STX ETX EOT ENQ ACK BEL
        BS  HT  LF  VT  FF  CR  SO  SI
        DLE DC1 DC2 DC3 DC4 NAK SYN ETB
        CAN EM  SUB ESC FS  GS  RS  US
        SP
    );

    # C1 controls 128-159, followed by "NBS" (160).
    @C1names = qw(
        PAD HOP  BPH NBH IND NEL SSA ESA
        HTS HTJ  VTS PLD PLU RI  SS2 SS3
        DCS PU1  PU2 STS CCH MW  SPA EPA
        SOS SGCI SCI CSI ST  OSC PM  APC
        NBS
    );

    # G0 (32-127). Punctuation names are spelled out; the digit and letter
    # runs are regular, so they are generated.
    my @digitWords = qw(ZERO ONE TWO THREE FOUR FIVE SIX SEVEN EIGHT NINE);
    my @letters    = ("A" .. "Z");

    @G0names = (
        # 0x20 - 0x2F
        "SPACE", "BANG", "QUOTATION MARK", "HASH",
        "DOLLAR", "GRAPES", "AMP", "POP",
        "LEFT PARENTHESIS", "RIGHT PARENTHESIS", "ASTERISK", "PLUS SIGN",
        "COMMA", "HYPHEN-MINUS", "FULL STOP", "SOLIDUS",

        # 0x30 - 0x3F
        (map { "DIGIT " . $_ } @digitWords),
        "COLON", "SEMICOLON", "LESS-THAN SIGN",
        "EQUALS SIGN", "GREATER-THAN SIGN", "QUESTION MARK",

        # 0x40 - 0x5F
        "COMMERCIAL AT",
        (map { "LATIN CAPITAL LETTER " . $_ } @letters),
        "LEFT SQUARE BRACKET", "REVERSE SOLIDUS", "RIGHT SQUARE BRACKET",
        "CIRCUMFLEX ACCENT", "LOW LINE",

        # 0x60 - 0x7F
        "GRAVE ACCENT",
        (map { "LATIN SMALL LETTER " . $_ } @letters),
        "LEFT CURLY BRACKET", "VERTICAL LINE", "RIGHT CURLY BRACKET",
        "TILDE",
        "<control> DEL DELETE",
    );

    # Sanity check that the table has not drifted out of alignment.
    if ($G0names[126-32] ne "TILDE") {
        die "ord: Internal G0 name table screwed up.\n";
    }
} # setupShortCharacterNames


###############################################################################
#
sub setupUnixJargon {
    # Fill the file-level hash %uj: key = a literal ASCII character,
    # value = one string listing its "Common:" and "Rare:" nicknames.
    # The values are printed as-is and are also searched (as whole words)
    # when a command-line argument is looked up as a jargon name.
    %uj = (
        "!" => "Common: bang; pling; excl; not; shriek; ball-bat. " .
        "Rare: factorial; exclam; smash; cuss; boing; yell; wow; hey; " .
        "wham; eureka; spark-spot; soldier, control",
        "\"" => "Common: double quote; quote. " .
        "Rare: literal mark; double-glitch; snakebite; dirk; " .
        "rabbit-ears; double prime",
        "#" => "Common: number sign; pound; pound sign; hash; " .
        "sharp; crunch; hex; mesh. " .
        "Rare: grid; cross-hatch; octothorpe; flash; pig-pen; " .
        "tic-tac-toe; scratchmark; thud; thump; splat",
        "\$" => "Common: dollar. " .
        "Rare: currency symbol; buck; cash; bling; string (from BASIC); " .
        "escape (when used as the echo of ASCII ESC); ding; cache; big money",
        "%" => "Common: percent; mod; grapes. " .
        "Rare: double-oh-seven",
        "&" => "Common: amp; amper; and, and sign. " .
        "Rare: address (from C); reference (from C++); andpersand; " .
        "bitand; background (from sh(1) ); pretzel",
        "'" => "Common: single quote; quote. " .
        "Rare: prime; glitch; tick; irk; pop; spark;",
        # "left" here parallels "right" in the ")" entry below.
        "(" => "Common: l paren; l parenthesis; left; open; paren; " .
        "o paren; o parenthesis; l parenthesis; l banana. " .
        "Rare: so; lparen; o round bracket, l round bracket, wax; " .
        "parenthisey; l ear",
        ")" => " Common: r paren; r parenthesis; right; close; the-sis; " .
        "c paren; c parenthesis; r parenthesis; r banana. " .
        "Rare: al-ready; rparen; c round bracket, r round bracket, " .
        "wane; unparenthisey; r ear",
        "*" => "Common: star; splat. " .
        "Rare: wildcard; gear; dingle; mult; spider; aster; " .
        "times; twinkle; glob; Nathan Hale",
        "+" => "Common: add. " .
        "Rare: cross; intersection",
        "," => "" .
        "Rare: tail",
        "-" => "Common: dash. " .
        "Rare: worm; option; dak; bithorpe",
        "." => "Common: dot; point. " .
        "Rare: radix point; full stop; spot",
        "/" => "Common: slash; stroke; forward slash. " .
        "Rare: diagonal; solidus; over; slak; virgule; slat",
        ":" => "Common: . " .
        "Rare: dots; two-spot",
        ";" => "Common: semi. " .
        "Rare: weenie; hybrid, pit-thwong",
        "<" => "Common: bra; l angle; l angle bracket; l broket. " .
        "Rare: from; read from; comes-from; in; crunch; tic; angle",
        ">" => "Common: ket; r angle; r angle bracket; r broket. " .
        "Rare: into, towards; write to; gozinta; out; zap; tac; right angle",
        "=" => "Common: gets; takes. " .
        "Rare: quadrathorpe; half-mesh",
        "?" => "Common: query; ques . " .
        "Rare: quiz; whatmark; what; wildchar; huh; hook; " .
        "buttonhook; hunchback",
        "@" => "Common: at sign; at; strudel. " .
        "Rare: each; vortex; whorl; whirlpool; cyclone; snail; " .
        "ape; cat; rose; cabbage;",
        "V" => "" .
        "Rare: book",
        "[" => "Common: l square bracket; l bracket; bracket. " .
        "Rare: square; U turn",
        "]" => "Common: r square bracket; r bracket; unbracket. " .
        "Rare: un-square; U turn back",
        "\\" => "Common: backslash, hack, whack; escape; reverse slash; " .
        "slosh; backslant; backwhack. " .
        "Rare: bash; reversed virgule; reverse solidus; rsol; backslat",
        "^" => "Common: hat; control; uparrow; caret. " .
        "Rare: xor sign, chevron; shark; shark-fin; to the; " .
        "to the power of; fang; pointer",
        "_" => "Common: underscore; underbar; under. " .
        "Rare: score; backarrow; skid; flatworm",
        "`" => "Common: backquote; left quote; left single quote; " .
        "open quote; grave. " .
        "Rare: backprime; backspark; unapostrophe; birk; blugle; " .
        "back tick; back glitch; push; quasiquote",
        "{" => "Common: o brace; l brace; l squiggly; l squiggly bracket, " .
        "l squiggly brace; l curly bracket, l curly brace. " .
        "Rare: brace; curly-curly; l squirrelly; embrace",
        "}" => "Common: c brace; r brace; r squiggly; r squiggly bracket, " .
        "r squiggly brace; r curly bracket; r curly brace. " .
        "Rare: unbrace; un-curly; r squirrelly; bracelet",
        "|" => "Common: bar; or; or-bar; v-bar; pipe; vertical bar. " .
        "Rare: gozinta; thru; pipesinta; spike",
        "~" => "Common: squiggle; twiddle; not. " .
        "Rare: approx; wiggle; swung dash; enyay"
        );
} # setupUnixJargon


###############################################################################
# See http://www.microsoft.com/typography/unicode/1252.htm
#
sub cp1252ToUnicode {
    # Look up a cp1252 byte value in the %cp1252 table and return the
    # Unicode code point stored for it, or undef when there is no entry.
    my ($byte) = @_;
    my $codePoint = $cp1252{$byte};
    return $codePoint;
}


###############################################################################
#

=pod

=head1 Usage

ord [options] [chars|mnemonics]

Displays Unicode character code point numbers and other information
about a character(s). For example, "ord BULLET" produces:

    Bases:           o20042 d8226 x2022 
    Unicode:         U+2022, utf8 \xe2\x80\xa2
    Entities:        &#8226; &#x2022; &bull;
    Unicode Name:    BULLET
    Unicode Script:  Common
    Unicode Block:   General Punctuation
    Unicode Plane:   0: Basic Multilingual
    Literal:         •

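You can specify the character in several ways:

=over

=item * As the literal character itself (a single character).

=item * As a numeric code point: hexadecimal with an "x" or "0x" prefix
(e.g. C<0xb9>), octal with a leading zero (e.g. C<0271>), or decimal
(e.g. C<185>). A single digit is taken as a literal character, not a number.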

=item * For control characters, their short mnemonics such as C<CR>.

=item * Full Unicode names like "APL FUNCTIONAL SYMBOL LEFTWARDS VANE",
ignoring case. But if there are spaces in the name, you need to
quote it or use "_" instead of each space. Unfortunately, approximate names
are not (yet) supported.

=item * A fairly large selection of *nix jargon names (likewise,
quote them if they contain spaces).

=back

Default output includes the character's code point number
(in hexadecimal, decimal, and octal; binary only with I<-binary>),
its UTF-8 encoding in hex, its HTML entity forms,
its Unicode name, script, block, and plane, the literal character itself,
and any Unix Jargon file names for the character.
The short name is shown only with I<-short>.


=head1 Options

(prefix 'no' to option name to negate where applicable)

=over

=item * B<-binary>

Show code points in binary.

=item * B<-chart>

Show a chart of character values and mnemonics.
See also I<-c0>, I<-c1>, I<-g0>, and I<-g1> for partial charts.

=item * B<-cp1252>

Assume char set is cp1252.

=item * B<-c0>

Same as I<-chart>, but only for C0 range (d0-d31).

=item * B<-c1>

Same chart, but only for C1 range (d128-d159).

=item * B<-decimal>

Display code points in decimal (default).

=item * B<-entities>

Display the named HTML special-character entity (if any), and the
SGML/HTML/XML numeric character references (decimal and hexadecimal).

=item * B<-g0>

Same chart, but only for G0 range (d32-d127).

=item * B<-g1>

Same chart, but only for G1 range (d160-d255).

=item * B<-hex>

Display code points in hexadecimal (default).

=item * B<-jargon>

Display applicable *nix jargon names (default).

=item * B<-literal>

Include literal character in output (default).

=item * B<-long>

Show long names for characters (default).

=item * B<-octal>

Display code points in octal (default).

=item * B<-short>

Show short names for characters.

=item * B<-utf8>

Show UTF-8 byte sequence for the character (default).

=item * B<-version>

Show version/license info and exit.

=back


=head2 Note

You need to backslash and/or quote some characters to use them as arguments:

    sp  (x20,  d32,  o40)
    \"  (x22,  d34,  o42)
    \#  (x23,  d35,  o43)
    \&  (x26,  d38,  o46)
    \'  (x27,  d39,  o47)
    \(  (x28,  d40,  o50)
    \)  (x29,  d41,  o51)
    \+  (x2b,  d43,  o53)
        (or, you can precede this with '--' (end-of-options))
    \;  (x3b,  d59,  o73)
    \<  (x3c,  d60,  o74)
    \>  (x3e,  d62,  o76)
    \\  (x5c,  d92, o134)
    \`  (x60,  d96, o140)
    \|  (x7c, d124, o174)

And some you can't escape in some shells, including:

    \\t (x09,  d09,  o11) HT
    \\n (x0a,  d10,  o12) LF (you can put the newline in double-quotes)
    \\r (x0d,  d13,  o15) CR (you can put the return in double-quotes)


=head1 Known bugs and limitations

Even with a Unicode-enabled terminal such as C<uxterm>, a character > 255
may appear to be length > 1, and so will be taken as a name. But when
the name is not found, we print out the value anyway.


=head1 Related commands

C<chr> -- Does the reverse.

C<showNumberInBases> -- Converts a number to multiple bases.


=head1 Ownership

This work by Steven J. DeRose is licensed under a Creative Commons 
Attribution-Share Alike 3.0 Unported License. For further information on
this license, see L<http://creativecommons.org/licenses/by-sa/3.0/>.

The author's present email is sderose at acm.org.

For the most recent version, see L<http://www.derose.net/steve/utilities/>.

=cut