#!/usr/bin/perl -w
#
# ord: By Steven J. DeRose, 2007-10, sderose@acm.org.
#
# Displays the Latin-1 code of each character (or control-character
# mnemonic) given on the command line, in hex, decimal, octal and binary,
# together with a descriptive name.
#
# 2007-11-22 sjd: Accept control-char mnemonics as input. Getopt.
#     Add binary and long-name output.
# 2008-02-14 sjd: Multiple input chars. setupCharacterNames(). Unify $fmt.
#     Add longNames for G0 and G1. Add -g0, -g1. perl -w.
# 2008-09-03 sjd: Move to BSD. Improve doc.
#
# To do:
#
use strict;
use warnings;
use Getopt::Long;

my $version = "2008-09-03";

# Uniform layout for all displays:
#     label, hex, decimal, octal, high nibble, low nibble, long name.
my $fmt = "%7s, hex 0x%02x, decimal %3d, octal %04o, binary %04b %04b, '%s'\n";

# Name tables, filled in by setupCharacterNames().
#   C0 = controls 0-31 (plus SP), G0 = graphics 32-127,
#   C1 = controls 128-159 (plus NBS), G1 = graphics 160-255.
my @C0names     = ();
my @C0longNames = ();
my @G0longNames = ();
my @C1names     = ();
my @C1longNames = ();
my @G1longNames = ();
setupCharacterNames();

my $help    = 0;
my $chart   = 0;
my $C0      = 0;
my $C1      = 0;
my $G0      = 0;
my $G1      = 0;
my $quiet   = 0;
my $verbose = 0;

# Process options
Getopt::Long::Configure("ignore_case");
my $result = GetOptions(
    "c|chart"    => \$chart,
    "c0"         => \$C0,
    "c1"         => \$C1,
    "g0"         => \$G0,
    "g1"         => \$G1,
    "h|help|?"   => \$help,
    "q|quiet!"   => \$quiet,
    "v|verbose+" => \$verbose,
    "version"    => sub {
        die "Version of $version, by Steven J. DeRose, sderose\@acm.org.\n";
    }
);
($result) || die "Bad options.\n";


################################################################################
# Options that print something and stop.
#
if ($help) { showUsage();     exit; }
if ($C0)   { showChart("c0"); exit; }
if ($G0)   { showChart("g0"); exit; }
if ($C1)   { showChart("c1"); exit; }
if ($G1)   { showChart("g1"); exit; }
if ($chart) {
    showChart("c0");
    showChart("g0");
    showChart("c1");
    showChart("g1");
    exit;
}


################################################################################
# Main: describe each remaining argument.
#
(@ARGV) || die "No character or control-character mnemonic found (see -h for"
    . " information on hard-to-type characters).\n";

# Test with defined(), not truth: the argument "0" is a legitimate character
# and must not end the loop.
while (defined(my $name = shift @ARGV)) {
    if ($name eq "") {
        ($quiet) || warn "Empty argument ignored.\n";
        next;
    }

    if (length($name) > 1) {
        # More than one character: treat it as a control-character mnemonic.
        $name = uc($name);
        my $n = findMnemonic($name);
        if (!defined $n) {
            ($quiet) || warn "Unknown control-character mnemonic '$name'"
                . " (see -chart for the list).\n";
            next;
        }
        printCharacterInfo($name, $n);
    }
    else {
        printCharacterInfo($name, ord($name));
    }
} # while

exit;


################################################################################
# Return the character code for a (case-insensitive) control-character
# mnemonic such as "CR" or "CSI", or undef if the mnemonic is not known.
# C0 mnemonics map to 0-32 (SP is 32); C1 mnemonics map to 128-160 (NBS is 160).
#
sub findMnemonic {
    my ($mnemonic) = @_;
    $mnemonic = uc($mnemonic);
    for my $i (0 .. $#C0names) {
        return $i if ($C0names[$i] eq $mnemonic);
    }
    for my $i (0 .. $#C1names) {
        return 128 + $i if ($C1names[$i] eq $mnemonic);
    }
    return;
}


################################################################################
# Print one line, laid out per $fmt, for character code $n.
# $label is what goes in the first column (the character or its mnemonic).
#
sub printCharacterInfo {
    my ($label, $n) = @_;
    my $longName = getCharacterLongName($n);
    $longName = "" unless (defined $longName);
    printf($fmt, $label, $n, $n, $n, $n >> 4, $n & 15, $longName);
    return;
}


################################################################################
# Return the descriptive name for character code $n (0-255), or "" for codes
# outside Latin-1. Names stored entirely in upper case are converted to
# initial capitals; names that already contain lower case are left alone.
#
sub getCharacterLongName {
    my $n = $_[0];
    my $name = "";
    if ($n < 32) {
        $name = $C0longNames[$n];
    }
    elsif ($n < 128) {
        $name = $G0longNames[$n - 32];
    }
    elsif ($n < 160) {
        $name = $C1longNames[$n - 128];
    }
    elsif ($n < 256) {    # 255 is a valid entry (last element of @G1longNames)
        $name = $G1longNames[$n - 160];
    }

    if (defined $name && length($name) > 1 && $name !~ m/[a-z]/) {
        $name = lc($name);
        # Upper-case the first character and each character after a space.
        $name =~ s/(^.| .)/uc($1)/ge;
    }
    return($name);
}


################################################################################
# Fill in the mnemonic and long-name tables. Each C0/C1 table carries one
# extra trailing entry (SP / NBS) for the character just past the range.
#
sub setupCharacterNames {
    @C0names = qw(
        NUL SOH STX ETX EOT ENQ ACK BEL
        BS  HT  LF  VT  FF  CR  SO  SI
        DLE DC1 DC2 DC3 DC4 NAK SYN ETB
        CAN EM  SUB ESC FS  GS  RS  US
        SP
    );

    @C1names = qw(
        PAD HOP  BPH NBH IND NEL SSA ESA
        HTS HTJ  VTS PLD PLU RI  SS2 SS3
        DCS PU1  PU2 STS CCH MW  SPA EPA
        SOS SGCI SCI CSI ST  OSC PM  APC
        NBS
    );

    @C0longNames = (
        "Null",
        "Start Of Heading",
        "Start Of Text",
        "End Of Text",
        "End Of Transmission",
        "Enquiry",
        "Acknowledge",
        "Bell",
        "Back Space",
        "Horizontal Tab",
        "New Line",
        "Vertical Tab",
        "Form Feed",
        "Carriage Return",
        "Shift Out",
        "Shift In",
        "Data Link Escape",
        "Device Control 1",
        "Device Control 2",
        "Device Control 3",
        "Device Control 4",
        "Negative Acknowledge",
        "Synchronous Idle",
        "End Of Transmission Block",
        "Cancel",
        "End Of Medium",
        "Substitute",
        "Escape",
        "Field Separator",
        "Group Separator",
        "Record Separator",
        "Unit Separator",
        "Space",
    );

    @G0longNames = (
        # 0x20
        "SPACE",
        "EXCLAMATION MARK",
        "QUOTATION MARK",
        "NUMBER SIGN",
        "DOLLAR SIGN",
        "PERCENT SIGN",
        "AMPERSAND",
        "APOSTROPHE",
        "LEFT PARENTHESIS",
        "RIGHT PARENTHESIS",
        "ASTERISK",
        "PLUS SIGN",
        "COMMA",
        "HYPHEN-MINUS",
        "FULL STOP",
        "SOLIDUS",
        # 0x30
        "DIGIT ZERO",
        "DIGIT ONE",
        "DIGIT TWO",
        "DIGIT THREE",
        "DIGIT FOUR",
        "DIGIT FIVE",
        "DIGIT SIX",
        "DIGIT SEVEN",
        "DIGIT EIGHT",
        "DIGIT NINE",
        "COLON",
        "SEMICOLON",
        "LESS-THAN SIGN",
        "EQUALS SIGN",
        "GREATER-THAN SIGN",
        "QUESTION MARK",
        # 0x40 - 0x5F
        "COMMERCIAL AT",
        (map { "LATIN CAPITAL LETTER $_" } 'A' .. 'Z'),
        "LEFT SQUARE BRACKET",
        "REVERSE SOLIDUS",
        "RIGHT SQUARE BRACKET",
        "CIRCUMFLEX ACCENT",
        "LOW LINE",
        # 0x60 - 0x7F
        "GRAVE ACCENT",
        (map { "LATIN SMALL LETTER $_" } 'A' .. 'Z'),
        "LEFT CURLY BRACKET",
        "VERTICAL LINE",
        "RIGHT CURLY BRACKET",
        "TILDE",
        # NOTE(review): the leading space looks like residue of a lost prefix
        # (possibly a stripped "<control>" tag) -- confirm the intended text.
        " DEL DELETE",
    );

    @C1longNames = (
        "Padding Character",
        "High Octet Preset",
        "Break Permitted Here",
        "No Break Here",
        "Index",
        "Next Line",
        "Start of Selected Area",
        "End of Selected Area",
        "Horizontal Tab Set",
        "Horizontal Tab Justified",
        "Vertical Tab Set",
        "Partial Line Forward",
        "Partial Line Backward",
        "Reverse Line Feed",
        "Single-Shift 2",
        "Single-Shift 3",
        "Device Control String",
        "Private Use 1",
        "Private Use 2",
        "Set Transmit State",
        "Cancel character",
        "Message Waiting",
        "Start of Protected Area",
        "End of Protected Area",
        "Start of String",
        "Single Graphic Char Intro",
        "Single Char Intro",
        "Control Sequence Introducer",
        "String Terminator",
        "OS Command",
        "Private Message",
        "App Program Command",
        "Non-breaking Space",
    );

    @G1longNames = (
        # 0xA0
        "NO-BREAK SPACE",
        "INVERTED EXCLAMATION MARK",
        "CENT SIGN",
        "POUND SIGN",
        "CURRENCY SIGN",
        "YEN SIGN",
        "BROKEN BAR",
        "SECTION SIGN",
        "DIAERESIS",
        "COPYRIGHT SIGN",
        "FEMININE ORDINAL INDICATOR",
        "LEFT-POINTING DOUBLE ANGLE QUOTATION MARK",
        "NOT SIGN",
        "SOFT HYPHEN",
        "REGISTERED SIGN",
        "MACRON",
        # 0xB0
        "DEGREE SIGN",
        "PLUS-MINUS SIGN",
        "SUPERSCRIPT TWO",
        "SUPERSCRIPT THREE",
        "ACUTE ACCENT",
        "MICRO SIGN",
        "PILCROW SIGN",
        "MIDDLE DOT",
        "CEDILLA",
        "SUPERSCRIPT ONE",
        "MASCULINE ORDINAL INDICATOR",
        "RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK",
        "VULGAR FRACTION ONE QUARTER",
        "VULGAR FRACTION ONE HALF",
        "VULGAR FRACTION THREE QUARTERS",
        "INVERTED QUESTION MARK",
        # 0xC0
        "LATIN CAPITAL LETTER A WITH GRAVE",
        "LATIN CAPITAL LETTER A WITH ACUTE",
        "LATIN CAPITAL LETTER A WITH CIRCUMFLEX",
        "LATIN CAPITAL LETTER A WITH TILDE",
        "LATIN CAPITAL LETTER A WITH DIAERESIS",
        "LATIN CAPITAL LETTER A WITH RING ABOVE",
        "LATIN CAPITAL LETTER AE",
        "LATIN CAPITAL LETTER C WITH CEDILLA",
        "LATIN CAPITAL LETTER E WITH GRAVE",
        "LATIN CAPITAL LETTER E WITH ACUTE",
        "LATIN CAPITAL LETTER E WITH CIRCUMFLEX",
        "LATIN CAPITAL LETTER E WITH DIAERESIS",
        "LATIN CAPITAL LETTER I WITH GRAVE",
        "LATIN CAPITAL LETTER I WITH ACUTE",
        "LATIN CAPITAL LETTER I WITH CIRCUMFLEX",
        "LATIN CAPITAL LETTER I WITH DIAERESIS",
        # 0xD0
        "LATIN CAPITAL LETTER ETH",
        "LATIN CAPITAL LETTER N WITH TILDE",
        "LATIN CAPITAL LETTER O WITH GRAVE",
        "LATIN CAPITAL LETTER O WITH ACUTE",
        "LATIN CAPITAL LETTER O WITH CIRCUMFLEX",
        "LATIN CAPITAL LETTER O WITH TILDE",
        "LATIN CAPITAL LETTER O WITH DIAERESIS",
        "MULTIPLICATION SIGN",
        "LATIN CAPITAL LETTER O WITH STROKE",
        "LATIN CAPITAL LETTER U WITH GRAVE",
        "LATIN CAPITAL LETTER U WITH ACUTE",
        "LATIN CAPITAL LETTER U WITH CIRCUMFLEX",
        "LATIN CAPITAL LETTER U WITH DIAERESIS",
        "LATIN CAPITAL LETTER Y WITH ACUTE",
        "LATIN CAPITAL LETTER THORN",
        "LATIN SMALL LETTER SHARP S",
        # 0xE0
        "LATIN SMALL LETTER A WITH GRAVE",
        "LATIN SMALL LETTER A WITH ACUTE",
        "LATIN SMALL LETTER A WITH CIRCUMFLEX",
        "LATIN SMALL LETTER A WITH TILDE",
        "LATIN SMALL LETTER A WITH DIAERESIS",
        "LATIN SMALL LETTER A WITH RING ABOVE",
        "LATIN SMALL LETTER AE",
        "LATIN SMALL LETTER C WITH CEDILLA",
        "LATIN SMALL LETTER E WITH GRAVE",
        "LATIN SMALL LETTER E WITH ACUTE",
        "LATIN SMALL LETTER E WITH CIRCUMFLEX",
        "LATIN SMALL LETTER E WITH DIAERESIS",
        "LATIN SMALL LETTER I WITH GRAVE",
        "LATIN SMALL LETTER I WITH ACUTE",
        "LATIN SMALL LETTER I WITH CIRCUMFLEX",
        "LATIN SMALL LETTER I WITH DIAERESIS",
        # 0xF0
        "LATIN SMALL LETTER ETH",
        "LATIN SMALL LETTER N WITH TILDE",
        "LATIN SMALL LETTER O WITH GRAVE",
        "LATIN SMALL LETTER O WITH ACUTE",
        "LATIN SMALL LETTER O WITH CIRCUMFLEX",
        "LATIN SMALL LETTER O WITH TILDE",
        "LATIN SMALL LETTER O WITH DIAERESIS",
        "DIVISION SIGN",
        "LATIN SMALL LETTER O WITH STROKE",
        "LATIN SMALL LETTER U WITH GRAVE",
        "LATIN SMALL LETTER U WITH ACUTE",
        "LATIN SMALL LETTER U WITH CIRCUMFLEX",
        "LATIN SMALL LETTER U WITH DIAERESIS",
        "LATIN SMALL LETTER Y WITH ACUTE",
        "LATIN SMALL LETTER THORN",
        "LATIN SMALL LETTER Y WITH DIAERESIS",
    );
    return;
} # setupCharacterNames


################################################################################
# Print the chart for one range. The argument is "c0", "g0", "c1" or "g1";
# anything else prints only the title line. Control ranges show the short
# mnemonic in the first column; graphic ranges leave it empty.
#
sub showChart {
    my ($range) = @_;
    ($verbose) && warn "Arg to showChart: $range.\n";

    print "Character mnemonics and names:\n";

    if ($range eq "c0") {
        print "C0 range:\n";
        for my $n (0 .. 31) {
            printCharacterInfo($C0names[$n], $n);
        }
    }
    if ($range eq "g0") {
        print "G0 range:\n";
        for my $n (32 .. 127) {
            printCharacterInfo("", $n);
        }
    }
    if ($range eq "c1") {
        print "\nC1 range:\n";
        for my $n (128 .. 159) {
            printCharacterInfo($C1names[$n - 128], $n);
        }
    }
    if ($range eq "g1") {
        print "\nG1 range:\n";
        for my $n (160 .. 255) {    # inclusive of 255, the last Latin-1 code
            printCharacterInfo("", $n);
        }
    }
    return;
} # showChart


################################################################################
# Print the help text. The heredoc interpolates, so $version is filled in,
# "\\" prints one backslash, and "\\t" prints the two characters \t.
#
sub showUsage {
    print <<"END_USAGE";

Usage: ord [options] [chars|mnemonics]

Displays Latin-1 character codes in hex, decimal, octal, and binary.
For control characters, you can type in the short mnemonic, such as CR.
You can supply any number of characters (separated by space).

Options:
    -chart    Show a chart of character values and mnemonics.
    -c0       Same chart, but only for C0 range (d0-d31).
    -g0       Same chart, but only for G0 range (d32-d127).
    -c1       Same chart, but only for C1 range (d128-d159).
    -g1       Same chart, but only for G1 range (d160-d255).
    -h        Show this help and exit.
    -q        Suppress warnings about arguments that cannot be looked up.
    -v        Add more detailed messages.
    -version  Show version info and exit ($version, sjd).

Note: You need to backslash some shell characters to use them as arguments:
    sp (x20, d32, o40) SP
    "  (x22, d34, o42)
    #  (x23, d35, o43)
    &  (x26, d38, o46)
    '  (x27, d39, o47)
    (  (x28, d40, o50)
    )  (x29, d41, o51)
    +  (x2b, d43, o53)
        (or, you can precede this with '--' (end-of-options))
    ;  (x3b, d59, o73)
    <  (x3c, d60, o74)
    >  (x3e, d62, o76)
    \\  (x5c, d92, o134)
    `  (x60, d96, o140)
    |  (x7c, d124, o174)

And some you can't escape, like all the control characters, including:
    \\t (x09, d09, o11) HT
    \\n (x0a, d10, o12) LF (you can put the newline in double-quotes)
    \\r (x0d, d13, o15) CR (you can put the return in double-quotes)

Related commands:
    chr: Does the reverse.
    bases: Converts a number to multiple bases.

END_USAGE
    return;
}