6 use open OUT => ':utf8', ':std';
10 # translation table for deprecated code points
# Each pair maps a code point found in the input table to its replacement.
# A false value (0) means no replacement is accepted: the output code
# skips any mnemonic whose code point maps to 0.
# NOTE(review): presumably these pairs populate %replace, which the output
# code consults; the declaration itself is not visible here - confirm.
12 0xE001 => 0, # join lines: not accepted
13 0xE004 => 0, # umlaut is no different from diaeresis 0x0308
14 0xE005 => 0x0344, # discouraged
36 0xE01B => 0x03D0, # middle beta = curled beta?
40 0xE01F => 0x33C2, # am, compatibility char
41 0xE020 => 0x33D8, # pm, compatibility char
44 0xE023 => 0, # Dutch guilder 0192 is already encoded, and not very useful anyway
46 0xE025 => 0x20D7, # also 20D1; non-spacing
49 0xE028 => 0x01F0, # but uppercase
52 # expect input data source at command line
# (readline without an explicit handle, as used below, reads from ARGV,
# i.e. the files named on the command line)
53 @ARGV or die "Specify input source file or - for STDIN\n";
55 # skip everything until a character indented by 1 space (table start)
# defined with no argument tests $_; an undefined $_ at this point
# presumably means the input ended before any table line was found
58 defined or die "Premature input end";
61 my @line = $_; # add first line (already read, assume it's ok)
63 # read the rest of the character table, one input line per iteration
64 while ($_ = readline) {
65 # check for table end (chapter 4)
68 # parse table lines (ignore (unindented) page break)
72 # append line contents
74 # continuation line (add to last entry)
83 # output perl code of hash
84 # (assume no backslashes or curlies, so we can just q{} w/o escaping)
# header comment in the generated code names the generating script ($0)
85 say "# automatically generated by $0";
# split the entry on runs of spaces into at most 3 fields:
# mnemonic, hexadecimal code point, and the remainder (character name)
# NOTE(review): $name is not used in the lines visible here
89 my ($mnem, $chrhex, $name) = split / +/, $_, 3;
# only two-character mnemonics (digraphs) are output
90 next if length $mnem != 2;
# convert the hexadecimal string to a number
91 my $chrnum = hex $chrhex;
# the trailing "if" guards the whole "assign or next" expression:
# code points without a %replace entry pass through unchanged, listed
# ones are substituted, and a false replacement (0) skips the mnemonic
92 $chrnum = $replace{$chrnum} or next if defined $replace{$chrnum};
# emit one key/value pair; the code point is interpolated as a decimal number
93 say "q{$mnem}=>$chrnum,";
101 mkdigraphs-rfc - Output digraph data from RFC-1345
105 Extract digraphs from text specifications as a perl hash:
107 mkdigraphs-rfc rfc1345.txt >digraphs-rfc.inc.pl
109 Input can be the literal RFC (or similar) document:
111 curl http://www.ietf.org/rfc/rfc1345.txt | mkdigraphs-rfc -
113 Test by printing the character for DO (should be a dollar sign):
115 perl -e'$di = do "./digraphs-rfc.inc.pl"; print chr $di->{DO}'
119 Parses the official RFC-1345 document, searching the
120 'character mnemonic table' for all digraph definitions.
121 If successful, Perl code is output resulting in a hash
122 with Unicode code points keyed by digraph.
123 Obsolete values (references to the private use area)
124 are converted to modern alternatives, or omitted if none is accepted.
125 Any errors and warnings are written to STDERR.
129 Mischa POSLAWSKY <perl@shiar.org>
133 Licensed under the GNU Affero General Public License version 3.