$di{$mnem} = hex $char;
}
+# XXX
+my %trans = (
+ 0xE001 => 0, # join lines: not accepted
+ 0xE004 => 0, # umlaut is no different from diaeresis 0x0308
+ 0xE005 => 0x0344, # discouraged
+ 0xE006 => 0x0300,
+ 0xE007 => 0x0301,
+ 0xE008 => 0x0302,
+ 0xE009 => 0x0303,
+ 0xE00A => 0x0304,
+ 0xE00B => 0x0306,
+ 0xE00C => 0x0307,
+ 0xE00D => 0x0308,
+ 0xE00E => 0x030A,
+ 0xE00F => 0x030B,
+ 0xE010 => 0x030C,
+ 0xE011 => 0x0327,
+ 0xE012 => 0x0328,
+ 0xE013 => 0x0332,
+ 0xE014 => 0x0333,
+ 0xE015 => 0x0338,
+ 0xE016 => 0x0345,
+ 0xE017 => 0x0314,
+ 0xE018 => 0x0313,
+ 0xE019 => 0x1FFE,
+ 0xE01A => 0x1FBF,
+ 0xE01B => 0x03D0, # middle beta = curled beta?
+ 0xE01C => 0x25CB,
+ 0xE01D => 0x0192,
+ 0xE01E => 0x0292,
+ 0xE01F => 0x33C2, # am, compatibility char
+ 0xE020 => 0x33D8, # pm, compatibility char
+ 0xE021 => 0x2121,
+ 0xE022 => 0xFE8E,
+ 0xE023 => 0, # dutch guilder 0192 is already encoded, and not very useful anyway
+ 0xE024 => 0x0393,
+ 0xE025 => 0x20D7, # also 20D1; non-spacing
+ 0xE026 => 0x1FEF,
+ 0xE027 => 0x1FC0,
+ 0xE028 => 0x01F0, #but uppercase
+);
+for (values %di) {
+ $_ >= 0xE000 or next;
+ $_ = $trans{$_} if defined $trans{$_};
+}
+
# personal addendums
my @extra;
if (-r 'shiar.inc.txt') {
# mark unofficial extras as such
$info{$_}->{category} .= ' Xz' for @extra;
+for (keys %di) {
+ $info{$_}->{string} = chr(9676) . chr($di{$_}) if $info{$_}->{combining};
+ # find control characters (first 32 chars from 0 and 128)
+ next unless ($di{$_} & ~0b1001_1111) == 0 or $di{$_} == 127;
+ # rename to something more descriptive
+ $info{$_}->{name} = $info{$_}->{unicode10}
+ ? '<'.$info{$_}->{unicode10}.'>' # the old name was much more useful
+ : sprintf('<control U+%04X>', $di{$_}); # at least identify by value
+ # show descriptive symbols instead of control chars themselves
+ $info{$_}->{string} = $di{$_} < 32 ? chr($di{$_} + 0x2400) : chr(0xFFFD);
+}
+
+# convert info hashes into arrays of strings to output in display order
+for my $row (values %info) {
+ $row = [ map { $row->{$_} } qw/name category script string/ ];
+ # strip off trailing missing values (especially string may be unknown)
+ defined $row->[-1] ? last : pop @$row for 1 .. @$row;
+}
+
# output perl code of hash
# (assume no backslashes or curlies, so we can just q{} w/o escaping)
-print "{\n";
+print "+{\n";
+printf '(map {$_=>0} qw{%s}),'."\n", join(' ',
+ map { substr($_, 1, 1).substr($_, 0, 1) } sort keys %di
+);
printf "q{%s}=>[%s],\n", $_, join(',',
- $di{$_}, # glyph code point
+ $di{$_}, # original code point
$info{$_} # optional additional arguments
- ? map {"'$_'"} @{ $info{$_} }{qw/name category script/}
+ ? map {"'$_'"} @{ $info{$_} }
: ()
) for sort keys %di;
print "}\n";