+ # Resolve the charset requested in $input into a table description in %row
+ # and queue it on @request.  An unrecognised name raises an Alert and
+ # returns without queueing anything.
+ if ($input eq '') {
+     # no charset name: list Unicode code points directly
+     $row{offset} = delete $row{startpoint};
+     $row{set} = 'Unicode characters';
+     # 256-code-point block containing the first requested code point
+     my $block = $row{offset} >> 8;
+     # default to the last code point of that block
+     $row{endpoint} ||= ($block + 1 << 8) - 1;
+     # forget the block if the range ends in a different one
+     $block == ($row{endpoint} >> 8) or undef $block;
+
+     $row{table} = join '', map { chr } $row{offset} .. $row{endpoint};
+     utf8::upgrade($row{table}); # prevent latin1 output
+
+     # from here on the endpoint is relative to the offset
+     $row{endpoint} -= $row{offset};
+
+     if (defined $block) {
+         # range fits a single block: name it and keep only the low byte
+         $row{set} = sprintf 'Unicode block U+%02Xxx', $block;
+         $row{offset} %= 0x100;
+     }
+ }
+ elsif (lc $input eq 'uu') {
+     # cell data comes from an include file; a failed read is reported
+     # but the row is still queued below
+     $row{cell} = do 'charset-ucplanes.inc.pl'
+         or Alert('Table data could not be read', $@ || $!);
+     $row{endpoint} ||= 0x3FF;
+     $row{set} = 'Unicode planes';
+ }
+ elsif (lc $input eq 'u') {
+     # cell data comes from an include file; a failed read is reported
+     # but the row is still queued below
+     $row{cell} = do 'charset-unicode.inc.pl'
+         or Alert('Table data could not be read', $@ || $!);
+
+     $row{endpoint} ||= 0x1FFF;
+     # an unset start point counts as 0; default it explicitly so the
+     # comparisons below do not warn about an uninitialized value
+     my $start = $row{startpoint} || 0;
+     # NOTE(review): presumably 0x1000 units span one plane in the include
+     # file's indexing -- confirm against charset-unicode.inc.pl
+     $row{set} = 'Unicode ' . (
+         $start <  0x1000 && $row{endpoint} < 0x1000 ? 'BMP' :
+         $start >= 0x1000 && $row{endpoint} < 0x2000 ? 'SMP' :
+         'allocations'
+     );
+ }
+ elsif ($input =~ m/^utf-*8$/i) {
+     # "utf8" with any number of dashes, case-insensitive;
+     # always covers the full byte range regardless of a requested endpoint
+     $row{set} = 'UTF-8';
+     $row{cell} = do 'charset-utf8.inc.pl'
+         or Alert('Table data could not be read', $@ || $!);
+     $row{endpoint} = 0xFF;
+ }
+ elsif ($row{set} = Encode::resolve_alias($input)) {
+     # any encoding known to Encode: decode the requested byte values
+     $row{offset} = delete $row{startpoint};
+     $row{endpoint} ||= 0xFF;
+     if ($row{set} eq 'MacHebrew' or $row{set} eq 'MacThai') {
+         # array of possibly multiple characters per code point
+         $row{table} = [
+             map { Encode::decode($row{set}, pack 'C*', $_) } $row{offset} .. $row{endpoint}
+         ];
+     }
+     else {
+         # ~16x faster than decoding in loop;
+         # substr strings is twice as fast as splitting to an array
+         $row{table} = Encode::decode($row{set}, pack 'C*', $row{offset} .. $row{endpoint});
+     }
+
+     if ($row{set} eq 'cp437') {
+         if ($row{offset} <= 0xED and $row{endpoint} >= 0xED) {
+             # replace phi glyph
+             substr($row{table}, 0xED - $row{offset}, 1) = 'ϕ';
+         }
+         if ($row{offset} < 0x20) {
+             # replace control characters by visible variants;
+             # clip to the table length so a range ending below 0x1F
+             # is not extended beyond the requested endpoint
+             my $sub = substr(
+                 ' ☺☻♥♦♣♠•◘○◙♂♀♪♫☼►◄↕‼¶§▬↨↑↓→←∟↔▲▼',
+                 $row{offset}, length $row{table}
+             );
+             substr($row{table}, 0, length $sub) = $sub;
+         }
+     }
+     elsif ($row{set} eq 'symbol') {
+         if ($row{offset} <= 0x60 and $row{endpoint} >= 0x60) {
+             # replace radical extender by closest unicode equivalent
+             substr($row{table}, 0x60 - $row{offset}, 1) = '│';
+         }
+         # only applied when the table spans everything from 0xBD through 0xFF
+         if ($row{offset} <= 0xBD and $row{endpoint} >= 0xFF) {
+             substr($row{table}, 0xBD - $row{offset}, 2) = '⏐⎯'; # arrow extenders
+             substr($row{table}, 0xD2 - $row{offset}, 3) = '®©™'; # serif variants
+             substr($row{table}, 0xE0 - $row{offset}, 1) = '◊'; # replace lookalike, should match AdobeSymbol
+             substr($row{table}, 0xE2 - $row{offset}, 3) = '®©™'; # sans-serif variants
+             substr($row{table}, 0xE6 - $row{offset}, 10) = '⎛⎜⎝⎡⎢⎣⎧⎨⎩⎪';
+             substr($row{table}, 0xF0 - $row{offset}, 1) = '€';
+             substr($row{table}, 0xF4 - $row{offset}, 11) = '⎮⌡⎞⎟⎠⎤⎥⎦⎫⎬⎭';
+         }
+     }
+
+     # from here on the endpoint is relative to the offset
+     $row{endpoint} -= $row{offset};
+
+     $visible->{ascii} = # assume common base
+     $visible->{ $row{set} } = 1;
+ }
+ else {
+     Alert("Encoding <q>$input</q> unknown");
+     return;
+ }
+ push @request, \%row;