X-Git-Url: http://git.shiar.net/sheet.git/blobdiff_plain/56786a220fd951f5e6dabb21ef70be1dd3b80389..bac74b5934f22cc3d0e971d26fe0275f7f5db7b3:/charset.plp?ds=sidebyside diff --git a/charset.plp b/charset.plp index d6d853e..3bf294e 100644 --- a/charset.plp +++ b/charset.plp @@ -2,7 +2,7 @@ Html({ title => 'charset cheat sheet', - version => 'v1.0', + version => '1.0', description => [ "Reference sheet with all glyphs in common character encoding tables,", "and an overview of Unicode ranges and UTF-8 bytes.", @@ -12,6 +12,7 @@ Html({ reference common overview table '], stylesheet => [qw'light'], + data => [qw'charset-unicode.inc.pl charset-utf8.inc.pl'], }); :> @@ -20,6 +21,8 @@ Html({ <: use Shiar_Sheet::FormatChar; my $glyphs = Shiar_Sheet::FormatChar->new; +my @nibble = (0..9, 'A'..'F'); +my $nibsize = 1; use Encode qw(decode resolve_alias); # generate character table(s) @@ -28,12 +31,12 @@ use Encode qw(decode resolve_alias); my %ALIAS = ( # default => [qw(unicode utf-8 iso-8859-1 cp437 -cp1252- --iso-8859-15- -koi8-f)], default => [qw(unicode- utf-8 iso-8859-1 -cp1252- --iso-8859-15- cp437 -cp850)], - 0 => [qw(cp437 cp863)], - 1 => [qw(iso-8859-1 cp1252 MacRoman cp850)], - 2 => [qw(iso-8859-2 cp1250 cp852 MacCentralEurRoman MacCroatian MacRumanian)], - 5 => [qw(koi8-f iso-8859-5 cp1251 MacCyrillic cp855 cp866)], - 7 => [qw(iso-8859-7 cp1253 MacGreek cp737 cp869)], - 8 => [qw(iso-8859-8 cp1255 MacHebrew cp862)], + 0 => [qw(cp437 -cp863)], + 1 => [qw(iso-8859-1 -cp1252 -MacRoman -cp850)], + 2 => [qw(iso-8859-2 -cp1250 -cp852 -MacCentralEurRoman -MacCroatian -MacRumanian)], + 5 => [qw(koi8-f -iso-8859-5 -cp1251 -MacCyrillic -cp855 -cp866)], + 7 => [qw(iso-8859-7 -cp1253 -MacGreek -cp737 -cp869)], + 8 => [qw(iso-8859-8 -cp1255 -MacHebrew -cp862)], ); my @request = map { if (my $input = $_) { @@ -48,6 +51,10 @@ my @request = map { if ($input =~ s/-$//) { $endpoint = $row{offset} ? $row{offset} < 160 ? 159 : 191 : 127; } + if ($row{offset}) { + $row{setnote} = 'over cp437' if $input eq 'cp850'; + $row{setnote} = 'over iso-8859-1' if $input =~ /^iso-8859-|^cp125/; + } if ($input =~ /^U([0-9a-f]+)(?:-([0-9a-f]+))?/) { my $start = hex($1) << ($2 ? 4 : 8); @@ -57,29 +64,34 @@ my @request = map { $row{set} = sprintf 'Unicode block U+%02Xxx', $start >> 8; } elsif ($input eq 'U') { - $row{table} = ' ' x 512; + $row{table} = ' ' x 1024; $row{set} = 'Unicode planes'; - $row{cell} = do 'charset-ucplanes.inc.pl'; + $row{cell} = do 'charset-ucplanes.inc.pl' + or printf "
Table data could not be read: %s.
\n", $@ || $!; + @nibble = (map { $_.0, $_.8 } 0 .. 7); + $nibsize = 8; } elsif ($row{set} = resolve_alias($input)) { if ($row{set} eq 'Internal') { - $row{table} = ' ' x ($endpoint < 255 ? 640 : 4096); + $row{table} = ' ' x ($endpoint < 255 ? 640 : 8192); $row{set} = 'Unicode BMP'; - $row{cell} = do 'charset-unicode.inc.pl'; + $row{cell} = do 'charset-unicode.inc.pl' + or printf "Table data could not be read: %s.
\n", $@ || $!; } elsif ($row{set} eq 'utf-8-strict') { $row{table} = undef; $row{set} = 'UTF-8'; - $row{cell} = do 'charset-utf8.inc.pl'; + $row{cell} = do 'charset-utf8.inc.pl' + or printf "Table data could not be read: %s.
\n", $@ || $!; } else { $row{table} = decode($row{set}, pack 'C*', $row{offset} .. $endpoint); } } else { - print "Encoding $input unknown
\n"; + say "Encoding $input
unknown
â±"; @@ -108,14 +160,19 @@ for my $row (@request) { } print ' | |
---|---|
%X', $msb + ($row->{offset} >> 4); + printf ' | |
%X', ($msb + ($row->{offset} >> 4)) * $nibsize; for my $lsb (0 .. $#nibble) { + my $val = ( ($msb<<4) + $lsb ) * $nibsize; if ($row->{cell}) { - print $row->{cell}->(($msb<<4) + $lsb); + if (ref $row->{cell} eq 'CODE') { + print $row->{cell}->($val); + next; + } + print range_cell($row->{cell}, $val); next; } - my $glyph = substr $row->{table}, ($msb<<4) + $lsb, 1; + my $glyph = substr $row->{table}, $val, 1; if ($glyph eq $NOCHAR) { print ' | '; next; |