X-Git-Url: http://git.shiar.net/sheet.git/blobdiff_plain/bb412b576d883e11a4ec09d01c34a933d894af6c..49fc31c976f839b8d1d2708bcbb9acc30b061fd3:/charset.plp
diff --git a/charset.plp b/charset.plp
index f546f2d..501e26c 100644
--- a/charset.plp
+++ b/charset.plp
@@ -4,7 +4,7 @@ use strict;
use warnings;
use open IO => ':utf8';
-our $VERSION = '1.0';
+our $VERSION = 'v1.0';
$header{content_type} = 'text/html; charset=utf-8';
@@ -13,8 +13,8 @@ $header{content_type} = 'text/html; charset=utf-8';
+
charset cheat sheet
-
@@ -53,7 +53,20 @@ my @request = map {
if ($input =~ s/-$//) {
$endpoint = $row{offset} ? $row{offset} < 160 ? 159 : 191 : 127;
}
- if ($row{set} = resolve_alias($input)) {
+
+ if ($input =~ /^U([0-9a-f]+)(?:-([0-9a-f]+))?/) {
+ my $start = hex($1) << ($2 ? 4 : 8);
+ my $end = $2 ? hex($2) << 4 : $start + 240;
+ $row{table} = join '', map { chr } $start .. $end+15;
+ utf8::upgrade($row{table}); # prevent latin1 output
+ $row{set} = sprintf 'Unicode block U+%02Xxx', $start >> 8;
+ }
+ elsif ($input eq 'U') {
+ $row{table} = ' ' x 512;
+ $row{set} = 'Unicode planes';
+ $row{cell} = do 'charset-ucplanes.inc.pl';
+ }
+ elsif ($row{set} = resolve_alias($input)) {
if ($row{set} eq 'Internal') {
$row{table} = ' ' x ($endpoint < 255 ? 640 : 4096);
$row{set} = 'Unicode BMP';
@@ -100,9 +113,9 @@ print "\n";
my @nibble = (0..9, 'A'..'F');
for my $row (@request) {
- print '';
+ printf '', !$row->{cell} && ' charmap';
printf '%s', $row->{set};
- print '';
+ print '' x 17;
for my $section (qw{thead}) {
print "<$section>↱";
print ' | ', $_ for @nibble;
@@ -127,6 +140,11 @@ for my $row (@request) {
if (defined (my $mnem = $di{ord $glyph})) {
$info = $diinfo->{$mnem};
}
+ else {
+ require Unicode::UCD;
+ my $fullinfo = Unicode::UCD::charinfo(ord $glyph);
+ $info = [@$fullinfo{qw/code name category script string/}] if $fullinfo;
+ }
my ($codepoint, $name, $prop, $script, $string) = @$info;
$glyph = quote($string || $glyph);
@@ -148,16 +166,53 @@ print "\n";
:>
+
+
+ control
+ | whitespace
+ | diacritic
+ | punctuation
+ | symbol
+ | numeric
+ | greek
+ | aramaic
+ | syllabic
+ african
+ | japanese
+ | cjk
+ | chinese
+ |
+ | alphabetic
+ |
+
+
+ unicode 5.0
+ | proposed
+ | deprecated
+ | unassigned
+ | invalid
+ |
+
+
|
---|