use Data::Dump 'pp';
use PLP::Functions 'EscapeHTML';
# Module version (this change moves it from 1.01 to 1.06).
-our $VERSION = '1.01';
+our $VERSION = '1.06';
# Character data source. The removed lines loaded 'digraphs.inc.pl' into
# $diinfo and built %di, a reverse map from the first element of each
# reference-valued entry to its key. The added line loads
# 'unicode-char.inc.pl' into $uc instead, which glyph_info indexes by the
# character itself.
# NOTE(review): `do FILE` yields undef when the file cannot be read or
# compiled, and the result is not checked here -- confirm that is acceptable.
-our $diinfo = do 'digraphs.inc.pl';
-our %di = map { $diinfo->{$_}->[0] => $_ } grep { ref $diinfo->{$_} }
- sort { length $a <=> length $b } keys %$diinfo;
+our $uc = do 'unicode-char.inc.pl';
# Constructor: returns a hash-based object blessed into $class.
#
# The old 'digraph'/'unicode' flags are replaced by two settings:
#   anno  - ordered list of annotation kinds; cell() walks it and keeps the
#           first kind that produces an annotation. The trailing 0 matches
#           none of the named kinds and so reaches cell()'s fallback branch.
#   style - selects which branch cell() uses when assigning CSS classes.
sub new {
my ($class) = @_;
- bless { digraph => 1, unicode => 0 }, $class;
+ bless { anno => ['di', 0], style => 'di' }, $class;
}
# Look up display information for a single code point.
#
# Old version: returned a list (info array ref, optional two-letter mnemonic),
# trying the %di table first and Unicode::UCD::charinfo second.
# New version: returns a single array ref, taken from $uc (keyed by the
# character), else built from Unicode::UCD::charinfo, else an empty array ref.
# glyph_html unpacks the result as (class, name, mnemonic, html, string).
sub glyph_info {
my ($self, $codepoint) = @_;
- if (defined (my $mnem = $di{$codepoint})) {
- return ($diinfo->{$mnem}, length $mnem == 2 ? $mnem : undef);
- }
- require Unicode::UCD;
- if (my $fullinfo = Unicode::UCD::charinfo($codepoint)) {
- return [$codepoint, @$fullinfo{qw/name category script string/}];
- }
- return [$codepoint];
# The eval turns a failed `require` (or any other die) into a false value,
# so the trailing `|| []` supplies the empty fallback.
+ return $uc->{chr $codepoint} || eval {
+ require Unicode::UCD;
+ if (my $fullinfo = Unicode::UCD::charinfo($codepoint)) {
# This `return` leaves only the eval block, not glyph_info itself.
# NOTE(review): the slice yields four elements while glyph_html unpacks
# five, so the 'string' value lands in the fourth ($html) slot; the '-' key
# is presumably a placeholder for the mnemonic slot -- confirm both.
+ return [@$fullinfo{qw/category name - string/}];
+ }
+ } || [];
}
# Build the HTML pieces for one character.
#
# Returns a list: (cell markup, HTML-escaped title, class string, mnemonic),
# plus the html field as a fifth element in the new version.
sub glyph_html {
my ($self, $char) = @_;
- my ($info, $mnem) = $self->glyph_info(ord $char);
- my ($codepoint, $name, $prop, $script, $string) = @$info;
+ my $codepoint = ord $char;
+ my $info = $self->glyph_info($codepoint);
+ my ($class, $name, $mnem, $html, $string) = @$info;
# Prefer the display string from the info record, else the character itself.
my $cell = EscapeHTML($string || $char);
# Title is the code point in U+XXXX form, followed by the name in
# parentheses when a name is available.
my $title = sprintf 'U+%04X%s', $codepoint, $name && " ($name)";
- my @class = ('X', grep {$_} $prop, $script);
- $cell = "<span>$cell</span>" if $prop and $prop eq 'Zs';
# NOTE(review): when glyph_info returns an empty array ref, $class is undef
# here and in the "X $class" interpolation below -- confirm that is intended.
+ $cell = "<span>$cell</span>" if $class =~ /\bZs\b/;
# Never hand back an empty cell.
$cell = ' ' if $cell eq '';
- return ($cell, EscapeHTML($title), join(' ', @class), $mnem);
+ return ($cell, EscapeHTML($title), "X $class", $mnem, $html);
}
sub glyphs_html {
# Render one table cell for $input, with optional extra text in $html.
#
# Input conventions handled below:
#   '-'          empty cell without any class
#   '='          empty cell marked invalid ('di-invalid' before, 'u-invalid' now)
#   anything else gets class 'X'; a leading '-' is stripped and marks the
#                glyph as discouraged, and (new) a leading backslash is
#                stripped as an escape.
#
# NOTE(review): this diff omits some unchanged lines inside the sub; in
# particular the statement that consumes the final argument list is not
# visible here.
sub cell {
my ($self, $input, $html) = @_;
- my (@class, $title, $cell, $mnem);
+ my (@class, $title, $cell, $mnem, $entity);
if ($input eq '-') {
$cell = '';
}
elsif ($input eq '=') {
- push @class, 'di-invalid';
+ push @class, 'u-invalid';
$cell = '';
}
else {
push @class, 'X';
if ($input =~ s/^-//) {
- push @class, 'di-rare'; # discouraged
+ push @class, 'ex'; # discouraged
}
- ($cell, $title, my $class, $mnem) = $self->glyphs_html($input);
+ $input =~ s/^\\//; # escaped char
+ ($cell, $title, my $class, $mnem, $entity) = $self->glyphs_html($input);
# Class selection. Old: keyed on whether a mnemonic exists. New: keyed on
# $self->{style} -- 'di' inspects the class string returned above, 'html'
# checks for an entity, and any other style classifies by the code point
# of the first input character.
- if (defined $mnem) {
- push @class, 'di-d'; # digraph
- push @class, 'di-prop' if $class =~ /\bXz\b/; # unofficial
+ if ($self->{style} eq 'di') {
+ if ($class =~ /\bu-di\b/) {
+ push @class, ('l3', 'u-di'); # standard digraph
+ }
+ elsif ($class =~ /\bu-prop\b/) {
+ push @class, ('l2', 'u-prop'); # unofficial
+ }
+ }
+ elsif ($self->{style} eq 'html') {
+ if (defined $entity) {
+ push @class, ('l3', 'u-html');
+ }
+ }
+ else {
+ my $codepoint = ord(substr $input, 0, 1);
+ if ($codepoint <= 0xFF) {
+ push @class, 'l3', 'u-lat1'; # latin1
+ }
+ elsif ($codepoint <= 0xD7FF) {
+ push @class, 'l2', 'u-bmp'; # bmp
+ }
}
# NOTE(review): this pattern is unanchored, so it matches when any character
# of $input is printable ASCII -- confirm that is the intended test.
if ($input =~ /[ -~]/) {
- push @class, 'di-a'; # ascii
+ push @class, 'l4', 'u-ascii'; # ascii
}
else {
- push @class, 'di-b'; # basic unicode
+ push @class, 'l1'; # basic unicode
+ }
+ }
+
# Annotation: walk the configured kinds in order and stop at the first one
# that produces text. Kinds ending in 'html' use the entity (wrapped as
# "&name;" when the kind starts with '&'); 'xml' and '&xml' use the decimal
# character reference; 'di' uses the mnemonic; any other kind falls back to
# the hex value, shown for 'hex' or when the cell is a single non-letter.
# NOTE(review): ord($cell) reads the first character of $cell as returned by
# glyphs_html; if that string is already HTML-escaped (glyph_html escapes
# its cell), the result is the code of '&' rather than of the glyph -- verify.
# NOTE(review): the '&xml' text is emitted without EscapeHTML, unlike the
# entity and mnemonic branches -- confirm that is intended.
+ my $anno = '';
+ if ($cell ne '') {
+ for (@{ $self->{anno} }) {
+ if (/html$/) {
+ if (defined $entity) {
+ $entity = "&$entity;" if /^&/;
+ $anno = sprintf(' <small class="digraph">%s</small>', EscapeHTML($entity));
+ last;
+ }
+ }
+ elsif ($_ eq 'xml') {
+ $anno = sprintf(' <small class="digraph">%s</small>',
+ sprintf '#%d', ord($cell)
+ );
+ last;
+ }
+ elsif ($_ eq '&xml') {
+ $anno = sprintf(' <small class="digraph">%s</small>',
+ sprintf '&#%d;', ord($cell)
+ );
+ last;
+ }
+ elsif ($_ eq 'di') {
+ if (defined $mnem and length $mnem) {
+ $anno = sprintf(' <small class="digraph">%s</small>', EscapeHTML($mnem));
+ last;
+ }
+ }
+ else {
+ if ($_ eq 'hex' or $cell =~ /^[^a-zA-Z]$/) {
+ $anno = sprintf(' <small class="%s">%04X</small>', 'value', ord $cell);
+ last;
+ }
+ }
}
}
# Final argument list: class attribute, extra html, cell content, annotation.
# The inline digraph/unicode ternary is replaced by the precomputed $anno.
@class ? sprintf(' class="%s"', join ' ', @class) : '',
$html || '',
$cell eq '' ? ' ' : $cell,
- $self->{digraph} && defined $mnem && length $mnem
- ? sprintf(' <small class="digraph">%s</small>', EscapeHTML($mnem))
- : $self->{unicode} + $cell =~ /^[^a-zA-Z]$/ > 0
- ? sprintf(' <small class="%s">%04X</small>', 'value', ord $cell)
- : '',
+ $anno,
);
}
for my $cell (@$digraphs) {
if ($cell =~ s/^\.//) {
# dot indicates start of a new row
- push @rows, '';
+ push @rows, '<tr>';
if ($cell =~ s/^>//) {
# header cell text follows
$cell =~ s/_/ /g; # underscores may be used instead of whitespace (for qw//ability)
}
return sprintf qq{<table class="glyphs%s">\n%s</table>\n},
- $self->{digraph} || $self->{unicode} >= 0 ? ' dilabel' : '',
- join '', map {"<tr>$_\n"} @rows;
+ @{ $self->{anno} } ? ' dilabel' : '',
+ join '', map {"$_\n"} @rows;
}
# Print one or more sections to the currently selected output handle.
#
# Arguments after $self are consumed in groups: a title followed by any number
# of references, each of which is rendered with $self->table. In the new
# version the <h2> heading is printed only when the next argument is not a
# reference, so a section may start directly with table data.
# NOTE(review): the title is inserted without HTML escaping -- presumably it
# is trusted markup; confirm against callers.
sub print {
my $self = shift;
while (@_) {
- printf '<div class="section"><h2>%s</h2>'."\n\n", shift;
+ print '<div class="section">';
+ printf '<h2>%s</h2>', shift unless ref $_[0];
+ print "\n\n";
# Consume every consecutive reference as table data for this section.
# NOTE(review): this assigns to the global $_ without localizing it.
while (ref $_[0] and $_ = shift) {
print $self->table($_);
}
- print '</div>';
+ print "\n</div>";
}
}