tools/mkcharinfo: comment alternative nbsp stringification

[sheet.git] / digraphs.plp
diff --git a/digraphs.plp b/digraphs.plp

index 93dfd75b698b115a5a44898c1bc4f5b8e4b07dea..4c45044c97f122ba8c0e3feeb3ba078c42aecb1a 100644 (file)
--- a/digraphs.plp
+++ b/digraphs.plp
@@ -1,155 +1,147 @@
-<:
-use utf8;
-use strict;
-use warnings;
-use open IO => ':utf8';
-
-use Unicode::UCD qw(charinfo);
+<(common.inc.plp)><:
+use 5.010;  # state
+
+Html({
+       title => 'digraph cheat sheet',
+       version => 'v1.1',
+       description => [
+               "Complete table of digraph characters from RFC-1345.",
+       ],
+       keywords => [qw'
+               digraph mnemonic compose composition pair
+               character char glyph table unicode vim
+       '],
+       stylesheet => [qw'light'],
+       data => [qw( digraphs.inc.pl )],
+});
  
-our $VERSION = '1.0';
-
-$header{content_type} = 'text/html; charset=utf-8';
+:>
+<h1>RFC-1345 Digraphs</h1>
  
-:><!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
- "http://www.w3.org/TR/html4/loose.dtd">
-<html>
+<p>Character mnemonics
+following composition key ⎄:
+i^k in <a href="/vi">Vim</a>,
+^u^\ in <a href="/readline">Emacs</a>,
+^a^v in <a href="/screen">Screen</a>.
+Also see <a href="/unicode">common Unicode</a>.</p>
  
-<head>
-<title>digraph cheat sheet</title>
-<meta http-equiv="content-type" content="utf-8">
-<style>
-h1 {
-       text-align: center;
-       margin: 0 auto 0.2em;
-}
-table {
-       border-collapse: collapse;
-       table-layout: fixed; /* prevent resizing, notably in msie6 */
-}
-#legend {
-       margin-top: 1em;
-}
-#legend table {
-       width: 100%;
-       table-layout: auto;
-#}
-#legend td {
-       padding: 0 0.2em;
-}
-thead th, td {
-       width: 1.2em; /* msie only looks at the first row */
-       min-width: 1em; /* prevents gecko from restricting to page width */
-}
-th, td {
-       text-align: center;
-}
-td {
-       border: 1px solid #888;
-       background: #DDD;
-}
-td.X {
-       background: #FFF;
-}
+<p class="aside">Unofficial <span class="u-prop ex">proposals</span>
+are available as <a href="/digraphs.vim">ex commands</a>.</p>
  
-td.Lm, td.Mc, td.Me, td.Zl, td.Zp, td.Cs {background:red} /* unknown */
-
-/* letters */
-td.Greek    {background: #FFE0CF}
-td.Cyrillic {background: #FFDDA8}
-td.Latin    {background: #FFB}
-td.Hebrew   {background: #FFD}
-td.Arabic   {background: #EFE}
-td.Hiragana {background: #DFC}
-td.Katakana {background: #DFA}
-td.Bopomofo {background: #BFC}
-
-td.Nd, td.Nl,
-td.No {background: #FBB} /* number */
-td.Sc {background: #FCD} /* currency */
-td.Sm {background: #ECE} /* math */
-td.So {background: #DDCCFF} /* symbol */
-td.Cf, td.Pd,
-td.Po {background: #CDF} /* punctuation */
-td.Ps, td.Pe, td.Pi,
-td.Pf {background: #BEF} /* quote */
-td.Lm,
-td.Sk {background: #CEE} /* spacing modifier */
-td.Mn {background: #ACC} /* modifier */
-td.Cc {background: #BBB; color: #666} /* control */
-td.Zs {background: #ACB} /* space */
-td.Zs span {background: #EEE}
-
-td.Xa {color: #040} /* ascii */
-td.Xl {color: #080} /* latin1 */
-td.Co {color: #800} /* private */
-td.Xz {color: #F00} /* proposed */
-
-tr:hover td {
-       background: #FF8;
+<:
+my $di = do 'digraphs.inc.pl'  # data file evaluates to a hash ref keyed by mnemonic
+       or die "Error loading digraphs data: ", $@ || $!;  # $@ is '' (defined) on read failure, so test truth to reach $!
+
+if (exists $get{v}) {
+       # show characters for inverted mnemonics (vim alternatives)
+       $di->{ substr($_, 1, 1) . substr($_, 0, 1) } ||=
+               [ $di->{$_}->[0], '', 'l0 ex', '', $di->{$_}->[4] ]
+               for grep { ref $di->{$_} } keys %{$di};
  }
-</style>
-</head>
  
-<body>
-<h1>RFC-1345 Digraphs</h1>
+my @chars = (
+       [qw{! " % ' ( ) * + , - . /}],
+       ['0'..'9'], [qw{: ; < = > ?}],
+       ['A'..'M'], ['N'..'Z'],
+       ['a'..'m'], ['n'..'z'],
+);
+my @chars2 = (['_'], @chars);  # trailing character (extended set)
+my @columns = !exists $get{split} ? \@chars2 :
+       ([@chars2[0, 1, 3, 4, 6]], [@chars2[2, 5, 7]]);
+
+if (exists $get{xorg}) {
+       my $xorg = do 'data/digraphs-xorg.inc.pl'  # evaluates to a hash ref keyed by mnemonic
+               or die "Error loading Xorg data: ", $@ || $!;  # $@ is '' (defined) on read failure, so test truth to reach $!
+       $_ = [ord $_] for values %{$xorg};
+       $xorg->{$_}->[2] = # class = compatibility
+               $di->{$_} ? $di->{$_}->[0] != $xorg->{$_}->[0] ? 'l1' :  # conflict
+               $di->{$_}->[2] =~ /\bu-di\b/ ? 'l5' : 'l3' : 'l2'  # rfc|any|none
+               for keys %{$xorg};
+
+       for my $cp (map {$_->[0]} values %{$xorg}) {
+               next if (state $seen = {})->{$cp}++;  # List::MoreUtils::uniq
+
+               # find multiple equivalent mnemonics
+               my @equiv = grep {$cp eq $_->[0]}
+                       map {$xorg->{$_}} sort keys %{$xorg}; # values ordered by mnem.
+
+               # search for the most compatible match
+               my ($compat) = sort {
+                       $equiv[$b]->[2] cmp $equiv[$a]->[2]  # highest level
+                       || $b <=> $a  # fallback to last mnemonic
+               } 0 .. $#equiv;
+
+               # reclassify all but one as level 0 (omitted)
+               splice @equiv, $compat // -1, 1, ();
+               $_->[2] = 'l0 ex' for @equiv;
+       }
  
-<:
-my $di = do 'digraphs.inc.pl';
-
-sub quote {
-       local $_ = shift;
-       s/"/&quot;/g;
-       s/</&lt;/g;
-       s/>/&gt;/g;
-       return $_;
+       $chars2[0] = [qw( # ^ _ ` ~ )];
+       @chars = @chars2;
+       $di = $xorg;
  }
  
-my @chars = ((map {chr} ord '!' .. ord 'Z'), 'a'..'z');
-splice @chars, $_, 1, () for 2, 3-1, 5-2, 31-3;  # remove character exceptions # $ & @
-my @chars2 = (@chars, '_');  # trailing character (extended set)
-
-print '<table>';
+for my $colchars (@columns) {
+print '<table class="glyphs dimap"><col>';
+print qq'<colgroup span="$_">' for map {scalar @$_} @{$colchars};
+print "</colgroup><col>\n";
  for my $section (qw{thead tfoot}) {
-       print "<$section><tr><th>&nbsp;";
-       print "<th>$_" for @chars2;
+       print "<$section><tr><th>↳";
+       print '<th>', EscapeHTML($_) for map {@$_} @{$colchars};
+       print "<th>&nbsp;\n";
  }
-print '<tbody>';
-for my $c1 (@chars) {
-       print "<tr><th>$c1";
-       for my $c2 (@chars2) {
-               my $mnem = $c1 . $c2;
-               if (not defined $di->{$mnem}) {
-                       print '<td>';
-                       next;
+for my $c1group (@chars) {
+       print '<tbody>';
+       for my $c1 (@$c1group) {
+               print '<tr><th>', EscapeHTML($c1);
+               for my $c2 (map {@$_} @$colchars) {
+                       my $mnem = $c1 . $c2;
+                       if (not defined $di->{$mnem}) {
+                               print '<td>';
+                               next;
+                       }
+                       if (ref $di->{$mnem} ne 'ARRAY') {
+                               printf '<td class="X Xr" title="%s">', EscapeHTML($mnem);
+                               next;
+                       }
+                       my ($codepoint, $name, $prop, $script, $string) = @{ $di->{$mnem} };
+
+                       my $glyph = $string || chr $codepoint;
+                       utf8::upgrade($glyph);  # prevent latin1 output
+                       my $desc = $mnem . ($name && " ($name)");
+                       my @class = ('X', grep {$_} $prop, $script);
+
+                       $glyph = EscapeHTML($glyph);
+                       $glyph = "<span>$glyph</span>" if $script =~ /\bZs\b/;  # NOTE(review): the removed code tested the category for 'Zs'; confirm $script (not $prop) carries it
+
+                       printf "\n".'<td class="%s" title="%s">%s',
+                               join(' ', @class), EscapeHTML($desc), $glyph;
                 }
-               my $chr = $di->{$mnem};
-               my $glyph = chr $chr;
-               utf8::upgrade($glyph);  # prevent latin1 output
-               my $info = charinfo($chr);
-
-               my $desc = $mnem;
-               $desc .= " ($_)" for $info->{name} || ();
-
-               my @class = 'X';
-               push @class, $_ for $info->{category} || ();
-               push @class, $_ for $info->{script} || ();
-
-               $glyph = quote($glyph);
-               $glyph = "<span>$glyph</span>" if $info->{category} eq 'Zs';
-
-               printf "\n".'<td class="%s" title="%s">%s',
-                       join(' ', @class), quote($desc), $glyph;
+               print "\n<th>", EscapeHTML($c1), "\n";
         }
-       print "\n<th>$c1\n";
  }
  print "</table>\n";
+print '<hr>' if exists $get{split};
+}
+
+if (exists $get{xorg}) {
  :>
-<div id="legend">
-       <table><tr>
+<div class="legend">
+       <table class="glyphs"><tr>
+       <td class="X l5">matching RFC-1345
+       <td class="X l3">matching proposal
+       <td class="X l2">unique to Xorg
+       <td class="X l1">conflict
+       <td class="X l0 ex">duplicate
+       </table>
+</div>
+<: } else { :>
+<div class="legend">
+       <table class="glyphs"><tr>
         <td class="X Cc">control
-       <td class="X Zs"><span>spacing</span>
-       <td class="X Mn">modifier
-       <td class="X Sk">spacing modifier
+       <td class="X Zs"><span>space</span>
+       <td class="X Mn">combining
+       <td class="X Sk">spacing&nbsp;modifier
         <td class="X Pf">quote
         <td class="X Po">punctuation
         <td class="X So">symbol
@@ -161,17 +153,19 @@ print "</table>\n";
         <td class="X Latin">latin
         <td class="X Hebrew">hebrew
         <td class="X Arabic">arabic
+       <td class="X Hangul">korean
         <td class="X Hiragana">japanese
         <td class="X Bopomofo">chinese
         </table>
  
-       <table><tr>
+       <table class="glyphs"><tr>
         <td class="X">unicode
-       <td class="X Xa">ascii
         <td class="X Xl">latin1
-       <td class="X Co">private
-       <td class="X Xz">proposed
+       <td class="X Xa">ascii
+       <td class="X u-prop">vim extension
+       <td class="X u-prop ex">proposal
+       <td class="X ex">not in vim
         </table>
  </div>
  
-</html>
+<: }