X-Git-Url: http://git.shiar.net/sheet.git/blobdiff_plain/e5ad6046eceee35635afa188230707a5ae14caa9..885337ac8f6df8b7971248d0c31eed25ab5b8123:/tools/mkdigraphs-xorg diff --git a/tools/mkdigraphs-xorg b/tools/mkdigraphs-xorg index 32a5044..be53fff 100755 --- a/tools/mkdigraphs-xorg +++ b/tools/mkdigraphs-xorg @@ -2,39 +2,100 @@ use 5.014; use warnings; use utf8; -use open IO => ':utf8', ':std'; +use open IO => ':encoding(utf-8)', ':std'; +use re '/msx'; +use JSON 'decode_json'; use Data::Dump 'pp'; +use Shiar_Sheet::FormatChar; -our $VERSION = '1.00'; +our $VERSION = '1.01'; -open my $keysymh, '<', '/usr/include/X11/keysymdef.h' - or die "Could not find keysym definitions: $!\n"; +my $matchvim; # enable to prefer best compatibility -my %keysym; -while (readline $keysymh) { - m{ - \A [#]define[ ]XK_ (?[a-zA-Z_0-9]+) - \h+ 0x(?[0-9a-f]+) - \h* [/][*] [\h(] U[+] (?[0-9A-F]{4,6}) - }msx or next; - $keysym{ $+{name} } = chr hex $+{unicode}; -} +my $symname = eval { + open my $keysymh, '<', 'data/keysymdef.json' or die $!; + local $/; + return decode_json(readline $keysymh); +} or die "Could not read keysym definitions: $@\n"; -say "# automatically generated by $0"; -say '+{'; +my $vidi = eval { + open my $jsfh, '<', 'data/digraphs.json' or die $!; + local $/; + return JSON->new->decode(readline $jsfh); +} or warn "Could not read comparison digraphs: $@\n"; +my %table; while ($_ = readline) { - my ($mnem, $chr, $trail) = /^\h(.*?)\h+:\h"([^"]+)"\h*(.*)/ + my ($mnem, $chr, $trail) = m/\A \h (.*?) \h+ : \h "([^"]+)" \h* (.*)/ or next; $chr =~ s/\\(.)/$1/g; - $mnem !~ /]+)> ?}{$keysym{$1} // die "reference to unknown keysym $1\n"}eg; - $mnem !~ /[^ -\x7F]/ or next; # skip unicode -# (state $seen = {})->{$chr}++ and next; - printf "%s => %s,\n", pp($mnem), pp($chr); + $mnem !~ m/]+)> \h?}{$symname->{$1} // die "reference to unknown keysym $1\n"}eg; + 1; + } or warn($@), next; + $mnem =~ m/\A [\x20-\x7F]{2} \z/ or next; # only interested in two ascii + + my $alias = \(state $seen = {})->{$chr}; # assume first is preferred + my $cp = ord $chr; + my ($class, $name, undef, undef, $string) = @{ + Shiar_Sheet::FormatChar->glyph_info($cp) + }; + my $comparison = ( + !$vidi->{key}->{$mnem} ? 'l3' : # free + $vidi->{key}->{$mnem}->[0] != $cp ? 'l1' : # conflict + $vidi->{key}->{$mnem}->[2] eq 'l5' ? 'l5' : # rfc + 'l4' # any + ); + + if (${$alias}) { + # aliases an earlier occurrence + if ($matchvim and ${$alias}->[2] lt $comparison) { + # replace lower compatibility level + ${$alias}->[3] = 'l0'; + ${$alias}->[2] .= ' u-' . ${$alias}->[2]; + ${$alias} = undef; + } + else { + $class = 'l0'; + my $menm = substr($mnem, 1, 1).substr($mnem, 0, 1); + if ($table{$menm} && $table{$menm}[0] == $cp) { + # unannotated if identical to reversed input + $cp = 0; + } + else { + $class .= ' ex'; + } + } + } + + $table{$mnem} = [ $cp, $name, $comparison, $class, $string // () ]; + ${$alias} //= $table{$mnem}; } -say '}'; +print JSON->new->canonical->indent->encode({ + title => 'X.Org', + key => \%table, + intro => join("\n", + 'Character mnemonics following compose key ⎄:', + 'in the X Window System (Shift+AltGr by default).', + 'Differences from RFC-1345 are indicated.', + 'Also see common Unicode.', + ), + keywords => [qw( xorg x11 x )], + flag => { + 'l5' => "matching RFC-1345", + 'l4' => "matching Vim extension", + 'l3' => "unique to Xorg", + 'l1' => "conflict", + ('l0' => "Xorg preference") x !!$matchvim, + 'l0 ex' => "alias", + }, + flagclass => { + l5 => 'u-l4', + l4 => 'u-l5', + }, +}); __END__ @@ -45,13 +106,13 @@ mkdigraphs-xorg - Output Xorg compose sequences =head1 SYNOPSIS - mkdigraphs-xorg /usr/share/X11/locale/en_US.UTF-8/Compose >digraphs-xorg.inc.pl - perl -e'$di = do "digraphs-xorg.inc.pl"; print chr $di->{AT}' + mkdigraphs-xorg /usr/share/X11/locale/en_US.UTF-8/Compose | + jq -r '.key."AT"[0]' | perl -nE 'say chr' # @ =head1 DESCRIPTION Extracts Multi_key definitions from X11/Xorg Compose.pre include file. -If successful, Perl code is output resulting in a hash +If successful, a JSON object is output containing a digraphs list in C with Unicode code points keyed by mnemonics. Any errors and warnings are given at STDERR.