X-Git-Url: http://git.shiar.net/sheet.git/blobdiff_plain/687c60b9f476bc59a73e695af4d5d9d007830e41..92dc64088ef6dfdb273cf9cca397177d5c7e68c4:/tools/mkdigraphlist diff --git a/tools/mkdigraphlist b/tools/mkdigraphlist index ecd825f..5356987 100755 --- a/tools/mkdigraphlist +++ b/tools/mkdigraphlist @@ -4,43 +4,46 @@ use strict; use warnings; use utf8; -use open OUT => ':utf8', ':std'; +use open OUT => ':encoding(utf-8)', ':std'; +use JSON (); -our $VERSION = '1.05'; +our $VERSION = '1.07'; # import and combine various digraph data -my $rfc = do 'data/digraphs-rfc.inc.pl' +push @INC, 'data'; +my $rfc = do 'digraphs-rfc.inc.pl' or die "error reading digraphs include: ", $@ // $!; -my $extra = do 'data/digraphs-shiar.inc.pl' +my $extra = do 'digraphs-shiar.inc.pl' or warn "could not include shiar proposals: ", $@ // $!; -my $vim = do 'data/digraphs-vim.inc.pl' +my $vim = do 'digraphs-vim.inc.pl' or warn "could not include vim extensions ", $@ // $!; +my $vimold = do 'digraphs-vim-74.inc.pl' + or warn "could not include vim compatibility ", $@ // $!; my $di = { %{$vim // {}}, %{$rfc}, %{$extra // {}} }; # optionally get unicode character information my $uninfo = do 'unicode-char.inc.pl' or warn "could not include unicode details: ", $@ // $!; -# output perl code of hash -# (assume no backslashes or curlies, so we can just q{} w/o escaping) -print "# automatically generated by $0\n"; -print "use utf8;\n"; -print "+{\n"; -printf '(map {$_=>0} qw{%s}),'."\n", join(' ', +# output json map of character info +my %table; +$table{$_} = 0 for ( grep { !defined $di->{$_} } map { substr($_, 1, 1).substr($_, 0, 1) } sort keys %{$di} ); -printf "q{%s}=>[%s],\n", s/(?=[\\}])/\\/gr, join(',', +$table{$_} = [ ord $di->{$_}, # original code point - map {"'$_'"} - $uninfo->{ $di->{$_} }->[1], # name - $vim->{$_} - ? $rfc->{$_} ? 'u-di' : 'u-prop' # vim+rfc or vim-only - : 'u-prop ex', # neither - $uninfo->{ $di->{$_} }->[0] =~ s/ u-di| u-prop| ex//gr, # class + $uninfo->{ $di->{$_} }->[1] // '', # name + join(' ', + $rfc->{$_} + ? $vim->{$_} ? 'l4' : 'l1' # vim+rfc or rfc only + : $vim->{$_} ? 'l3' : 'l2', # vim only or neither + $vimold && $vim->{$_} && !$vimold->{$_} ? 'ex' : (), # new vim feature + ), + ($uninfo->{ $di->{$_} }->[0] // '') =~ s/ u-di| u-prop| ex//gr, # class $uninfo->{ $di->{$_} }->[4] // (), # string -) for sort keys %{$di}; -print "}\n"; +] for sort keys %{$di}; +print JSON->new->ascii->canonical->encode(\%table); __END__ @@ -50,8 +53,7 @@ mkdigraphlist - Output character list of combined digraph data =head1 SYNOPSIS - mkdigraphlist >digraphs.inc.pl - perl -e'$di = do "digraphs.inc.pl"; print chr $di->{DO}->[0]' + mkdigraphlist | jq -r '."DO"[0]' | perl -nE 'say chr' # $ =head1 DESCRIPTION @@ -66,9 +68,9 @@ is appended: character name, usage classes, unicode classes, and replacement output string. For example: - +{ - AE => [198, 'LATIN CAPITAL LETTER AE', 'u-di', 'Latin Lu Xl u-v11'], - EA => 'AE', + { + "AE" => [198, "LATIN CAPITAL LETTER AE", "u-di", "Latin Lu Xl u-v11"], + "EA" => "AE", } =head1 AUTHOR