From 92dc64088ef6dfdb273cf9cca397177d5c7e68c4 Mon Sep 17 00:00:00 2001 From: Mischa POSLAWSKY Date: Wed, 21 Feb 2024 20:40:44 +0100 Subject: [PATCH] digraphs: encode data include directly as json --- Makefile | 6 +++--- digraphs.plp | 2 +- tools/mkdigraphlist | 28 ++++++++++++---------------- 3 files changed, 16 insertions(+), 20 deletions(-) diff --git a/Makefile b/Makefile index f9c0565..be94112 100644 --- a/Makefile +++ b/Makefile @@ -30,7 +30,7 @@ word/put.js: $(download) word/%.min.js: word/%.js uglifyjs -m '' $< -o $@ -data: data/digraphs.inc.pl data/unicode-cover.inc.pl data/countries.inc.pl data/browser data/termcol-xcolor.inc.pl data/digraphs-xorg.inc.pl data/unicode-sampler word +data: data/digraphs.json data/unicode-cover.inc.pl data/countries.inc.pl data/browser data/termcol-xcolor.inc.pl data/digraphs-xorg.inc.pl data/unicode-sampler word data/DerivedAge.txt: $(download) tools/wget-ifmodified http://www.unicode.org/Public/UNIDATA/$(@F) $@ @@ -64,7 +64,7 @@ data/digraphs-plan9.txt: data/digraphs-plan9.inc.pl: tools/mkdigraphs-plan9 data/digraphs-plan9.txt $(call cmdsave,$^) -data/digraphs.inc.pl: tools/mkdigraphlist data/digraphs-rfc.inc.pl data/digraphs-vim.inc.pl data/digraphs-shiar.inc.pl data/unicode-char.inc.pl +data/digraphs.json: tools/mkdigraphlist data/digraphs-rfc.inc.pl data/digraphs-vim.inc.pl data/digraphs-shiar.inc.pl data/unicode-char.inc.pl $(call cmdsave,$<) data/unicode-char.inc.pl: tools/mkcharinfo data/digraphs-rfc.inc.pl data/digraphs-shiar.inc.pl data/unicode-age.inc.pl unicode-table.inc.pl @@ -156,7 +156,7 @@ data/browser/usage-wm.inc.pl: tools/mkusage-wikimedia data/browser/usage-wm.tsv $(call cmdsave,$^) clean: - -rm data/digraphs.inc.pl + -rm data/digraphs.json -rm data/unicode-char.inc.pl -rm data/unicode-age.inc.pl -rm -rf data/font/ diff --git a/digraphs.plp b/digraphs.plp index 2f57b36..ccb1987 100644 --- a/digraphs.plp +++ b/digraphs.plp @@ -38,7 +38,7 @@ say '

Unofficial proposals', :> <: -my $di = Data('data/digraphs'); +my $di = Data('digraphs'); if (exists $get{v}) { # show characters for inverted mnemonics (vim alternatives) diff --git a/tools/mkdigraphlist b/tools/mkdigraphlist index 3bc6ff4..5356987 100755 --- a/tools/mkdigraphlist +++ b/tools/mkdigraphlist @@ -5,8 +5,9 @@ use warnings; use utf8; use open OUT => ':encoding(utf-8)', ':std'; +use JSON (); -our $VERSION = '1.06'; +our $VERSION = '1.07'; # import and combine various digraph data push @INC, 'data'; @@ -24,18 +25,14 @@ my $di = { %{$vim // {}}, %{$rfc}, %{$extra // {}} }; my $uninfo = do 'unicode-char.inc.pl' or warn "could not include unicode details: ", $@ // $!; -# output perl code of hash -# (assume no backslashes or curlies, so we can just q{} w/o escaping) -print "# automatically generated by $0\n"; -print "use utf8;\n"; -print "+{\n"; -printf '(map {$_=>0} qw{%s}),'."\n", join(' ', +# output json map of character info +my %table; +$table{$_} = 0 for ( grep { !defined $di->{$_} } map { substr($_, 1, 1).substr($_, 0, 1) } sort keys %{$di} ); -printf "q{%s}=>[%s],\n", s/(?=[\\}])/\\/gr, join(',', +$table{$_} = [ ord $di->{$_}, # original code point - map {"'$_'"} $uninfo->{ $di->{$_} }->[1] // '', # name join(' ', $rfc->{$_} @@ -45,8 +42,8 @@ printf "q{%s}=>[%s],\n", s/(?=[\\}])/\\/gr, join(',', ), ($uninfo->{ $di->{$_} }->[0] // '') =~ s/ u-di| u-prop| ex//gr, # class $uninfo->{ $di->{$_} }->[4] // (), # string -) for sort keys %{$di}; -print "}\n"; +] for sort keys %{$di}; +print JSON->new->ascii->canonical->encode(\%table); __END__ @@ -56,8 +53,7 @@ mkdigraphlist - Output character list of combined digraph data =head1 SYNOPSIS - mkdigraphlist >digraphs.inc.pl - perl -e'$di = do "digraphs.inc.pl"; print chr $di->{DO}->[0]' + mkdigraphlist | jq -r '."DO"[0]' | perl -nE 'say chr' # $ =head1 DESCRIPTION @@ -72,9 +68,9 @@ is appended: character name, usage classes, unicode classes, and replacement output string. 
For example: - +{ - AE => [198, 'LATIN CAPITAL LETTER AE', 'u-di', 'Latin Lu Xl u-v11'], - EA => 'AE', + { + "AE": [198, "LATIN CAPITAL LETTER AE", "u-di", "Latin Lu Xl u-v11"], + "EA": 0 } =head1 AUTHOR -- 2.30.0