git.shiar.nl
/
sheet.git
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
digraphs: encode data include directly as json
[sheet.git]
/
tools
/
mkdigraphlist
diff --git
a/tools/mkdigraphlist
b/tools/mkdigraphlist
index 3bc6ff4e751927024e318ffc289cb42d176e627e..535698712117af07f6f162873bbf12eecc7af747 100755
(executable)
--- a/
tools/mkdigraphlist
+++ b/
tools/mkdigraphlist
@@
-5,8
+5,9
@@
use warnings;
use utf8;
use open OUT => ':encoding(utf-8)', ':std';
use utf8;
use open OUT => ':encoding(utf-8)', ':std';
+use JSON ();
-our $VERSION = '1.0
6
';
+our $VERSION = '1.0
7
';
# import and combine various digraph data
push @INC, 'data';
# import and combine various digraph data
push @INC, 'data';
@@
-24,18
+25,14
@@
my $di = { %{$vim // {}}, %{$rfc}, %{$extra // {}} };
my $uninfo = do 'unicode-char.inc.pl'
or warn "could not include unicode details: ", $@ // $!;
my $uninfo = do 'unicode-char.inc.pl'
or warn "could not include unicode details: ", $@ // $!;
-# output perl code of hash
-# (assume no backslashes or curlies, so we can just q{} w/o escaping)
-print "# automatically generated by $0\n";
-print "use utf8;\n";
-print "+{\n";
-printf '(map {$_=>0} qw{%s}),'."\n", join(' ',
+# output json map of character info
+my %table;
+$table{$_} = 0 for (
grep { !defined $di->{$_} }
map { substr($_, 1, 1).substr($_, 0, 1) } sort keys %{$di}
);
grep { !defined $di->{$_} }
map { substr($_, 1, 1).substr($_, 0, 1) } sort keys %{$di}
);
-printf "q{%s}=>[%s],\n", s/(?=[\\}])/\\/gr, join(',',
+$table{$_} = [
ord $di->{$_}, # original code point
ord $di->{$_}, # original code point
- map {"'$_'"}
$uninfo->{ $di->{$_} }->[1] // '', # name
join(' ',
$rfc->{$_}
$uninfo->{ $di->{$_} }->[1] // '', # name
join(' ',
$rfc->{$_}
@@
-45,8
+42,8
@@
printf "q{%s}=>[%s],\n", s/(?=[\\}])/\\/gr, join(',',
),
($uninfo->{ $di->{$_} }->[0] // '') =~ s/ u-di| u-prop| ex//gr, # class
$uninfo->{ $di->{$_} }->[4] // (), # string
),
($uninfo->{ $di->{$_} }->[0] // '') =~ s/ u-di| u-prop| ex//gr, # class
$uninfo->{ $di->{$_} }->[4] // (), # string
-
)
for sort keys %{$di};
-print
"}\n"
;
+
]
for sort keys %{$di};
+print
JSON->new->ascii->canonical->encode(\%table)
;
__END__
__END__
@@
-56,8
+53,7
@@
mkdigraphlist - Output character list of combined digraph data
=head1 SYNOPSIS
=head1 SYNOPSIS
- mkdigraphlist >digraphs.inc.pl
- perl -e'$di = do "digraphs.inc.pl"; print chr $di->{DO}->[0]'
+ mkdigraphlist | jq -r '."DO"[0]' | perl -nE 'say chr' # $
=head1 DESCRIPTION
=head1 DESCRIPTION
@@
-72,9
+68,9
@@
is appended: character name, usage classes, unicode classes,
and replacement output string.
For example:
and replacement output string.
For example:
-
+
{
-
AE => [198, 'LATIN CAPITAL LETTER AE', 'u-di', 'Latin Lu Xl u-v11'
],
-
EA => 'AE'
,
+
{
+
"AE": [198, "LATIN CAPITAL LETTER AE", "u-di", "Latin Lu Xl u-v11"
],
+
"EA": "AE"
}
=head1 AUTHOR
}
=head1 AUTHOR