-all: unicode-char.inc.pl ttfsupport
+all: unicode-char.inc.pl unicode-cover.inc.pl
data/rfc1345.txt:
wget http://www.ietf.org/rfc/rfc1345.txt -O $@
mkdir ttfsupport
tools/convert-allfonts
+unicode-cover.inc.pl: ttfsupport tools/mkfontinfo
+ tools/mkfontinfo >$@
+
clean:
-rm digraphs.inc.pl
-rm unicode-char.inc.pl
-rm -rf ttfsupport/
+ -rm unicode-cover.inc.pl
--- /dev/null
+#!/usr/bin/env perl
+use 5.010;
+use strict;
+use warnings;
+use utf8;
+
+use open OUT => ':utf8', ':std';
+use List::Util 'reduce';
+use File::Basename 'basename';
+use Data::Dump 'pp';
+
+our $VERSION = '1.00';
+
+my %font;
+my $incsuffix = '.inc.pl';
+for my $fontfile (glob 'ttfsupport/*'.$incsuffix) {
+ my ($fontid) = basename($fontfile, $incsuffix);
+ my ($fontmeta, @fontrange) = do $fontfile or next;
+ $font{$fontid} = {
+ -id => $fontmeta->{id} || $fontid,
+ -name => $fontmeta->{name},
+ map { (chr $_ => 1) } @fontrange
+ };
+}
+
+my @chargroups = qw(
+ N Z Math
+ Assigned
+ Latin Greek Cyrillic Georgian Arabic Thai Hangul Han
+);
+
+say 'use utf8;';
+say '+{';
+for my $name (@chargroups) {
+ my $match = qr/\A\p{$name}\z/;
+ my @chars = eval {
+ grep { m/$match/ } map { chr $_ }
+ 0..0xD7FF, 0xE000..0xFDCF, 0xFDF0..0xFFFD,
+ } or do {
+ warn $@;
+ next;
+ };
+
+ my %cover = map {
+ my $fontcover = $font{$_};
+ ($_ => scalar grep { $fontcover->{$_} } @chars);
+ } keys %font;
+ $cover{-count} = scalar @chars;
+ $cover{-chars} = [ map { ord } sort @chars ];
+
+ say $name.' => '.pp(\%cover).',';
+}
+say '}';
+
+__END__
+
+=head1 NAME
+
+mkfontinfo - Prepare font coverage of various character groups
+
+=head1 SYNOPSIS
+
+ mkfontinfo > unicode-cover.inc.pl
+
+Test by finding the number of cyrillic characters in DejaVu Sans:
+
+ perl -E'$f = do "unicode-cover.inc.pl"; say $f->{Cyrillic}->{dvsans}'
+
+=head1 AUTHOR
+
+Mischa POSLAWSKY <perl@shiar.org>
+
+=head1 LICENSE
+
+Licensed under the GNU Affero General Public License version 3.
+