Assume \pM (Mark) characters are no less significant than letters; they
should certainly be counted in script coverage, especially for support of
Indic scripts. This significantly affects Hebrew: fonts that lack the
cantillation diacritics drop from previously complete coverage to 70%.
use File::Basename 'basename';
use Data::Dump 'pp';
use File::Basename 'basename';
use Data::Dump 'pp';
use Unicode::UCD 'charinfo';
for my $code (0 .. 256**2*2) {
my $charinfo = charinfo($code) or next;
use Unicode::UCD 'charinfo';
for my $code (0 .. 256**2*2) {
my $charinfo = charinfo($code) or next;
- next if $charinfo->{category} =~ /^[MC]/; # ignore Marks and "other" Control chars
+ next if $charinfo->{category} =~ /^[C]/; # ignore "other" Control chars
push @{ $charlist{$_}->{ $charinfo->{$_} } }, chr $code
for qw( script category block );
push @{ $charlist{version}->{$_} }, (chr $code) x ($agemap->{$code} <= $_)
push @{ $charlist{$_}->{ $charinfo->{$_} } }, chr $code
for qw( script category block );
push @{ $charlist{version}->{$_} }, (chr $code) x ($agemap->{$code} <= $_)