use utf8;
-my %uniblock = (
- 0x0000, '<td colspan="1" class="X Po">ascii',
- 0x0008, '<td colspan="4" class="X L Latin">latin',
-# 0x0028, '<td colspan="5" class="X Sk">spacing modifier',
- 0x0028, '<td colspan="2" class="X Mn">comb',
- 0x0038, '<td colspan="1" class="X L Greek">grk',
- 0x0040, '<td colspan="2" class="X L Cyrillic">cyr',
- 0x0050, '<td colspan="1" class="X L Armenian">arm',
- 0x0058, '<td colspan="1" class="X L Aramaic">heb',
- 0x0060, '<td colspan="2" class="X L Arabic">arabic',
- 0x0070, '<td colspan="3" class="X L Aramaic">aram',
- 0x0080, '<td colspan="2" class="X L Aramaic">aramaic',
- 0x0090, '<td colspan="14" class="X L Brahmic">brahmic',
- 0x0100, '<td colspan="1" class="X L Brahmic">mm',
- 0x0108, '<td colspan="1" class="X L Aramaic">geor',
- 0x0110, '<td colspan="2" class="X L Hangul">jamo',
- 0x0120, '<td colspan="3" class="X L African">ethiopic',
- 0x0138, '<td colspan="6" class="X L Syllabic">aboriginal',
- 0x0168, '<td colspan="1" class="X L Alpha">ger',
- 0x0170, '<td colspan="2" class="X L Brahmic">brahm',
- 0x0180, '<td colspan="1" class="X L Aramaic">mon',
- 0x0188, '<td colspan="1" class="X L Syllabic">can',
- 0x0190, '<td colspan="8" class="X L Brahmic">brahmic',
- 0x01D0, '<td colspan="4" class="X L Latin">extensions',
- 0x01F0, '<td colspan="2" class="X L Greek">greek',
- 0x0200, '<td colspan="1" class="X Po">·…',
- 0x0208, '<td colspan="3" class="X So">symbols',
- 0x0220, '<td colspan="2" class="X Sm">maths',
- 0x0230, '<td colspan="3" class="X So">technical',
- 0x0248, '<td colspan="1" class="X Latin">()',
- 0x0250, '<td colspan="2" class="X So">draw',
- 0x0260, '<td colspan="4" class="X So">symbols',
- 0x0280, '<td colspan="2" class="X L Alpha">braille',
- 0x0290, '<td colspan="1" class="X So">arr',
- 0x0298, '<td colspan="3" class="X Sm">maths',
- 0x02B0, '<td colspan="2" class="X So">misc',
- 0x02C0, '<td colspan="2" class="X L Greek">ancient',
- 0x02D0, '<td colspan="2" class="X L Alpha">ext',
- 0x02E0, '<td colspan="1" class="X Po">·+',
- 0x02E8, '<td colspan="3" class="X L Han">radicals',
- 0x0300, '<td colspan="2" class="X L Katakana">japanese',
- 0x0310, '<td colspan="4" class="X L Han">cjk+',
- 0x0330, '<td colspan="2" class="X Xd L Han">compat',
- 0x0340, '<td colspan="8" class="X L Han" style="border-bottom:none">',
- 0x0380, '<td colspan="16" rowspan="2" class="X L Han" style="border-top:none">cjk ideographs A', #+2
- 0x04E0, '<td colspan="16" rowspan="11" class="X L Han">cjk unified ideographs',
- 0x0A00, '<td colspan="9" class="X L Syllabic">yi',
- 0x0A48, '<td colspan="1" class="X L Latin">lisu',
- 0x0A50, '<td colspan="2" class="X L Syllabic">vai',
- 0x0A60, '<td colspan="1" class="X L Cyrillic">cyr',
- 0x0A68, '<td colspan="1" class="X L Syllabic">bam',
- 0x0A70, '<td colspan="2" class="X L Latin">lat-D',
- 0x0A80, '<td colspan="6" class="X L Brahmic">brahmic',
- 0x0AB0, '<td colspan="2" class="X L Alpha">ext',
- 0x0AC0, '<td colspan="8" class="X L Hangul" style="border-bottom:none">',
- 0x0B00, '<td colspan="16" rowspan="5" class="X L Hangul" style="border-top:none">hangeul syllables',
- 0x0D80, '<td colspan="16" class="X Cs">surrogates',
- 0x0E00, '<td colspan="16" rowspan="3" class="X Co" style="border-bottom:none">private use',
- 0x0F80, '<td colspan="2" class="X Co" style="border-top:none">',
- 0x0F90, '<td colspan="4" class="X L Han">cjk compat',
- 0x0FB0, '<td colspan="8" class="X L Arabic">presentation',
- 0x0FF0, '<td colspan="2" class="X L Latin">width',
-);
-
-sub {
- return defined $uniblock{$_[0]} ? $uniblock{$_[0]} : ();
-}
-
++{ # block table: start => [length, class names, label]; the leading + makes Perl parse { as an anonymous hash, not a block
+ 0x0000 => [0x008, 'X Po', 'ascii'], # NOTE(review): keys look like code point / 0x10 (e.g. 0x04E0 'cjk unified ideographs') - confirm
+ 0x0008 => [0x020, 'X L Latin', 'latin'], # length is in the same units as the key: 0x0008 + 0x020 = 0x0028, the next key
+ 0x0028 => [0x010, 'X Mn', 'comb'], # also spacing Sk
+ 0x0038 => [0x008, 'X L Greek', 'grk'],
+ 0x0040 => [0x010, 'X L Cyrillic', 'cyr'],
+ 0x0050 => [0x008, 'X L Armenian', 'arm'],
+ 0x0058 => [0x008, 'X L Aramaic', 'heb'],
+ 0x0060 => [0x010, 'X L Arabic', 'arabic'],
+ 0x0070 => [0x010, 'X L Aramaic', 'aram'],
+ 0x0080 => [0x010, 'X L Aramaic', 'aramaic'],
+ 0x0090 => [0x070, 'X L Brahmic', 'brahmic'],
+ 0x0100 => [0x008, 'X L Brahmic', 'mm'],
+ 0x0108 => [0x008, 'X L Aramaic', 'geor'],
+ 0x0110 => [0x010, 'X L Hangul', 'jamo'],
+ 0x0120 => [0x018, 'X L African', 'ethiopic'],
+ 0x0138 => [0x030, 'X L Syllabic', 'aboriginal'],
+ 0x0168 => [0x008, 'X L Alpha', 'ger'],
+ 0x0170 => [0x010, 'X L Brahmic', 'brahm'],
+ 0x0180 => [0x008, 'X L Aramaic', 'mon'],
+ 0x0188 => [0x008, 'X L Syllabic', 'can'],
+ 0x0190 => [0x040, 'X L Brahmic', 'brahmic'],
+ 0x01D0 => [0x020, 'X L Latin', 'extensions'],
+ 0x01F0 => [0x010, 'X L Greek', 'greek'],
+ 0x0200 => [0x008, 'X Po', '·…'],
+ 0x0208 => [0x018, 'X So', 'symbols'],
+ 0x0220 => [0x010, 'X Sm', 'maths'],
+ 0x0230 => [0x018, 'X So', 'technical'],
+ 0x0248 => [0x008, 'X Latin', '()'], # NOTE(review): 'X Latin' lacks the 'L' token the other Latin rows carry - confirm
+ 0x0250 => [0x010, 'X So', 'draw'],
+ 0x0260 => [0x020, 'X So', 'symbols'],
+ 0x0280 => [0x010, 'X L Alpha', 'braille'],
+ 0x0290 => [0x008, 'X So', 'arr'],
+ 0x0298 => [0x018, 'X Sm', 'maths'],
+ 0x02B0 => [0x010, 'X So', 'misc'],
+ 0x02C0 => [0x010, 'X L Greek', 'ancient'],
+ 0x02D0 => [0x010, 'X L Alpha', 'ext'],
+ 0x02E0 => [0x008, 'X Po', '·+'],
+ 0x02E8 => [0x018, 'X L Han', 'radicals'],
+ 0x0300 => [0x010, 'X L Katakana', 'japanese'],
+ 0x0310 => [0x020, 'X L Han', 'cjk+'],
+ 0x0330 => [0x010, 'X Xd L Han', 'compat'],
+ 0x0340 => [0x1A0, 'X L Han', 'cjk ideographs A'],
+ 0x04E0 => [0x520, 'X L Han', 'cjk unified ideographs'],
+ 0x0A00 => [0x048, 'X L Syllabic', 'yi'],
+ 0x0A48 => [0x008, 'X L Latin', 'lisu'],
+ 0x0A50 => [0x010, 'X L Syllabic', 'vai'],
+ 0x0A60 => [0x008, 'X L Cyrillic', 'cyr'],
+ 0x0A68 => [0x008, 'X L Syllabic', 'bam'],
+ 0x0A70 => [0x010, 'X L Latin', 'lat-D'],
+ 0x0A80 => [0x030, 'X L Brahmic', 'brahmic'],
+ 0x0AB0 => [0x010, 'X L Alpha', 'ext'],
+ 0x0AC0 => [0x2C0, 'X L Hangul', 'hangeul syllables'],
+ 0x0D80 => [0x080, 'X Cs', 'surrogates'],
+ 0x0E00 => [0x190, 'X Co', 'private use'],
+ 0x0F90 => [0x020, 'X L Han', 'cjk compat'],
+ 0x0FB0 => [0x040, 'X L Arabic', 'presentation'],
+ 0x0FF0 => [0x010, 'X L Latin', 'width'],
+ 0x1000 => [0x010, 'X L Syllabic', 'linear B'],
+ 0x1010 => [0x010, 'X No', 'a num'],
+ 0x1020 => [0x040, 'X L Alpha', 'ltr'],
+ 0x1060 => [0x018, 'X L Syllabic', 'linear A'],
+ 0x1078 => [0x008, 'X L Alpha', 'ltr'],
+ 0x1080 => [0x080, 'X L Aramaic', 'rtl'],
+ 0x1100 => [0x100, 'X L Brahmic', 'brahmic'],
+ 0x1200 => [0x100, 'X L Syllabic', 'cuneiform'],
+ 0x1300 => [0x100, 'X L Syllabic', 'egyptian hieroglyphs'],
+ 0x1400 => [0x200, 'X L Syllabic', 'other large scripts'],
+ 0x1600 => [0x100, 'X L Alpha', 'recent'],
+ 0x1700 => [0x450, 'X L Han', 'east asian'],
+ 0x1B50 => [0x070, 'X L Syllabic', 'proto-elamite'],
+ 0x1BC0 => [0x040, 'X L Alpha', 'shorthands'],
+ 0x1C00 => [0x100, '', 'other large scripts'], # NOTE(review): empty class string, and the label repeats the 0x1400 row - confirm intended
+ 0x1D00 => [0x040, 'X So', 'notational systems'],
+ 0x1D40 => [0x040, 'X L Latin', 'mathematical'], # Sm
+ 0x1D80 => [0x040, 'X L Alpha', 'sutton signs'],
+ 0x1DC0 => [0x040, '', 'notational'], # NOTE(review): empty class string - confirm intended
+ 0x1E00 => [0x080, 'X L Alpha', 'ltr'],
+ 0x1E80 => [0x080, 'X L Alpha', 'rtl'],
+ 0x1F00 => [0x010, 'X So', 'game'],
+ 0x1F10 => [0x020, 'X L So', 'enclosed'], # NOTE(review): only row pairing 'L' with 'So' - confirm
+ 0x1F30 => [0x050, 'X So', 'pictographic'],
+ 0x1F80 => [0x010, 'X So', 'arrows'],
+ 0x1F90 => [0x070, '', 'unassigned'], # empty class string for the range labelled unassigned
+}; # rows are contiguous: each start + length is the next key, covering 0x0000 through 0x1FFF