#!/usr/bin/perl -w
#
# Copyright (C) 1998, 2013 Jungshik Shin, Paul Hardy
#
# johab2ucs2.pl
# This script (working as a filter) converts Hangul "Johab encoded fonts"
# with an unofficial XLFD name "-johab" in BDF format
# to a UCS-2 encoded font in a format defined by
# Roman Czyborra at
# http://czyborra.com/unifont/
#
# LICENSE:
#
#    This program is free software: you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation, either version 2 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
#
# 'hanterm304font.tar.gz' contains about a dozen
# "Johab-encoded" fonts. The package is available at
# ftp://ftp.kaist.ac.kr/hangul/terminal/hanterm/hanterm304beta/fonts
# Please note that this script only works with fonts whose
# XLFD name ends with
#
#   --16-160-75-75-c-160-johab-1
#   (and whose file name in the package doesn't include 's' or 'sh' preceding
#   '(m|g)16.bdf'. )
#
# There are four of them:
#   johabg16.bdf, johabm16.bdf, johabp16.bdf, iyagi16.bdf.
#
# Fonts in the package with other XLFD names
# (johabs and johabsh) contain glyphs for about 5000 Hanjas and special symbols
# defined in KS C 5601-1987.
#
# Sep. 29, 1998
# Jungshik Shin
#
# A more complete routine which not only covers
# *modern* pre-composed Hangul syllables in UAC00-UD7A3
# but also supports dynamic rendering of
# Hangul syllables (medieval as well as modern)
# using Hangul combining Jamos at [U1100-U11FF]
# was made by Deog-tae Kim.
#
# Subsequent modifications (unfoundry.com):
#
#    - In tconBase, "459" index corrected to "449".
#    - Modified subroutine get_ind to always return 0 for final
#      if no final consonant is in the composite syllable.
#      Previously it always added $tconMap[$m] to the final
#      consonant location even if there was no final consonant.
#    - Index arrays were extended to cover all of Johab encoded
#      Hangul, even though not all glyphs are used to generate
#      the Unicode Hangul Syllables range.
#    - Added comments on the letters in the letter arrays

use strict;
use warnings;

# Conversion routine from Hangul Jamo index to glyph index
# of Hangul "Johab-encoded" fonts as used by
# Hangul xterm, hanterm.
# The following routine is based on Hanterm by Song, Jaekyung
# available at ftp://ftp.kaist.ac.kr/hangul/terminal/hanterm

# Leading Consonant index values:
#
#    Modern Letters:                     Archaic Letters (no Romanization):
#
#     0  G     (choseong kiyeok)         19  (choseong kapyeounpieup)
#     1  GG    (choseong ssangkiyeok)    20  (choseong pieup-kiyeok)
#     2  N     (choseong nieun)          21  (choseong sios-kiyeok)
#     3  D     (choseong tikeut)         22  (choseong pieup-tikeut)
#     4  DD    (choseong ssangtikeut)    23  (choseong sios-tikeut)
#     5  R     (choseong rieul)          24  (choseong sios-pieup)
#     6  M     (choseong mieum)          25  (choseong pieup-sios)
#     7  B     (choseong pieup)          26  (choseong pansios)
#     8  BB    (choseong ssangpieup)     27  (choseong yesieung)
#     9  S     (choseong sios)           28  (choseong pieup-cieuc)
#    10  SS    (choseong ssangsios)      29  (choseong sios-cieuc)
#    11  ieung (choseong ieung)          30  (choseong yeorinhieuh)
#    12  J     (choseong cieuc)
#    13  JJ    (choseong ssangcieuc)
#    14  C     (choseong chieuch)
#    15  K     (choseong khieukh)
#    16  T     (choseong thieuth)
#    17  P     (choseong phieuph)
#    18  H     (choseong hieuh)
#
# Middle Letter index values:
#
#    Modern Letters:                     Archaic Letters (no Romanization):
#
#     0  Filler (blank)                  22  YO-YA   (jungseong yo-ya)
#     1  A      (jungseong a)            23  YO-YAE  (jungseong yo-yae)
#     2  AE     (jungseong ae)           24  YO-I    (jungseong yo-i)
#     3  YA     (jungseong ya)           25  YU-YEO  (jungseong yu-yeo)
#     4  YAE    (jungseong yae)          26  YU-YE   (jungseong yu-ye)
#     5  EO     (jungseong eo)           27  YU-I    (jungseong yu-i)
#     6  E      (jungseong e)            28  araea   (jungseong araea)
#     7  YEO    (jungseong yeo)          29  araea-i (jungseong araea-i)
#     8  YE     (jungseong ye)
#     9  O      (jungseong o)
#    10  WA     (jungseong wa)
#    11  WAE    (jungseong wae)
#    12  OE     (jungseong oe)
#    13  YO     (jungseong yo)
#    14  U      (jungseong u)
#    15  WEO    (jungseong weo)
#    16  WE     (jungseong we)
#    17  WI     (jungseong wi)
#    18  YU     (jungseong yu)
#    19  EU     (jungseong eu)
#    20  YI     (jungseong yi)
#    21  I      (jungseong i)
#
# Terminal (Final) Letter index values:
#
#    Modern Letters:                     Archaic Letters (no Romanization):
#
#     0  Filler (blank)                  28  (jongseong rieul-hieuh)
#     1  G      (jongseong kiyeok)       29  (jongseong mieum-kiyeok)
#     2  GG     (jongseong ssangkiyeok)  30  (jongseong yeorinhieuh)
#     3  GS     (jongseong kiyeok-sios)  31  (jongseong yesieung)
#     4  N      (jongseong nieun)
#     5  NJ     (jongseong nieun-cieuc)
#     6  NH     (jongseong nieun-hieuh)
#     7  D      (jongseong tikeut)
#     8  L      (jongseong rieul)
#     9  LG     (jongseong rieul-kiyeok)
#    10  LM     (jongseong rieul-mieum)
#    11  LB     (jongseong rieul-pieup)
#    12  LS     (jongseong rieul-sios)
#    13  LT     (jongseong rieul-thieuth)
#    14  LP     (jongseong rieul-phieuph)
#    15  LH     (jongseong rieul-hieuh)
#    16  M      (jongseong mieum)
#    17  B      (jongseong pieup)
#    18  BS     (jongseong pieup-sios)
#    19  S      (jongseong sios)
#    20  SS     (jongseong ssangsios)
#    21  NG     (jongseong ieung)
#    22  J      (jongseong cieuc)
#    23  C      (jongseong chieuch)
#    24  K      (jongseong khieukh)
#    25  T      (jongseong thieuth)
#    26  P      (jongseong phieuph)
#    27  H      (jongseong hieuh)
#

# The base font index for leading consonants
my @lconBase = (
      1,  11,  21,  31,  41,  51,   # G, GG, N, D, DD, R
     61,  71,  81,  91, 101, 111,   # M, B, BB, S, SS, ieung
    121, 131, 141, 151, 161, 171,   # J, JJ, C, K, T, P
    181,                            # H -- end of modern set
    191, 201, 211, 221, 231, 241,   # archaic leading consonants 19..24
    251, 261, 271, 281, 291, 301    # archaic leading consonants 25..30
);

# The base index for vowels
my @vowBase = (
      0, 311, 314, 317, 320, 323,   # (Fill), A, AE, YA, YAE, EO
    326, 329, 332, 335, 339, 343,   # E, YEO, YE, O, WA, WAE
    347, 351, 355, 358, 361, 364,   # OI, YO, U, WEO, WE, WI
    367, 370, 374, 378,             # YU, EU, UI, I -- end of modern set
    381, 384, 387,                  # YO-YA, YO-YAE, YO-YI
    390, 393, 396,                  # YU-YEO, YU-YE, YU-I
    399, 402                        # araea, araea-i
);

# The base font index for trailing consonants
my @tconBase = (
    # modern trailing consonants (filler + 27)
      0, 405, 409, 413, 417, 421,   # (Fill), G, GG, GS, N, NJ
    425, 429, 433, 437, 441,        # NH, D, L, LG, LM
    445, 449, 453, 457, 461,        # LB, LS, LT, LP, LH
    465, 469, 473, 477, 481,        # M, B, BS, S, SS
    485, 489, 493, 497, 501,        # NG, J, C, K, T
    505, 509,                       # P, H -- end of modern set
    513, 517, 521, 525              # archaic trailing consonants 28..31
);

# The mapping from vowels to leading consonant type
# in absence of trailing consonant
my @lconMap1 = (
    0, 0, 0, 0, 0, 0,   # (Fill), A, AE, YA, YAE, EO
    0, 0, 0, 1, 3, 3,   # E, YEO, YE, O, WA, WAE
    3, 1, 2, 4, 4, 4,   # OI, YO, U, WEO, WE, WI
    2, 1, 3, 0,         # YU, EU, UI, I -- end of modern set
    3, 3, 3, 4, 4, 4,   # YO-YA, YO-YAE, YO-I, YU-YEO, YU-YE, YU-I
    1, 3                # araea, araea-i
);

# The mapping from vowels to leading consonant type
# in presence of trailing consonant
my @lconMap2 = (
    5, 5, 5, 5, 5, 5,   # (Fill), A, AE, YA, YAE, EO
    5, 5, 5, 6, 8, 8,   # E, YEO, YE, O, WA, WAE
    8, 6, 7, 9, 9, 9,   # OI, YO, U, WEO, WE, WI
    7, 6, 8, 5,         # YU, EU, UI, I -- end of modern set
    8, 8, 8, 9, 9, 9,   # YO-YA, YO-YAE, YO-I, YU-YEO, YU-YE, YU-I
    6, 8                # araea, araea-i
);

# vowel type ; 1 = o and its alikes, 0 = others
my @vowType = (
    0, 0, 0, 0, 0, 0,
    0, 0, 0, 1, 1, 1,
    1, 1, 0, 0, 0, 0,
    0, 1, 1, 0,         # end of modern set
    1, 1, 1, 0, 0, 0, 1, 1
);

# The mapping from trailing consonants to vowel type
my @tconType = (
    0, 1, 1, 1, 2, 1,
    1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1,
    1, 1, 1, 1,         # end of modern set
    1, 1, 1, 1
);

# The mapping from vowels to trailing consonant type
my @tconMap = (
    0, 0, 2, 0, 2, 1,   # (Fill), A, AE, YA, YAE, EO
    2, 1, 2, 3, 0, 0,   # E, YEO, YE, O, WA, WAE
    0, 3, 3, 1, 1, 1,   # OI, YO, U, WEO, WE, WI
    3, 3, 0, 1,         # YU, EU, UI, I -- end of modern set
    0, 0, 0, 1, 1, 1,   # YO-YA, YO-YAE, YO-I, YU-YEO, YU-YE, YU-I
    3, 0                # araea, araea-i
);

# Read in BITMAP patterns for Jamos from the JOHAB-encoded BDF font file
# thru STDIN (or the files named on the command line).
# $jamo[N] holds the bitmap rows of the glyph with ENCODING N, upper-cased
# and concatenated into one hex string.
my @jamo;
my %missing_glyph;      # glyph indices already reported as missing
my $glyph;              # ENCODING of the glyph being read; undef if none
my $in_bitmap = 0;      # true between a BITMAP line and its ENDCHAR

while (my $line = <>) {
    if ($line =~ /^STARTCHAR/) {
        # Forget the previous glyph so that rows of a glyph without a usable
        # ENCODING (e.g. "ENCODING -1") are not appended to its predecessor.
        $glyph     = undef;
        $in_bitmap = 0;
    }
    elsif ($line =~ /^ENCODING\s+(\d+)/) {
        $glyph = $1;
        $jamo[$glyph] = "";
    }
    elsif ($line =~ /^BITMAP/) {
        $in_bitmap = 1;
    }
    elsif ($line =~ /^ENDCHAR/) {
        $in_bitmap = 0;
    }
    elsif ($in_bitmap) {
        next unless defined $glyph;
        $line =~ tr/a-f/A-F/;
        # Strip the whole line ending (including "\r" of CRLF files and
        # trailing blanks); leftovers would misalign the 2-digit unpacking.
        $line =~ s/\s+$//;
        $jamo[$glyph] .= $line;
    }
}

# Emit one "XXXX:<64 hex digits>" line for each of the 11172 pre-composed
# syllables U+AC00..U+D7A3 (19 leading consonants x 21 vowels x 28 finals).
for my $syllable (0 .. 11171) {
    my $init   = int($syllable / 21 / 28);
    my $medial = int($syllable / 28) % 21 + 1;  # +1: vowel index 0 is the filler
    my $final  = $syllable % 28;

    printf("%04X:%64s\n", $syllable + 0xAC00,
           compose_hangul($init, $medial, $final))
        or die("Cannot print to stdout.\n");
}

# _glyph_bitmap($ind)
#   Return the hex bitmap string stored for glyph index $ind, or "" when the
#   input font did not define that glyph (it then contributes no pixels).
#   A missing non-zero glyph is reported once on STDERR; index 0 is what
#   get_ind returns for "no final consonant", so its absence is not reported.
sub _glyph_bitmap {
    my ($ind) = @_;

    return $jamo[$ind] if defined $jamo[$ind];

    warn("$0: glyph $ind missing from input font; treating it as blank\n")
        if $ind != 0 && !$missing_glyph{$ind}++;
    return "";
}

# compose_hangul($l, $m, $f)
#   Build the bitmap of one syllable by OR-ing together the glyphs selected
#   for its leading consonant $l, vowel $m and final consonant $f (indices as
#   listed in the tables above).
#   Returns a string of 64 upper-case hex digits.
sub compose_hangul {
    my ($l, $m, $f) = @_;

    # Each glyph is split into 32 two-digit hex bytes.
    my @l_bit = unpack("a2" x 32, _glyph_bitmap(get_ind($l, $m, $f, 1)));
    my @m_bit = unpack("a2" x 32, _glyph_bitmap(get_ind($l, $m, $f, 2)));
    my @f_bit = unpack("a2" x 32, _glyph_bitmap(get_ind($l, $m, $f, 3)));

    # hex("") is 0, so short or missing glyphs simply contribute no pixels.
    my @bit = map {
        sprintf("%02X", hex($l_bit[$_]) | hex($m_bit[$_]) | hex($f_bit[$_]))
    } 0 .. 31;

    return join("", @bit);
}

# get_ind($l, $m, $f, $wh)
#   Return the glyph index (ENCODING value in the Johab font) of one
#   component of the syllable made of leading consonant $l, vowel $m and
#   final consonant $f.
#     $wh == 1    : glyph of the leading consonant
#     $wh == 2    : glyph of the vowel
#     otherwise   : glyph of the final consonant; 0 when $f == 0 (no final)
#   Dies when an index lies outside the lookup tables.
sub get_ind {
    my ($l, $m, $f, $wh) = @_;

    (   $l >= 0 && $l < @lconBase
     && $m >= 0 && $m < @vowBase
     && $f >= 0 && $f < @tconBase)
        or die("$0: get_ind() : invalid Jamo index\n");

    if ($wh == 1) {
        # Leading consonant: the variant depends on the vowel and on
        # whether a final consonant is present.
        return $lconBase[$l] + ($f > 0 ? $lconMap2[$m] : $lconMap1[$m]);
    }

    if ($wh == 2) {
        # Medial vowel.
        my $ind = $vowBase[$m];
        if ($vowType[$m] == 1) {
            # For vowels 'o' and alikes,
            # Giyeok and Kieuk get special treatment
            $ind += (($l == 0 || $l == 15) ? 0 : 1) + ($f > 0 ? 2 : 0);
        }
        else {
            $ind += $tconType[$f];
        }
        return $ind;
    }

    # Final consonant.
    return 0 if $f == 0;
    return $tconBase[$f] + $tconMap[$m];
}