2 unibmp2hex - program to turn a .bmp or .wbmp glyph matrix into a
3 GNU Unifont hex glyph set of 256 characters
5 Synopsis: unibmp2hex [-iin_file.bmp] [-oout_file.hex] [-phex_page_num] [-w]
8 Author: Paul Hardy, unifoundry <at> unifoundry.com, December 2007
11 Copyright (C) 2007, 2008, 2013 Paul Hardy
15 This program is free software: you can redistribute it and/or modify
16 it under the terms of the GNU General Public License as published by
17 the Free Software Foundation, either version 2 of the License, or
18 (at your option) any later version.
20 This program is distributed in the hope that it will be useful,
21 but WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 GNU General Public License for more details.
25 You should have received a copy of the GNU General Public License
26 along with this program. If not, see <http://www.gnu.org/licenses/>.
36 unsigned hexdigit[16][4]; /* 16x8 pixel bitmap of each hex digit 0..F, packed as four 32-bit words per digit */
38 unsigned uniplane=0; /* upper hex digits of the code points (the nnnnnn in U+nnnnnn00..U+nnnnnnFF), 0..0xffffff; read from -p when given */
39 unsigned planeset=0; /* =1: use plane specified with -p parameter instead of the digits found in the bitmap */
40 unsigned flip=0; /* =1 if we're transposing glyph matrix (row and column indices are swapped when glyphs are read) */
41 unsigned forcewide=0; /* =1 to set each glyph to 16 pixels wide (see the -w option) */
43 /* The six Unicode plane digits, from left-most (0) to right-most (5); each digit is held as four words so it can be compared word-for-word against hexdigit[] */
44 unsigned unidigit[6][4];
49 main (int argc, char *argv[])
52 int i, j, k; /* loop variables */
53 unsigned char inchar; /* temporary input character */
54 char header[MAXBUF]; /* input buffer for bitmap file header */
55 int wbmp=0; /* =0 for Windows Bitmap (.bmp); 1 for Wireless Bitmap (.wbmp) */
56 int fatal; /* =1 if a fatal error occurred */
57 int match; /* =1 if we're still matching a pattern, 0 if no match */
58 int empty1, empty2; /* =1 if bytes tested are all zeroes */
59 unsigned char thischar1[16], thischar2[16]; /* bytes of hex char */
60 int thisrow; /* index to point into thischar1[] and thischar2[] */
61 int tmpsum; /* temporary sum to see if a character is blank */
63 unsigned char bitmap[17*32][18*32/8]; /* final bitmap */
64 char wide[65536]={65536 * 0}; /* 1 = force double width code point */
66 char *infile="", *outfile=""; /* names of input and output files */
67 FILE *infp, *outfp; /* file pointers of input and output files */
70 for (i = 1; i < argc; i++) {
71 if (argv[i][0] == '-') { /* this is an option argument */
73 case 'i': /* name of input file */
76 case 'o': /* name of output file */
77 outfile = &argv[i][2];
79 case 'p': /* specify a Unicode plane */
80 sscanf (&argv[i][2], "%x", &uniplane); /* Get Unicode plane */
81 planeset = 1; /* Use specified range, not what's in bitmap */
83 case 'w': /* force wide (16 pixels) for each glyph */
86 default: /* if unrecognized option, print list and exit */
87 fprintf (stderr, "\nSyntax:\n\n");
88 fprintf (stderr, " %s -p<Unicode_Page> ", argv[0]);
89 fprintf (stderr, "-i<Input_File> -o<Output_File> -w\n\n");
90 fprintf (stderr, " -w specifies .wbmp output instead of ");
91 fprintf (stderr, "default Windows .bmp output.\n\n");
92 fprintf (stderr, " -p is followed by 1 to 6 ");
93 fprintf (stderr, "Unicode plane hex digits ");
94 fprintf (stderr, "(default is Page 0).\n\n");
95 fprintf (stderr, "\nExample:\n\n");
96 fprintf (stderr, " %s -p83 -iunifont.hex -ou83.bmp\n\n\n",
104 Make sure we can open any I/O files that were specified before
107 if (strlen (infile) > 0) {
108 if ((infp = fopen (infile, "r")) == NULL) {
109 fprintf (stderr, "Error: can't open %s for input.\n", infile);
116 if (strlen (outfile) > 0) {
117 if ((outfp = fopen (outfile, "w")) == NULL) {
118 fprintf (stderr, "Error: can't open %s for output.\n", outfile);
126 Initialize selected code points for double width (16x16).
127 Double-width is forced in cases where a glyph (usually a combining
128 glyph) only occupies the left-hand side of a 16x16 grid, but must
129 be rendered as double-width to appear properly with other glyphs
130 in a given script. If additions were made to a script after
131 Unicode 5.0, the Unicode version is given in parentheses after
134 for (i = 0x0700; i <= 0x074F; i++) wide[i] = 1; /* Syriac */
135 for (i = 0x0800; i <= 0x083F; i++) wide[i] = 1; /* Samaritan (5.2) */
136 for (i = 0x0900; i <= 0x0DFF; i++) wide[i] = 1; /* Indic */
137 for (i = 0x0F00; i <= 0x0FFF; i++) wide[i] = 1; /* Tibetan */
138 for (i = 0x1100; i <= 0x11FF; i++) wide[i] = 1; /* Hangul Jamo */
139 for (i = 0x1800; i <= 0x18AF; i++) wide[i] = 1; /* Mongolian */
140 for (i = 0x1900; i <= 0x194F; i++) wide[i] = 1; /* Limbu */
141 for (i = 0x1980; i <= 0x19DF; i++) wide[i] = 1; /* New Tai Lue */
142 for (i = 0x1A00; i <= 0x1A1F; i++) wide[i] = 1; /* Buginese */
143 for (i = 0x1B00; i <= 0x1B7F; i++) wide[i] = 1; /* Balinese */
144 for (i = 0x1B80; i <= 0x1BBF; i++) wide[i] = 1; /* Sundanese (5.1) */
145 for (i = 0x1BC0; i <= 0x1BFF; i++) wide[i] = 1; /* Batak (6.0) */
146 for (i = 0x1C00; i <= 0x1C4F; i++) wide[i] = 1; /* Lepcha (5.1) */
147 for (i = 0x1CD0; i <= 0x1CFF; i++) wide[i] = 1; /* Vedic Extensions (5.2) */
148 for (i = 0x2E80; i <= 0xA4CF; i++) wide[i] = 1; /* CJK */
149 for (i = 0x1A20; i <= 0x1AAF; i++) wide[i] = 1; /* Tai Tham (5.2) */
150 for (i = 0xA930; i <= 0xA95F; i++) wide[i] = 1; /* Rejang (5.1) */
151 for (i = 0xA980; i <= 0xA9DF; i++) wide[i] = 1; /* Javanese (5.2) */
152 for (i = 0xAA00; i <= 0xAA5F; i++) wide[i] = 1; /* Cham (5.1) */
153 for (i = 0xAAE0; i <= 0xAAFF; i++) wide[i] = 1; /* Meetei Mayek Ext (6.0) */
154 for (i = 0xABC0; i <= 0xABFF; i++) wide[i] = 1; /* Meetei Mayek (5.2) */
156 wide[0x303F] = 0; /* CJK half-space fill */
158 /* Supplemental Multilingual Plane (Plane 01) */
159 for (i = 0x010A00; i <= 0x010A5F; i++) wide[i] = 1; /* Kharoshthi */
160 for (i = 0x011000; i <= 0x01107F; i++) wide[i] = 1; /* Brahmi */
161 for (i = 0x011080; i <= 0x0110CF; i++) wide[i] = 1; /* Kaithi */
162 for (i = 0x011100; i <= 0x01114F; i++) wide[i] = 1; /* Chakma */
163 for (i = 0x011180; i <= 0x0111DF; i++) wide[i] = 1; /* Sharada */
164 for (i = 0x011680; i <= 0x0116CF; i++) wide[i] = 1; /* Takri */
165 for (i = 0x016F00; i <= 0x016F9F; i++) wide[i] = 1; /* Miao */
166 for (i = 0x01D100; i <= 0x01D1FF; i++) wide[i] = 1; /* Musical Symbols */
167 for (i = 0x01D200; i <= 0x01D24F; i++) wide[i] = 1; /* Ancient Greek Musical Notation */
170 Determine whether or not the file is a Microsoft Windows Bitmap file.
171 If it starts with 'B', 'M', assume it's a Windows Bitmap file.
172 Otherwise, assume it's a Wireless Bitmap file.
174 WARNING: There isn't much in the way of error checking here --
175 if you give it a file that wasn't first created by hex2bmp.c,
178 fatal = 0; /* assume everything is okay with reading input file */
179 if ((header[0] = fgetc (infp)) != EOF) {
180 if ((header[1] = fgetc (infp)) != EOF) {
181 if (header[0] == 'B' && header[1] == 'M') {
182 wbmp = 0; /* Not a Wireless Bitmap -- it's a Windows Bitmap */
185 wbmp = 1; /* Assume it's a Wireless Bitmap */
195 fprintf (stderr, "Fatal error; end of input file.\n\n");
199 If this is a Wireless Bitmap (.wbmp) format file,
200 skip the header and point to the start of the bitmap itself.
204 header[i] = fgetc (infp);
208 for (i=0; i < 32*17; i++) {
209 for (j=0; j < 32*18/8; j++) {
210 inchar = fgetc (infp);
211 bitmap[i][j] = ~inchar; /* invert bits for proper color */
216 Otherwise, this must be a Windows Bitmap file, because we check
217 for that first. Skip past the header, but save it for possible
221 for (i=2; i<0x3e; i++)
222 header[i] = fgetc (infp);
226 for (i = 32*17-1; i >= 0; i--) {
227 for (j=0; j < 32*18/8; j++) {
228 inchar = fgetc (infp);
229 bitmap[i][j] = ~inchar; /* invert bits for proper color */
234 We've read the entire file. Now close the input file pointer.
238 We now have the header portion in the header[] array,
239 and have the bitmap portion from top-to-bottom in the bitmap[] array.
242 If no Unicode range (U+nnnnnn00 through U+nnnnnnFF) was specified
243 with a -p parameter, determine the range from the digits in the
246 Store bitmaps for the hex digit patterns that this file uses.
248 if (!planeset) { /* If Unicode range not specified with -p parameter */
249 for (i = 0x0; i <= 0xF; i++) { /* hex digit pattern we're storing */
250 for (j = 0; j < 4; j++) {
252 ((unsigned)bitmap[32 * (i+1) + 4 * j + 8 ][6] << 24 ) |
253 ((unsigned)bitmap[32 * (i+1) + 4 * j + 8 + 1][6] << 16 ) |
254 ((unsigned)bitmap[32 * (i+1) + 4 * j + 8 + 2][6] << 8 ) |
255 ((unsigned)bitmap[32 * (i+1) + 4 * j + 8 + 3][6] );
259 Read the Unicode plane digits into arrays for comparison, to
260 determine the upper four hex digits of the glyph addresses.
262 for (i = 0; i < 4; i++) {
263 for (j = 0; j < 4; j++) {
265 ((unsigned)bitmap[32 * 0 + 4 * j + 8 + 1][i + 3] << 24 ) |
266 ((unsigned)bitmap[32 * 0 + 4 * j + 8 + 2][i + 3] << 16 ) |
267 ((unsigned)bitmap[32 * 0 + 4 * j + 8 + 3][i + 3] << 8 ) |
268 ((unsigned)bitmap[32 * 0 + 4 * j + 8 + 4][i + 3] );
273 for (i = 4; i < 6; i++) {
274 for (j = 0; j < 4; j++) {
276 ((unsigned)bitmap[32 * 1 + 4 * j + 8 ][i] << 24 ) |
277 ((unsigned)bitmap[32 * 1 + 4 * j + 8 + 1][i] << 16 ) |
278 ((unsigned)bitmap[32 * 1 + 4 * j + 8 + 2][i] << 8 ) |
279 ((unsigned)bitmap[32 * 1 + 4 * j + 8 + 3][i] );
280 tmpsum |= unidigit[i][j];
283 if (tmpsum == 0) { /* the glyph matrix is transposed */
284 flip = 1; /* note transposed order for processing glyphs in matrix */
286 Get 5th and 6th hex digits by shifting first column header left by
287 1.5 columns, thereby shifting the hex digit right after the leading
288 "U+nnnn" page number.
290 for (i = 0x08; i < 0x18; i++) {
291 bitmap[i][7] = (bitmap[i][8] << 4) | ((bitmap[i][ 9] >> 4) & 0xf);
292 bitmap[i][8] = (bitmap[i][9] << 4) | ((bitmap[i][10] >> 4) & 0xf);
294 for (i = 4; i < 6; i++) {
295 for (j = 0; j < 4; j++) {
297 ((unsigned)bitmap[4 * j + 8 + 1][i + 3] << 24 ) |
298 ((unsigned)bitmap[4 * j + 8 + 2][i + 3] << 16 ) |
299 ((unsigned)bitmap[4 * j + 8 + 3][i + 3] << 8 ) |
300 ((unsigned)bitmap[4 * j + 8 + 4][i + 3] );
306 Now determine the Unicode plane by comparing unidigit[0..5] to
307 the hexdigit[0x0..0xF] array.
310 for (i=0; i<6; i++) { /* go through one bitmap digit at a time */
311 match = 0; /* haven't found pattern yet */
312 for (j = 0x0; !match && j <= 0xF; j++) {
313 if (unidigit[i][0] == hexdigit[j][0] &&
314 unidigit[i][1] == hexdigit[j][1] &&
315 unidigit[i][2] == hexdigit[j][2] &&
316 unidigit[i][3] == hexdigit[j][3]) { /* we found the digit */
326 Now read each glyph and print it as hex.
328 for (i = 0x0; i <= 0xf; i++) {
329 for (j = 0x0; j <= 0xf; j++) {
330 for (k = 0; k < 16; k++) {
331 if (flip) { /* transpose glyph matrix */
332 thischar1[k] = bitmap[32*(j+1) + k + 7][4 * (i+2) + 1];
333 thischar2[k] = bitmap[32*(j+1) + k + 7][4 * (i+2) + 2];
336 thischar1[k] = bitmap[32*(i+1) + k + 7][4 * (j+2) + 1];
337 thischar2[k] = bitmap[32*(i+1) + k + 7][4 * (j+2) + 2];
341 If the second half of the 16*16 character is all zeroes, this
342 character is only 8 bits wide, so print a half-width character.
345 for (k=0; (empty1 || empty2) && k < 16; k++) {
346 if (thischar1[k] != 0) empty1 = 0;
347 if (thischar2[k] != 0) empty2 = 0;
350 Only print this glyph if it isn't blank.
352 if (!empty1 || !empty2) {
354 If the second half is empty, this is a half-width character.
355 Only print the first half.
358 Original GNU Unifont format is four hexadecimal digit character
359 code followed by a colon followed by a hex string. Add support
360 for codes beyond the Basic Multilingual Plane.
362 Unicode ranges from U+0000 to U+10FFFF, so print either a
363 4-digit or a 6-digit code point. Note that this software
364 should support up to an 8-digit code point, extending beyond
365 the normal Unicode range, but this has not been fully tested.
368 fprintf (outfp, "%04X%X%X:", uniplane, i, j); // 6 digit code pt.
370 fprintf (outfp, "%02X%X%X:", uniplane, i, j); // 4 digit code pt.
371 for (thisrow=0; thisrow<16; thisrow++) {
373 If second half is empty and we're not forcing this
374 code point to double width, print as single width
377 empty2 && !wide[(uniplane << 8) | (i << 4) | j])
384 thischar1[thisrow], thischar2[thisrow]);
386 fprintf (outfp, "\n");