Last active
October 6, 2015 11:47
-
-
Save zr-tex8r/2988601 to your computer and use it in GitHub Desktop.
A program to show the mapping between GIDs and glyph names for a given TrueType font
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use strict; | |
my $prog_name = "zrotfdump"; | |
my ($mode, $smode, $font_file); | |
my @mglist = qw( | |
.notdef .null nonmarkingreturn space exclam quotedbl numbersign | |
dollar percent ampersand quotesingle parenleft parenright | |
asterisk plus comma hyphen period slash zero one two three four | |
five six seven eight nine colon semicolon less equal greater | |
question at A B C D E F G H I J K L M N O P Q R S T U V W X Y Z | |
bracketleft backslash bracketright asciicircum underscore grave | |
a b c d e f g h i j k l m n o p q r s t u v w x y z braceleft | |
bar braceright asciitilde Adieresis Aring Ccedilla Eacute | |
Ntilde Odieresis Udieresis aacute agrave acircumflex adieresis | |
atilde aring ccedilla eacute egrave ecircumflex edieresis | |
iacute igrave icircumflex idieresis ntilde oacute ograve | |
ocircumflex odieresis otilde uacute ugrave ucircumflex | |
udieresis dagger degree cent sterling section bullet paragraph | |
germandbls registered copyright trademark acute dieresis | |
notequal AE Oslash infinity plusminus lessequal greaterequal | |
yen mu partialdiff summation product pi integral ordfeminine | |
ordmasculine Omega ae oslash questiondown exclamdown logicalnot | |
radical florin approxequal Delta guillemotleft guillemotright | |
ellipsis nonbreakingspace Agrave Atilde Otilde OE oe endash | |
emdash quotedblleft quotedblright quoteleft quoteright divide | |
lozenge ydieresis Ydieresis fraction currency guilsinglleft | |
guilsinglright fi fl daggerdbl periodcentered quotesinglbase | |
quotedblbase perthousand Acircumflex Ecircumflex Aacute | |
Edieresis Egrave Iacute Icircumflex Idieresis Igrave Oacute | |
Ocircumflex apple Ograve Uacute Ucircumflex Ugrave dotlessi | |
circumflex tilde macron breve dotaccent ring cedilla | |
hungarumlaut ogonek caron Lslash lslash Scaron scaron Zcaron | |
zcaron brokenbar Eth eth Yacute yacute Thorn thorn minus | |
multiply onesuperior twosuperior threesuperior onehalf | |
onequarter threequarters franc Gbreve gbreve Idotaccent | |
Scedilla scedilla Cacute cacute Ccaron ccaron dcroat | |
); | |
#--------------------------------------- | |
use Data::Dump 'dump'; | |
my %proc = ( | |
name => \&main_name, | |
cmap => \&main_cmap, | |
); | |
sub main { | |
read_option(); | |
my ($hfont); | |
open($hfont, '<', $font_file) && binmode($hfont) | |
or error("cannot open for input", $font_file); | |
my $data = $proc{$mode}->($hfont); | |
dump_data($data); | |
close($hfont); | |
} | |
#--------------------------------------- | |
sub main_cmap { | |
my ($hfont) = @_; | |
my $head = read_head($hfont) | |
or error("cannot read header", $font_file); | |
(defined $head->{post}) | |
or error("cannot find 'post' table", $font_file); | |
(defined $head->{cmap}) | |
or error("cannot find 'post' table", $font_file); | |
my @post2 = parse_post($hfont, @{$head->{post}}); | |
my $post = read_post(@post2) if (@post2); | |
my $cmaphead = read_cmap_head($hfont, @{$head->{cmap}}); | |
my $ofs12 = find_cmap($cmaphead, 12) | |
or error("cnnot find cmap-12 table", $font_file); | |
my $cmap12 = parse_cmap_12($hfont, $ofs12); | |
if ($smode eq '12') { | |
return read_cmap_12($cmap12, $post); | |
} elsif ($smode eq '14') { | |
my $ofs14 = find_cmap($cmaphead, 14) | |
or error("cnnot find cmap-14 table", $font_file); | |
my $cmap14 = parse_cmap_14($hfont, $ofs14, $cmap12); | |
return read_cmap_14($cmap14, $cmap12, $post); | |
} | |
} | |
sub find_cmap { | |
my ($chead, $fmt) = @_; | |
foreach my $e (@$chead) { | |
my ($pid, $eid, $ofs, $fmt1) = @$e; | |
($fmt1 == $fmt && | |
(($pid == 3 && $eid == 1) || | |
($pid == 3 && $eid == 10) || | |
($pid == 0 && $eid == 5))) or next; | |
return $ofs; | |
} | |
return; | |
} | |
sub read_cmap_head { | |
my ($hin, $ofs, $len) = @_; | |
my $buf = read_file($hin, $ofs, $len) or return; | |
my (@fh) = unpack("nn", substr($buf, 0, 4)); | |
my $nctbl = $fh[1]; | |
($fh[0] == 0x0 && $nctbl > 0) or return; | |
my (@fe) = unpack("(nnN)*", substr($buf, 4, $nctbl * 8)); | |
my @ary; | |
while (@fe) { | |
my (@f1) = splice(@fe, 0, 3); | |
$f1[2] += $ofs; | |
my (@fc) = unpack("nn", read_file($hin, $f1[2], 4)); | |
push(@ary, [ @f1, @fc ]); | |
} | |
return \@ary; | |
} | |
sub parse_cmap_12 { | |
my ($hin, $ofs) = @_; | |
my @fh = unpack("nnNNN", read_file($hin, $ofs, 16)); | |
($fh[0] == 12 && $fh[2] > 0 && $fh[4] > 0) or return; | |
my $buf = read_file($hin, $ofs, $fh[2]); | |
my @vs = unpack("N*", substr($buf, 16)); | |
(scalar(@vs) == $fh[4] * 3) or return; | |
my (@cmap); | |
while (@vs) { | |
my ($suc, $euc, $gid) = splice(@vs, 0, 3); | |
foreach my $uc ($suc .. $euc) { | |
push(@cmap, [ $uc, $gid++ ]); | |
} | |
} | |
return \@cmap; | |
} | |
sub read_cmap_12 { | |
my ($cmap12, $post) = @_; | |
(defined $post) or return $cmap12; | |
my @data = map { | |
my $n = $post->[$_->[1]]; | |
[ @$_, (defined $n) ? $n : '' ] | |
} (@$cmap12); | |
return \@data; | |
} | |
sub parse_cmap_14 { | |
my ($hin, $ofs) = @_; | |
my @fh = unpack("nNN", read_file($hin, $ofs, 10)); | |
($fh[0] == 14 && $fh[1] > 0) or return; | |
my $buf = read_file($hin, $ofs, $fh[1]); | |
my @vs = unpack("(a3NN)*", substr($buf, 10, $fh[2] * 11)); | |
(scalar(@vs) == $fh[2] * 3) or return; | |
my (%cmap); | |
while (@vs) { | |
my ($vs, $dofs, $nofs) = splice(@vs, 0, 3); | |
my (@d, @n); | |
if ($dofs > 0) { | |
my @x = unpack("N/(a3c)", substr($buf, $dofs)); | |
while (@x) { | |
my ($suc, $n) = splice(@x, 0, 2); | |
$suc = int24($suc); | |
push(@d, $suc .. ($suc + $n)); | |
} | |
} | |
if ($nofs > 0) { | |
my @x = unpack("N/(a3n)", substr($buf, $nofs)); | |
while (@x) { | |
my ($uc, $gid) = splice(@x, 0, 2); | |
$uc = int24($uc); | |
push(@n, [$uc, $gid]); | |
} | |
} | |
$cmap{int24($vs)} = [\@d, \@n]; | |
} | |
return \%cmap; | |
} | |
sub read_cmap_14 { | |
my ($cmap14, $cmap12, $post) = @_; | |
my (%dgid, @ent); | |
foreach my $e (@$cmap12) { | |
$dgid{$e->[0]} = $e->[1]; | |
} | |
foreach my $vs (keys %$cmap14) { | |
my ($d, $n) = @{$cmap14->{$vs}}; | |
foreach my $uc (@$d) { | |
my $gid = $dgid{$uc}; | |
push(@ent, [$uc, $vs, $gid, ss($post->[$gid])]); | |
} | |
foreach my $e (@$n) { | |
my ($uc, $gid) = @$e; | |
push(@ent, [$uc, $vs, $gid, ss($post->[$gid])]); | |
} | |
} | |
@ent = sort { | |
($a->[0] <=> $b->[0]) || ($a->[1] <=> $b->[1]) | |
} (@ent); | |
return \@ent; | |
} | |
#--------------------------------------- | |
sub ss { | |
return (defined $_[0]) ? $_[0] : ''; | |
} | |
sub int24 { | |
return unpack("N", "\0".$_[0]); | |
} | |
sub read_file { | |
my ($hin, $ofs, $len) = @_; | |
seek($hin, $ofs, 0); | |
my ($buf); read($hin, $buf, $len); | |
(length($buf) == $len) or return; | |
return $buf; | |
} | |
sub data_from_array { | |
my ($array) = @_; | |
my @data = map { | |
my $v = $array->[$_]; | |
[ $_, (ref $v) ? @$v : $v ] | |
} (0..$#$array); | |
return \@data; | |
} | |
sub data_from_hash { | |
my ($hash) = @_; | |
my @data = map { | |
my $v = $hash->{$_}; | |
[ $_, (ref $v) ? @$v : $v ] | |
} (keys %$hash); | |
return \@data; | |
} | |
sub dump_data { | |
my ($data) = @_; | |
foreach (@$data) { | |
print(join("\t", @$_), "\n"); | |
} | |
} | |
sub read_head { | |
my ($hin) = @_; | |
seek($hin, 0, 0); | |
my ($buf); read($hin, $buf, 12); | |
my ($ver, $ntbl) = unpack("Nnnnn", $buf); | |
($ver == 0x10000) | |
or info("Unexpected sfnt version"); | |
my %res; | |
foreach (1 .. $ntbl) { | |
read($hin, $buf, 16); | |
my ($tag, $csum, $ofs, $len) = unpack("a4NNN", $buf); | |
$res{$tag} = [ $ofs, $len ]; | |
} | |
return \%res; | |
} | |
#--------------------------------------- | |
sub main_name { | |
my ($hfont) = @_; | |
my $head = read_head($hfont) | |
or error("cannot read header", $font_file); | |
(defined $head->{post}) | |
or error("cannot find 'post' table", $font_file); | |
my ($name, $idx) = parse_post($hfont, @{$head->{post}}) | |
or error("cannot read glyph name table", $font_file); | |
if ($smode eq '') { | |
return data_from_array(read_post($name, $idx)); | |
} elsif ($smode eq 'r') { | |
return data_from_array($idx); | |
} elsif ($smode eq 'x') { | |
return data_from_array(read_post_x($name, $idx)); | |
} | |
} | |
sub parse_post { | |
my ($hin, $ofs, $len) = @_; | |
seek($hin, $ofs, 0); | |
my ($buf); read($hin, $buf, $len); | |
(length($buf) == $len) or return; | |
my (@f) = unpack("NNnnNNNNNn", $buf); | |
my $nglf = $f[9]; | |
($f[0] == 0x20000 && $nglf > 0) or return; | |
my @idx = unpack("n*", substr($buf, 34, $nglf * 2)); | |
(scalar(@idx) == $nglf) or return; | |
$buf = substr($buf, 34 + $nglf * 2); | |
my @nams = (@mglist, unpack("(c/a)*", $buf)); | |
return ( \@nams, \@idx ); | |
} | |
sub read_post { | |
my ($nams, $idx) = @_; | |
my @res = map { | |
my $n = $nams->[$_]; | |
(defined $n) ? $n : return; | |
} (@$idx); | |
return \@res; | |
} | |
sub read_post_x { | |
my ($nams, $idx) = @_; | |
my @res = map { | |
my $n = $nams->[$_]; | |
(defined $n) ? [$_, $n] : return; | |
} (@$idx); | |
return \@res; | |
} | |
#--------------------------------------- | |
my %command = ( | |
'name' => ['name', ''], | |
'name-x' => ['name', 'x'], | |
'name-r' => ['name', 'r'], | |
'cmap-12' => ['cmap', '12'], | |
'cmap-14' => ['cmap', '14'], | |
); | |
sub read_option { | |
($mode, $smode) = ('', ''); | |
while ($ARGV[0] =~ /^-/) { | |
my $opt = shift(@ARGV); | |
if ($opt =~ /^--?h(elp)?$/) { | |
show_usage(); | |
} else { | |
error("invalid option", $opt); | |
} | |
} | |
(@ARGV) or show_usage(); | |
($#ARGV == 1) or error("wrong number of arguments"); | |
my $cmd = shift(@ARGV); | |
(defined $command{$cmd}) or error("unknown command", $cmd); | |
($mode, $smode) = @{$command{$cmd}}; | |
$font_file = shift(@ARGV); | |
} | |
sub show_usage { | |
print <<"EOT"; exit; | |
Usage: $prog_name <command> <font_file> | |
Here <command> is one of the following: | |
name glyph name list (gid/name) | |
cmap-12 Unicode map (code/gid/name) | |
cmap-14 Unicode VS map (base-code/VS-code/gid/name) | |
EOT | |
} | |
sub info { | |
print STDERR (join(": ", $prog_name, @_), "\n"); | |
} | |
sub error { | |
info(@_); exit(-1); | |
} | |
#--------------------------------------- | |
main(); | |
# EOF |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment