-
-
Save pklaus/dce37521579513c574d0 to your computer and use it in GitHub Desktop.
Extracting font names from TTF/OTF files using Python and fontTools
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
""" | |
From | |
https://github.com/gddc/ttfquery/blob/master/ttfquery/describe.py | |
and | |
http://www.starrhorne.com/2012/01/18/how-to-extract-font-names-from-ttf-files-using-python-and-our-old-friend-the-command-line.html | |
ported to Python 3 | |
""" | |
import sys | |
from fontTools import ttLib | |
# nameID values looked up in the font's 'name' table.
FONT_SPECIFIER_NAME_ID = 4    # full font name
FONT_SPECIFIER_FAMILY_ID = 1  # font family name


def shortName(font):
    """Get the short name from the font's names table.

    Args:
        font: An object whose ``font['name'].names`` is an iterable of name
            records exposing ``nameID`` and ``toUnicode()`` (a fontTools
            ``TTFont`` provides this).

    Returns:
        A ``(name, family)`` tuple holding the first non-empty full-name
        (nameID 4) and family-name (nameID 1) strings found. Either element
        is ``""`` when the font has no such record.
    """
    name = ""
    family = ""
    for record in font['name'].names:
        # Let the record decode itself: it picks the codec from its own
        # platform/encoding IDs. Guessing from the presence of NUL bytes
        # mis-decodes UTF-16BE text without NULs (e.g. CJK) and legacy
        # single-byte Macintosh strings.
        # Only the two wanted IDs are decoded, so an undecodable record of
        # another kind can no longer abort the lookup.
        if record.nameID == FONT_SPECIFIER_NAME_ID:
            if not name:
                name = record.toUnicode()
        elif record.nameID == FONT_SPECIFIER_FAMILY_ID:
            if not family:
                family = record.toUnicode()
        if name and family:
            break
    return name, family
# Script entry: open the font file given as the first command-line argument
# and print its full name and family name.
tt = ttLib.TTFont(sys.argv[1])
full_name, family_name = shortName(tt)
print("Name: %s Family: %s" % (full_name, family_name))
MIT License.
Here's a version that doesn't even need fontTools. It's also built for speed: it does not read any unneeded bytes or parse anything that isn't directly helping to find these values. In testing, it parsed about 31 files per millisecond.
def query_name(filename):
    """Read family, subfamily and full name straight from a font's 'name' table.

    Only the sfnt header, the table directory (up to the 'name' entry), the
    name records and the wanted strings are read.

    Args:
        filename: Path of the font file to open in binary mode.

    Returns:
        A ``(font_family, font_subfamily, font_name)`` tuple for nameIDs
        1, 2 and 4. Each element is a ``str``, the raw ``bytes`` when no
        codec could decode it, or ``None`` when no such record exists.
        ``(None, None, None)`` is returned when the file has no 'name' table.

    Raises:
        OSError: If the file cannot be opened.
        struct.error: If the file ends before a header or record is complete.
    """
    import struct  # local import keeps this snippet self-contained

    def get_string(f, off, length, platform_id, platform_specific_id):
        """Read ``length`` bytes at ``off`` (restoring the file position) and decode them."""
        location = f.tell()
        f.seek(off)
        string = f.read(length)
        f.seek(location)
        if platform_id == 1 and platform_specific_id == 0:
            # Macintosh/Roman strings are single-byte. Trying UTF-16BE first
            # would "succeed" on any even-length name and return garbage.
            codecs_to_try = ("mac_roman",)
        else:
            codecs_to_try = ("utf-16-be", "utf-8")
        for codec in codecs_to_try:
            try:
                return string.decode(codec)
            except UnicodeDecodeError:
                continue
        # Nothing decoded cleanly: hand back the raw bytes.
        return string

    with open(filename, "rb") as f:
        (
            _sfnt_version,
            num_tables,
            _search_range,
            _entry_selector,
            _range_shift,
        ) = struct.unpack(">LHHHH", f.read(12))

        # Walk the table directory until the 'name' entry shows up.
        for _ in range(num_tables):
            tag, _checksum, offset, _length = struct.unpack(">4sLLL", f.read(16))
            if tag == b"name":
                break
        else:
            return None, None, None

        # We are now at the name table.
        table_start = offset
        f.seek(table_start)
        _fmt, count, strings_offset = struct.unpack(">HHH", f.read(6))
        # The name records follow the 6-byte header directly in every table
        # format. Format 1 stores its language-tag count and records *after*
        # the name records, so there is nothing to skip here.

        wanted = {1: None, 2: None, 4: None}  # family, subfamily, full name
        for _ in range(count):
            (
                platform_id,
                platform_specific_id,
                _language_id,
                name_id,
                length,
                record_offset,
            ) = struct.unpack(">HHHHHH", f.read(2 * 6))
            if name_id in wanted:
                pos = table_start + strings_offset + record_offset
                wanted[name_id] = get_string(
                    f, pos, length, platform_id, platform_specific_id
                )
                if all(wanted.values()):
                    break
        return wanted[1], wanted[2], wanted[4]
@tatarize If you really want to decode it by yourself, you should use the right encoding.
Here is what GDI (Windows) does:
def get_name_encoding(name: NameRecord) -> Optional[str]:
    """
    Pick the codec used to decode a naming-table record.

    Parameters:
        name (NameRecord): Name record from the naming table
    Returns:
        The codec name selected from the record's platformID, platEncID
        and nameID.
        If GDI does not support the name, return None.
    """
    # From: https://github.com/MicrosoftDocs/typography-issues/issues/956#issuecomment-1205678068
    if name.platformID == 3:
        enc_id = name.platEncID
        if enc_id == 3:
            return "cp936"
        # platEncID 4 and 5 use a legacy codepage, except for nameID 2.
        legacy_codepage = {4: "cp950", 5: "cp949"}.get(enc_id)
        if legacy_codepage is None or name.nameID == 2:
            return "utf_16_be"
        return legacy_codepage

    if name.platformID == 1 and name.platEncID == 0:
        return "iso-8859-1"

    return None
@staticmethod
def get_decoded_name(name: NameRecord) -> str:
    """
    Decode a name record with the codec chosen by get_name_encoding.

    Parameters:
        name (NameRecord): Name record from the naming table
    Returns:
        The decoded name
    """
    encoding = FontParser.get_name_encoding(name)
    # NOTE(review): encoding may be None here; decode() would then raise
    # instead of returning a string - confirm callers filter those records.
    raw = name.string
    if name.platformID == 3 and encoding != "utf_16_be":
        # Compatibility for really old font
        raw = raw.replace(b"\x00", b"")
    return raw.decode(encoding)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
@moi15moi My mistake, sorry, the brotli installation was missing.
Now there are no problems with woff2 files.
Yes, I am using your method, thank you!