pklaus/get_name.py

tatarize · 2024-01-26T12:53:19Z

MIT License.
Here's a version that doesn't even need fontTools. It's also built for speed: we do not read any unneeded bytes or parse anything that isn't directly helping us find these values. In testing, this parsed about 31 files per millisecond.

def query_name(filename):
    """Read the family, subfamily and full name from a font's ``name`` table.

    Only the sfnt table directory, the ``name`` table header and its name
    records are parsed; string storage is visited solely for the name IDs of
    interest, so no font library is needed.

    Args:
        filename: Path of a font file whose sfnt table directory starts at
            offset 0.

    Returns:
        A ``(font_family, font_subfamily, font_name)`` tuple taken from name
        IDs 1, 2 and 4. Entries that were not found are ``None``; all three
        are ``None`` when the file has no ``name`` table. A string that
        cannot be decoded is returned as raw ``bytes``.

    Raises:
        struct.error: If the file is too short to hold a header being read.
        OSError: If the file cannot be opened or read.
    """
    # Local import: the snippet has no module-level import block of its own.
    import struct

    def get_string(f, off, length, platform_id, encoding_id):
        """Read ``length`` bytes at ``off``, restore the position, decode."""
        location = f.tell()
        f.seek(off)
        raw = f.read(length)
        f.seek(location)
        if platform_id == 1 and encoding_id == 0:
            # Macintosh/Roman strings are single-byte. Trying UTF-16BE first
            # would "succeed" on even-length ASCII and produce garbage.
            encodings = ("mac_roman",)
        else:
            encodings = ("UTF-16BE", "UTF8")
        for encoding in encodings:
            try:
                return raw.decode(encoding)
            except UnicodeDecodeError:
                continue
        # Best effort: hand back the undecodable bytes unchanged.
        return raw

    with open(filename, "rb") as f:
        (
            _sfnt_version,
            num_tables,
            _search_range,
            _entry_selector,
            _range_shift,
        ) = struct.unpack(">LHHHH", f.read(12))

        for _ in range(num_tables):
            tag, _checksum, offset, _length = struct.unpack(
                ">4sLLL", f.read(16)
            )
            if tag == b"name":
                break
        else:
            return None, None, None

        # Jump to the name table.
        table_start = offset
        f.seek(table_start)
        _fmt, count, strings_offset = struct.unpack(">HHH", f.read(6))
        # The name records follow the 6-byte header directly in both format 0
        # and format 1; format 1's language-tag data comes *after* the name
        # records, so nothing must be skipped here.

        font_family = None
        font_subfamily = None
        font_name = None
        for _ in range(count):
            (
                platform_id,
                encoding_id,
                _language_id,
                name_id,
                length,
                record_offset,
            ) = struct.unpack(">HHHHHH", f.read(12))
            if name_id not in (1, 2, 4):
                continue
            pos = table_start + strings_offset + record_offset
            text = get_string(f, pos, length, platform_id, encoding_id)
            if name_id == 1:
                font_family = text
            elif name_id == 2:
                font_subfamily = text
            else:
                font_name = text
            # Stop as soon as all three names are known.
            if font_family and font_subfamily and font_name:
                break
        return font_family, font_subfamily, font_name

moi15moi · 2024-01-26T18:20:50Z

@tatarize If you really want to decode it by yourself, you should use the right encoding.

Here is what GDI (Windows) does:

    def get_name_encoding(name: NameRecord) -> Optional[str]:
        """Pick the Python codec GDI would use to decode a name record.

        Parameters:
            name (NameRecord): Name record from the naming table. Only its
                ``platformID``, ``platEncID`` and ``nameID`` are consulted.
        Returns:
            The codec name for the record's string, or None if GDI does not
            support the record's platform/encoding combination.
        """
        # From: https://github.com/MicrosoftDocs/typography-issues/issues/956#issuecomment-1205678068
        if name.platformID == 3:
            plat_enc = name.platEncID
            if plat_enc == 3:
                return "cp936"
            if plat_enc in (4, 5):
                # For these two encodings the subfamily (nameID 2) is still
                # read as UTF-16BE; every other name uses the legacy codec.
                if name.nameID == 2:
                    return "utf_16_be"
                return "cp950" if plat_enc == 4 else "cp949"
            return "utf_16_be"

        if name.platformID == 1 and name.platEncID == 0:
            return "iso-8859-1"

        return None

    @staticmethod
    def get_decoded_name(name: NameRecord) -> str:
        """Decode a name record's raw string with the encoding GDI would use.

        Parameters:
            name (NameRecord): Name record from the naming table.
        Returns:
            The decoded name.

        NOTE(review): when ``FontParser.get_name_encoding`` returns None the
        final ``decode`` call receives None as its codec and will fail --
        presumably callers filter out unsupported records first; confirm.
        """
        codec = FontParser.get_name_encoding(name)

        # Compatibility for really old fonts: Windows-platform records in a
        # legacy (non UTF-16BE) codec get their NUL bytes removed first.
        strip_nuls = name.platformID == 3 and codec != "utf_16_be"
        raw = name.string.replace(b"\x00", b"") if strip_nuls else name.string

        return raw.decode(codec)

	#!/usr/bin/env python

	"""
	From
	https://github.com/gddc/ttfquery/blob/master/ttfquery/describe.py
	and
	http://www.starrhorne.com/2012/01/18/how-to-extract-font-names-from-ttf-files-using-python-and-our-old-friend-the-command-line.html

	ported to Python 3
	"""

	import sys
	from fontTools import ttLib

	# Name IDs in the font's 'name' table: 4 is the full font name, 1 the family.
	FONT_SPECIFIER_NAME_ID = 4
	FONT_SPECIFIER_FAMILY_ID = 1


	def shortName(font):
	    """Get the short name from the font's names table.

	    Args:
	        font: Object whose ``font['name'].names`` yields name records
	            exposing ``nameID`` and ``toUnicode()`` (as fontTools name
	            records do).

	    Returns:
	        A ``(name, family)`` tuple holding the first non-empty full name
	        (name ID 4) and family name (name ID 1); an entry is ``""`` when
	        no such record exists.
	    """
	    name = ""
	    family = ""
	    for record in font['name'].names:
	        # Decode only the records that are actually needed, and let the
	        # record pick its own encoding instead of sniffing for NUL bytes:
	        # UTF-16BE names without a zero byte (e.g. CJK text) would
	        # otherwise be misread as UTF-8.
	        if record.nameID == FONT_SPECIFIER_NAME_ID and not name:
	            name = record.toUnicode(errors="replace")
	        elif record.nameID == FONT_SPECIFIER_FAMILY_ID and not family:
	            family = record.toUnicode(errors="replace")
	        if name and family:
	            break
	    return name, family

	# Open the font file named by the first command-line argument and print
	# the (name, family) pair returned by shortName().
	tt = ttLib.TTFont(sys.argv[1])
	print("Name: %s Family: %s" % shortName(tt))

pklaus/get_name.py

tatarize commented Jan 26, 2024

Uh oh!

moi15moi commented Jan 26, 2024

Uh oh!