JoshCheek · August 10, 2023 11:44 · PhatDoHuu · Nov 4, 2021
diff --git a/parse_otf.rb b/parse_otf.rb
 # WOFF spec                                https://www.w3.org/TR/2012/REC-WOFF-20121213/
 # OTF spec                                 https://www.microsoft.com/en-us/Typography/OpenTypeSpecification.aspx
 # Really nice inspector                    https://opentype.js.org/font-inspector.html
 # unpack instructions                      http://www.rubydoc.info/stdlib/core/String#unpack-instance_method
 # Font programming instruction definitions https://developer.apple.com/fonts/TrueType-Reference-Manual/RM05/Chap5.html#WS
 # FontForge has some useful info, too; install it with Homebrew Cask
 FONT_FILE = '/Users/xjxc322/gamut/bots/pxgamut_regular.woff'

 require 'zlib'

 # Number of bytes consumed by each String#unpack directive used in this script.
 SIZES = {
   ''   => 0, # placeholder field: consumes nothing and unpacks to nil
   'a4' => 4, # 4 chars, 8 bits each
   'L>' => 4, # 32 bits unsigned, big-endian
   'l>' => 4, # 32 bits signed, big-endian
   'S>' => 2, # 16 bits unsigned, big-endian
   's>' => 2, # 16 bits signed, big-endian
 }.freeze

 # Decodes consecutive fixed-width fields from the front of a binary string.
 #
 # binary      - String holding the raw bytes to decode (it is not modified).
 # format_hash - Hash of field name => unpack directive, in file order.
 #               Every directive must be a key of SIZES.
 #
 # Returns [struct, remaining]: a Struct with one member per field, and the
 # bytes that follow the last decoded field.
 # Raises KeyError for a directive that is not in SIZES, and ArgumentError
 # when binary ends before a field is complete (instead of silently storing
 # nil and handing back a nil remainder).
 def read_and_unpack(binary, format_hash)
   struct = Struct.new(*format_hash.keys).new
   offset = 0 # byte position of the next field; avoids re-copying the tail per field

   format_hash.each do |name, format|
     width = SIZES.fetch(format)
     chunk = binary.byteslice(offset, width)

     if chunk.nil? || chunk.bytesize < width
       available = chunk ? chunk.bytesize : 0
       raise ArgumentError,
             "truncated input: field #{name.inspect} (#{format.inspect}) needs " \
             "#{width} bytes at offset #{offset}, only #{available} available"
     end

     struct[name] = chunk.unpack(format).first
     offset += width
   end

   [struct, binary.byteslice(offset..-1)]
 end



 # Load the whole font as raw bytes. File.binread opens the file in binary
 # mode, so no newline translation can corrupt the data, and the result is an
 # ASCII-8BIT string. `font` keeps the complete file because it is indexed by
 # table offset further down; `rest` is the part the parser has not consumed.
 rest = font = File.binread(FONT_FILE)

 # WOFF header: the fixed-size record at the very start of the file.
 header, rest = read_and_unpack(
   rest,
   signature:        "a4",
   flavor:           "a4",
   length:           "L>",
   num_tables:       "S>",
   reserved:         "S>",
   total_sfnt_size:  "L>",
   major_version:    "S>",
   minor_version:    "S>",
   meta_offset:      "L>",
   meta_length:      "L>",
   meta_orig_length: "L>",
   priv_offset:      "L>",
   priv_length:      "L>",
 )
 header
 # => #<struct 
 #     signature="wOFF",
 #     flavor="OTTO",
 #     length=69456,
 #     num_tables=12,
 #     reserved=0,
 #     total_sfnt_size=133632,
 #     major_version=0,
 #     minor_version=0,
 #     meta_offset=0,
 #     meta_length=0,
 #     meta_orig_length=0,
 #     priv_offset=0,
 #     priv_length=0>

 # Table directory: header.num_tables records of five 4-byte fields each,
 # read from the bytes that follow the header.
 directory_format = {
   tag:           "a4",
   offset:        "L>",
   comp_length:   "L>",
   orig_length:   "L>",
   orig_checksum: "L>",
 }

 entries = Array.new(header.num_tables) do
   # Assigning `rest` here advances the outer cursor for the next record.
   record, rest = read_and_unpack(rest, directory_format)
   record
 end

 # Order the records by where their data sits in the file.
 entries.sort_by! { |record| record.offset }

 # Render the table directory as fixed-width text rows: a heading, a rule, then
 # one row per entry. The third column is the entry's comp_length (the size of
 # the data as stored in the file), so it is labelled COMP_LEN.
 [%w[TAG    OFFSET   COMP_LEN       ORIGINAL_LEN   CHECKSUM],
  %w[------ -------- -------------- -------------- ----------],
  *entries.map(&:to_a)
 ].map { |row| format("%-6s%-8s%-14s%-14s%-10s", *row) }
 # => ["TAG   OFFSET  COMP_LEN      ORIGINAL_LEN  CHECKSUM  ",
 #     "----------------------------------------------------",
 #     "head  284     54            54            208499658 ",
 #     "hhea  340     33            36            247336158 ",
 #     "maxp  376     6             6             42094592  ",
 #     "OS/2  384     89            96            2363395350",
 #     "name  476     499           1190          1917503162",
 #     "cmap  976     1103          1712          3922624268",
 #     "hmtx  2080    1326          2568          293570700 ",
 #     "post  3408    19            32            4290248754",
 #     "kern  3428    30983         65538         1282694526",
 #     "GPOS  34412   4679          17102         3357198945",
 #     "GSUB  39092   3235          8266          1306928084",
 #     "CFF   42328   27126         36814         3912817438"]

 # Map each table tag to its decoded bytes. A table whose stored length is not
 # smaller than its original length is taken verbatim from the file; any other
 # table is zlib-inflated.
 tables = entries.each_with_object({}) do |entry, by_tag|
   by_tag[entry.tag] =
     if entry.comp_length >= entry.orig_length
       font[entry.offset, entry.orig_length]
     else
       Zlib::Inflate.inflate(font[entry.offset, entry.comp_length])
     end
 end

 # GSUB table header: a version pair followed by three Offset16 fields.
 # Offset16 is an unsigned 16-bit value, so the offsets are read with "S>"
 # (a signed read would turn any offset above 32767 negative).
 #
 # NOTE(review): the original author was unsure this parse is right, and the
 # last three value fields do appear to be misread. script_list_offset is 10,
 # i.e. the ScriptList begins directly after the five 16-bit header fields, so
 # for this font "substitution_format" (2) looks like the ScriptList's script
 # count, and "coverage_offset"/"glyph_count" (17478 = 0x4446 "DF",
 # 19540 = 0x4C54 "LT") look like the four bytes of the first script tag,
 # "DFLT". The fields are kept so the struct's shape is unchanged; confirm
 # against the OpenType spec before relying on them.
 glyph_substitution, offsets_binary = read_and_unpack(
   tables['GSUB'],
   major_version:        "S>",
   minor_version:        "S>",
   script_list_offset:   "S>",
   feature_list_offset:  "S>",
   lookup_list_offset:   "S>",
   substitution_format:  "S>", # 2
   coverage_offset:      "s>",
   glyph_count:          "S>",
   substitute_glyph_ids: "",
 )
 offsets_binary.length # => 8250
 glyph_substitution
 # => #<struct 
 #     major_version=1,
 #     minor_version=0,
 #     script_list_offset=10,
 #     feature_list_offset=296,
 #     lookup_list_offset=1780,
 #     substitution_format=2,
 #     coverage_offset=17478,
 #     glyph_count=19540,
 #     substitute_glyph_ids=nil>
	# WOFF spec https://www.w3.org/TR/2012/REC-WOFF-20121213/
	# OTF spec https://www.microsoft.com/en-us/Typography/OpenTypeSpecification.aspx
	# Really nice inspector https://opentype.js.org/font-inspector.html
	# unpack instructions http://www.rubydoc.info/stdlib/core/String#unpack-instance_method
	# Font programming instruction definitions https://developer.apple.com/fonts/TrueType-Reference-Manual/RM05/Chap5.html#WS
	# FontForge has some useful info, too; install it with Homebrew Cask
	FONT_FILE = '/Users/xjxc322/gamut/bots/pxgamut_regular.woff'

	require 'zlib'

	# Number of bytes consumed by each String#unpack directive used in this script.
	SIZES = {
	  ''   => 0, # placeholder field: consumes nothing and unpacks to nil
	  'a4' => 4, # 4 chars, 8 bits each
	  'L>' => 4, # 32 bits unsigned, big-endian
	  'l>' => 4, # 32 bits signed, big-endian
	  'S>' => 2, # 16 bits unsigned, big-endian
	  's>' => 2, # 16 bits signed, big-endian
	}.freeze

	# Decodes consecutive fixed-width fields from the front of a binary string.
	#
	# binary      - String holding the raw bytes to decode (it is not modified).
	# format_hash - Hash of field name => unpack directive, in file order.
	#               Every directive must be a key of SIZES.
	#
	# Returns [struct, remaining]: a Struct with one member per field, and the
	# bytes that follow the last decoded field.
	# Raises KeyError for a directive that is not in SIZES, and ArgumentError
	# when binary ends before a field is complete (instead of silently storing
	# nil and handing back a nil remainder).
	def read_and_unpack(binary, format_hash)
	  struct = Struct.new(*format_hash.keys).new
	  offset = 0 # byte position of the next field; avoids re-copying the tail per field

	  format_hash.each do |name, format|
	    width = SIZES.fetch(format)
	    chunk = binary.byteslice(offset, width)

	    if chunk.nil? || chunk.bytesize < width
	      available = chunk ? chunk.bytesize : 0
	      raise ArgumentError,
	            "truncated input: field #{name.inspect} (#{format.inspect}) needs " \
	            "#{width} bytes at offset #{offset}, only #{available} available"
	    end

	    struct[name] = chunk.unpack(format).first
	    offset += width
	  end

	  [struct, binary.byteslice(offset..-1)]
	end



	# Load the whole font as raw bytes. File.binread opens the file in binary
	# mode, so no newline translation can corrupt the data, and the result is an
	# ASCII-8BIT string. `font` keeps the complete file because it is indexed by
	# table offset further down; `rest` is the part the parser has not consumed.
	rest = font = File.binread(FONT_FILE)

	# WOFF header: the fixed-size record at the very start of the file.
	header, rest = read_and_unpack(
	  rest,
	  signature:        "a4",
	  flavor:           "a4",
	  length:           "L>",
	  num_tables:       "S>",
	  reserved:         "S>",
	  total_sfnt_size:  "L>",
	  major_version:    "S>",
	  minor_version:    "S>",
	  meta_offset:      "L>",
	  meta_length:      "L>",
	  meta_orig_length: "L>",
	  priv_offset:      "L>",
	  priv_length:      "L>",
	)
	header
	# => #<struct
	# signature="wOFF",
	# flavor="OTTO",
	# length=69456,
	# num_tables=12,
	# reserved=0,
	# total_sfnt_size=133632,
	# major_version=0,
	# minor_version=0,
	# meta_offset=0,
	# meta_length=0,
	# meta_orig_length=0,
	# priv_offset=0,
	# priv_length=0>

	# Table directory: header.num_tables records of five 4-byte fields each,
	# read from the bytes that follow the header.
	directory_format = {
	  tag:           "a4",
	  offset:        "L>",
	  comp_length:   "L>",
	  orig_length:   "L>",
	  orig_checksum: "L>",
	}

	entries = Array.new(header.num_tables) do
	  # Assigning `rest` here advances the outer cursor for the next record.
	  record, rest = read_and_unpack(rest, directory_format)
	  record
	end

	# Order the records by where their data sits in the file.
	entries.sort_by! { |record| record.offset }

	# Render the table directory as fixed-width text rows: a heading, a rule, then
	# one row per entry. The third column is the entry's comp_length (the size of
	# the data as stored in the file), so it is labelled COMP_LEN.
	[%w[TAG    OFFSET   COMP_LEN       ORIGINAL_LEN   CHECKSUM],
	 %w[------ -------- -------------- -------------- ----------],
	 *entries.map(&:to_a)
	].map { |row| format("%-6s%-8s%-14s%-14s%-10s", *row) }
	# => ["TAG OFFSET COMP_LEN ORIGINAL_LEN CHECKSUM ",
	# "----------------------------------------------------",
	# "head 284 54 54 208499658 ",
	# "hhea 340 33 36 247336158 ",
	# "maxp 376 6 6 42094592 ",
	# "OS/2 384 89 96 2363395350",
	# "name 476 499 1190 1917503162",
	# "cmap 976 1103 1712 3922624268",
	# "hmtx 2080 1326 2568 293570700 ",
	# "post 3408 19 32 4290248754",
	# "kern 3428 30983 65538 1282694526",
	# "GPOS 34412 4679 17102 3357198945",
	# "GSUB 39092 3235 8266 1306928084",
	# "CFF 42328 27126 36814 3912817438"]

	# Map each table tag to its decoded bytes. A table whose stored length is not
	# smaller than its original length is taken verbatim from the file; any other
	# table is zlib-inflated.
	tables = entries.each_with_object({}) do |entry, by_tag|
	  by_tag[entry.tag] =
	    if entry.comp_length >= entry.orig_length
	      font[entry.offset, entry.orig_length]
	    else
	      Zlib::Inflate.inflate(font[entry.offset, entry.comp_length])
	    end
	end

	# GSUB table header: a version pair followed by three Offset16 fields.
	# Offset16 is an unsigned 16-bit value, so the offsets are read with "S>"
	# (a signed read would turn any offset above 32767 negative).
	#
	# NOTE(review): the original author was unsure this parse is right, and the
	# last three value fields do appear to be misread. script_list_offset is 10,
	# i.e. the ScriptList begins directly after the five 16-bit header fields, so
	# for this font "substitution_format" (2) looks like the ScriptList's script
	# count, and "coverage_offset"/"glyph_count" (17478 = 0x4446 "DF",
	# 19540 = 0x4C54 "LT") look like the four bytes of the first script tag,
	# "DFLT". The fields are kept so the struct's shape is unchanged; confirm
	# against the OpenType spec before relying on them.
	glyph_substitution, offsets_binary = read_and_unpack(
	  tables['GSUB'],
	  major_version:        "S>",
	  minor_version:        "S>",
	  script_list_offset:   "S>",
	  feature_list_offset:  "S>",
	  lookup_list_offset:   "S>",
	  substitution_format:  "S>", # 2
	  coverage_offset:      "s>",
	  glyph_count:          "S>",
	  substitute_glyph_ids: "",
	)
	offsets_binary.length # => 8250
	glyph_substitution
	# => #<struct
	# major_version=1,
	# minor_version=0,
	# script_list_offset=10,
	# feature_list_offset=296,
	# lookup_list_offset=1780,
	# substitution_format=2,
	# coverage_offset=17478,
	# glyph_count=19540,
	# substitute_glyph_ids=nil>