Skip to content

Instantly share code, notes, and snippets.

@enghqii
Created May 1, 2026 15:31
Show Gist options
  • Select an option

  • Save enghqii/4319e646545f43ebc75ebb8ca7be9c28 to your computer and use it in GitHub Desktop.

Select an option

Save enghqii/4319e646545f43ebc75ebb8ca7be9c28 to your computer and use it in GitHub Desktop.
pptx-font-normalizer
#!/usr/bin/env python3
import argparse
import collections
import os
import re
import sys
import zipfile
from dataclasses import dataclass
# Captures the value of every double-quoted typeface="..." attribute.
TYPEFACE_RE = re.compile(r'typeface="([^"]+)"')

# Matches one <p:embeddedFont> entry of ppt/presentation.xml.
#   group 1: the whole <p:embeddedFont>...</p:embeddedFont> block
#   group 2: the typeface of the <p:font> element
#   group 3: style element name (regular/bold/italic/boldItalic) of the LAST style child
#   group 4: r:id of that last style child
# An entry may carry several style children (e.g. regular + bold), so the
# style element is allowed to repeat; a single-child entry matches exactly
# as before and the number of capture groups is unchanged.
EMBEDDED_BLOCK_RE = re.compile(
    r"(<p:embeddedFont>\s*<p:font typeface=\"([^\"]+)\"[^>]*/>"
    r"(?:\s*<p:(regular|bold|italic|boldItalic)\s+r:id=\"([^\"]+)\"\s*/>)+"
    r"\s*</p:embeddedFont>)"
)
@dataclass
class FontReport:
    """Typeface counts gathered from the XML parts of one .pptx package."""

    # Faces found in <p:embeddedFont> entries of ppt/presentation.xml.
    embedded_faces: collections.Counter
    # typeface="..." values counted across every XML part except ppt/presentation.xml.
    requested_faces: collections.Counter
    # Subset of requested_faces whose face name has no entry in embedded_faces.
    missing_requested_faces: collections.Counter
def configure_stdout() -> None:
    """Switch sys.stdout to UTF-8 when the stream offers ``reconfigure``.

    Streams without a ``reconfigure`` attribute are left untouched.
    """
    try:
        reconfigure = sys.stdout.reconfigure
    except AttributeError:
        return
    reconfigure(encoding="utf-8")
def parse_pair(value: str) -> tuple[str, str]:
    """Parse an ``OLD=NEW`` command-line value into a stripped ``(old, new)`` pair.

    Only the first ``=`` separates the halves, so the right-hand side may
    itself contain ``=``.

    Raises:
        argparse.ArgumentTypeError: if there is no ``=`` or either half is
            empty after stripping whitespace.
    """
    left, separator, right = value.partition("=")
    if not separator:
        raise argparse.ArgumentTypeError("expected OLD=NEW")
    old, new = left.strip(), right.strip()
    if not (old and new):
        raise argparse.ArgumentTypeError("expected OLD=NEW with non-empty values")
    return old, new
def read_xml_parts(path: str) -> dict[str, str]:
    """Return every ``.xml`` member of the zip package at *path* as UTF-8 text.

    The result maps member name to decoded content; members whose names do
    not end in ``.xml`` are skipped.
    """
    with zipfile.ZipFile(path, "r") as archive:
        return {
            member: archive.read(member).decode("utf-8")
            for member in archive.namelist()
            if member.endswith(".xml")
        }
def build_report(xml_parts: dict[str, str]) -> FontReport:
    """Count embedded and requested typefaces across the given XML parts.

    Embedded faces come from the ``<p:embeddedFont>`` entries of
    ``ppt/presentation.xml`` (an absent part is treated as empty). Requested
    faces are every ``typeface="..."`` value in all other parts. A requested
    face is reported as missing when its exact name is not embedded.
    """
    presentation_name = "ppt/presentation.xml"

    embedded = collections.Counter(
        groups[1]
        for groups in EMBEDDED_BLOCK_RE.findall(xml_parts.get(presentation_name, ""))
    )

    requested: collections.Counter = collections.Counter()
    for part_name, xml_text in xml_parts.items():
        # presentation.xml holds the embedded table itself, not slide requests.
        if part_name != presentation_name:
            requested.update(TYPEFACE_RE.findall(xml_text))

    absent = collections.Counter(
        {face: hits for face, hits in requested.items() if face not in embedded}
    )
    return FontReport(
        embedded_faces=embedded,
        requested_faces=requested,
        missing_requested_faces=absent,
    )
def add_alias_entry(presentation_xml: str, alias: str, target: str) -> tuple[str, bool]:
    """Insert a copy of *target*'s embedded-font entry under the name *alias*.

    The copy is placed directly after the opening ``<p:embeddedFontLst>`` tag
    and keeps the relationship id of the entry it was copied from.

    Returns:
        ``(xml, added)``. The XML is returned unchanged with ``added=False``
        when ``typeface="<alias>"`` already occurs anywhere in the document,
        when no embedded entry for *target* is found, or when the
        ``<p:embeddedFontLst>`` opening tag is absent.
    """
    unchanged = (presentation_xml, False)

    if f'typeface="{alias}"' in presentation_xml:
        return unchanged

    # First embedded entry whose face name is exactly the target.
    source_block = next(
        (
            found.group(1)
            for found in EMBEDDED_BLOCK_RE.finditer(presentation_xml)
            if found.group(2) == target
        ),
        None,
    )
    if source_block is None:
        return unchanged

    head, list_open, tail = presentation_xml.partition("<p:embeddedFontLst>")
    if not list_open:
        return unchanged

    alias_block = source_block.replace(
        f'typeface="{target}"', f'typeface="{alias}"', 1
    )
    return head + list_open + alias_block + tail, True
def write_pptx(
    source_path: str,
    output_path: str,
    replacements: list[tuple[str, str]],
    aliases: list[tuple[str, str]],
) -> tuple[int, int]:
    """Copy the package at *source_path* to *output_path*, repairing font names.

    Every member is copied with its original zip metadata. Members whose name
    ends in ``.xml`` are decoded as UTF-8 and rewritten:

    * each ``(old, new)`` in *replacements* turns ``typeface="old"`` into
      ``typeface="new"``. Only complete typeface attribute values are
      touched, so slide body text and longer face names that merely contain
      ``old`` are left alone;
    * for ``ppt/presentation.xml`` each ``(alias, target)`` in *aliases* is
      passed to ``add_alias_entry`` after the replacements have run.

    Returns:
        ``(replacement_count, alias_count)``: the number of typeface
        attributes rewritten and the number of alias entries added.
    """
    # Pre-build the exact attribute strings once instead of per archive member.
    attribute_swaps = [
        (f'typeface="{old}"', f'typeface="{new}"') for old, new in replacements
    ]
    # ZipInfo fields carried over so the copy keeps the source's metadata.
    carried_fields = (
        "date_time",
        "compress_type",
        "comment",
        "extra",
        "create_system",
        "create_version",
        "extract_version",
        "flag_bits",
        "volume",
        "internal_attr",
        "external_attr",
    )
    replacement_count = 0
    alias_count = 0
    with zipfile.ZipFile(source_path, "r") as zin, zipfile.ZipFile(output_path, "w") as zout:
        for info in zin.infolist():
            data = zin.read(info.filename)
            if info.filename.endswith(".xml"):
                text = data.decode("utf-8")
                for needle, substitute in attribute_swaps:
                    count = text.count(needle)
                    if count:
                        text = text.replace(needle, substitute)
                        replacement_count += count
                if info.filename == "ppt/presentation.xml":
                    for alias, target in aliases:
                        text, added = add_alias_entry(text, alias, target)
                        alias_count += int(added)
                data = text.encode("utf-8")
            copied = zipfile.ZipInfo(info.filename)
            for field in carried_fields:
                setattr(copied, field, getattr(info, field))
            zout.writestr(copied, data)
    return replacement_count, alias_count
def print_counter(title: str, counter: collections.Counter, limit: int | None = None) -> None:
print(title)
if not counter:
print(" (none)")
return
items = counter.most_common(limit)
for face, count in items:
print(f" {count:5d} {face}")
def _is_same_file(first: str, second: str) -> bool:
    """Return True when both paths refer to the same file on disk."""
    try:
        # Catches symlinks, hard links and case-insensitive file systems.
        return os.path.samefile(first, second)
    except OSError:
        # samefile needs both paths to exist; the output usually does not yet,
        # so fall back to comparing normalised absolute paths.
        return os.path.normcase(os.path.abspath(first)) == os.path.normcase(
            os.path.abspath(second)
        )


def main() -> int:
    """Run the command-line tool.

    Always prints a font report for the source deck. When ``--output`` is
    given, additionally writes a repaired copy with the requested
    replacements and aliases applied and prints a follow-up report.

    Returns:
        0 on success, 2 when ``--output`` points at the source file itself.
    """
    configure_stdout()
    parser = argparse.ArgumentParser(
        description="Inspect and repair font face-name mismatches inside a PowerPoint .pptx package."
    )
    parser.add_argument("pptx", help="Path to the source .pptx")
    parser.add_argument("--output", help="Write a repaired copy to this path")
    parser.add_argument(
        "--replace",
        action="append",
        type=parse_pair,
        default=[],
        help="Exact face-name replacement in OLD=NEW form. Repeat as needed.",
    )
    parser.add_argument(
        "--alias",
        action="append",
        type=parse_pair,
        default=[],
        help="Add an embedded font alias in OLD=TARGET form. Repeat as needed.",
    )
    args = parser.parse_args()

    xml_parts = read_xml_parts(args.pptx)
    report = build_report(xml_parts)
    print_counter("Embedded faces", report.embedded_faces)
    print_counter("Requested faces", report.requested_faces, limit=30)
    print_counter("Requested faces missing from embedded font table", report.missing_requested_faces)

    if not args.output:
        return 0

    output_path = args.output
    # Writing onto the source would truncate it while it is still being read.
    if _is_same_file(output_path, args.pptx):
        print("Refusing to overwrite the source file directly. Use --output with a new path.", file=sys.stderr)
        return 2

    replacement_count, alias_count = write_pptx(
        source_path=args.pptx,
        output_path=output_path,
        replacements=args.replace,
        aliases=args.alias,
    )

    # Re-read the written copy so the final report reflects what is on disk.
    fixed_report = build_report(read_xml_parts(output_path))
    print(f"Wrote repaired copy: {output_path}")
    print(f"Exact replacements applied: {replacement_count}")
    print(f"Embedded aliases added: {alias_count}")
    print_counter(
        "Remaining requested faces missing from embedded font table",
        fixed_report.missing_requested_faces,
    )
    return 0
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    sys.exit(main())
name: pptx-font-normalizer
description: Inspect and repair PowerPoint `.pptx` files that render the same font inconsistently because slide XML requests one face name while the embedded font table exposes another. Use when Codex needs to diagnose font fallback, compare requested faces against embedded faces, normalize exact face-name mismatches, or write a repaired copy of a presentation.

PPTX Font Normalizer

Treat a .pptx as an Open XML package. Verify what the slide XML actually requests before changing anything.

Quick Start

Use the bundled script:

python "$env:CODEX_HOME\skills\pptx-font-normalizer\scripts\repair_pptx_fonts.py" `
  "C:\path\deck.pptx"

That prints:

  • embedded font faces from ppt/presentation.xml
  • requested typeface="..." counts across XML parts
  • requested faces that are not present in the embedded font table

Core Workflow

  1. Run a report first. Do not assume the visible issue is text encoding.
  2. Compare slide-requested face names with embedded font face names.
  3. If the same visual font appears under mixed localized and English names, normalize slide references to one canonical face.
  4. If PowerPoint still needs to resolve both names, add an embedded alias entry that points the alternate face name to the same embedded font relationship.
  5. Always write the repaired copy to a new path with --output. The script refuses to write over the source file, and replacing a deck in place fails on Windows while it is open in PowerPoint.

Exact Commands

Report only:

python "$env:CODEX_HOME\skills\pptx-font-normalizer\scripts\repair_pptx_fonts.py" `
  "C:\path\deck.pptx"

Rewrite requested face names into a canonical embedded face:

python "$env:CODEX_HOME\skills\pptx-font-normalizer\scripts\repair_pptx_fonts.py" `
  "C:\path\deck.pptx" `
  --output "C:\path\deck.fixed.pptx" `
  --replace "넥슨Lv2고딕 Medium=NEXON Lv2 Gothic Medium" `
  --replace "넥슨Lv2고딕 Bold=NEXON Lv2 Gothic Bold"

Add embedded aliases so both names resolve to the same embedded font:

python "$env:CODEX_HOME\skills\pptx-font-normalizer\scripts\repair_pptx_fonts.py" `
  "C:\path\deck.pptx" `
  --output "C:\path\deck.fixed.pptx" `
  --replace "넥슨Lv2고딕 Medium=NEXON Lv2 Gothic Medium" `
  --replace "넥슨Lv2고딕 Bold=NEXON Lv2 Gothic Bold" `
  --alias "넥슨Lv2고딕 Medium=NEXON Lv2 Gothic Medium" `
  --alias "넥슨Lv2고딕 Bold=NEXON Lv2 Gothic Bold"

Interpretation

PowerPoint font resolution is often exact-name sensitive. The failure pattern to look for is:

  • slides request Localized Name Medium
  • embedded font table exposes English Name Medium
  • the English name renders correctly
  • the localized name falls back

When that happens, prefer one canonical requested face name across slides. Add aliases only when you need both names to resolve in the package.

Guardrails

  • Default to writing a new output file.
  • If the original deck is open, expect in-place replacement to fail on Windows.
  • Do not describe this as an encoding issue unless the XML itself is actually corrupted.
  • Keep replacements exact. Do not mass-rewrite partial substrings unless you have inspected the package first.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment