Skip to content

Instantly share code, notes, and snippets.

@flodolo
Created March 28, 2021 15:11
Show Gist options
  • Save flodolo/34c3ff2e02d8dde270001d300f22e050 to your computer and use it in GitHub Desktop.
Save flodolo/34c3ff2e02d8dde270001d300f22e050 to your computer and use it in GitHub Desktop.
Check for unused DTDs in mozilla-central
#! /usr/bin/env python3
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
from collections import defaultdict
import re
import os
import sys
from compare_locales.parser import getParser
import hglib
# Maps each file path handed to getContent() to the set of normalized paths it
# pulls in through "#include" directives. Filled in as a side effect of
# getContent().
includes = defaultdict(set)


def getContent(f):
    """Return the text of file *f* with its "#include" lines expanded.

    Every line of the form ``#include <path>`` is replaced by the (recursively
    expanded) content of ``<path>``, resolved relative to the directory of
    *f*. Each resolved path is also recorded in ``includes[f]``.

    This is deliberately best-effort: if *f* cannot be read, decoded or
    expanded for any reason, an empty string is returned instead of raising.
    An include that fails is therefore replaced by an empty string.
    """
    try:
        base_dir = os.path.dirname(f)

        def expand_include(match):
            # Include paths are relative to the including file's directory.
            included = os.path.normpath(os.path.join(base_dir, match.group(1)))
            includes[f].add(included)
            return getContent(included)

        # The context manager closes the handle even when read() fails; the
        # explicit encoding keeps results independent of the system locale.
        with open(f, encoding="utf-8") as source:
            content = source.read()
        # Raw string: "\s" in a plain literal is an invalid escape sequence.
        return re.sub(
            r"^#include\s+([^\s]*?)\s*$", expand_include, content, flags=re.M
        )
    except Exception:
        # Unreadable or undecodable files are skipped on purpose, but Ctrl-C
        # and SystemExit are no longer swallowed as they were by a bare except.
        return ""
def getL10nUsage(f):
    """Collect the entity references and DTD chrome URLs found in file *f*.

    The file is read through getContent(), so "#include" directives are
    expanded first. Returns a 3-tuple ``(f, keys, chromefiles)`` where:

    * ``keys`` lists every string found between an ``&`` and the next ``;``
      (in document order, duplicates kept);
    * ``chromefiles`` lists every double-quoted ``chrome:`` URL that contains
      ``/locale/`` and ends in ``dtd``.
    """
    text = getContent(f)
    entity_pattern = re.compile("&(.*?);", flags=re.M)
    dtd_url_pattern = re.compile('"(chrome:.*?/locale/.*?dtd)"', flags=re.M)
    entity_refs = entity_pattern.findall(text)
    dtd_urls = dtd_url_pattern.findall(text)
    return f, entity_refs, dtd_urls
# Root of the local Mercurial checkout to scan. Defaults to the original
# hard-coded location; a different checkout may be given as the first
# command-line argument.
base_path = (
    sys.argv[1]
    if len(sys.argv) > 1
    else "/Users/flodolo/mozilla/mercurial/mozilla-unified/"
)

# List the files in the repository manifest. The path is the last field of
# each manifest entry and is joined onto base_path further down.
client = hglib.open(base_path)
try:
    files = [t[-1].decode("utf-8") for t in client.manifest()]
finally:
    # Close the client even if reading the manifest fails.
    client.close()

dtds = [f for f in files if f.endswith(".dtd")]

# XUL/XHTML/XML files that may reference DTD entities, leaving out anything
# whose path contains a test-like directory prefix.
xmls = [
    f
    for f in files
    if f.endswith((".xul", ".xhtml", ".xml"))
    and not any(marker in f for marker in ("/test", "/reftest", "/crashtest"))
]

# One (path, entity references, chrome DTD URLs) tuple per candidate file.
data = [getL10nUsage(os.path.join(base_path, f)) for f in xmls]

# Group DTD paths by file name so chrome URLs can be mapped back to a source
# file by basename.
n2f = defaultdict(list)
for d in dtds:
    n2f[os.path.basename(d)].append(d)

# Keep only the unambiguous names: a basename shared by several DTD files
# cannot be resolved to a single path and is ignored.
b2f = {b: fs[0] for b, fs in n2f.items() if len(fs) == 1}
# Cache of parsed DTD files: path -> set of entity keys. The sets are mutated
# in place by the callers of ensureParsed().
parsed = {}


def ensureParsed(f):
    """Return the set of entity keys defined in file *f*, parsing it once.

    The first call for a given path parses the file with the parser returned
    by getParser() and stores the resulting key set in ``parsed``. Later
    calls return that same set object, including any in-place changes made
    to it since.
    """
    try:
        return parsed[f]
    except KeyError:
        pass
    parser = getParser(f)
    parser.readFile(f)
    key_set = {entity.key for entity in parser.parse()}
    parsed[f] = key_set
    return key_set
# For every scanned file, strike the entity names it uses from the key set of
# each DTD it references. Only DTDs whose basename maps to a single path in
# b2f are considered. Whatever remains in a key set was never referenced.
for _source, used_keys, chrome_urls in data:
    for url in chrome_urls:
        dtd_path = b2f.get(os.path.basename(url))
        if not dtd_path:
            continue
        ensureParsed(os.path.join(base_path, dtd_path)).difference_update(used_keys)

# Report each parsed DTD that still has leftover keys, in path order.
for dtd_file, leftover in sorted(parsed.items()):
    if leftover:
        print(dtd_file)
        print("", " ".join(sorted(leftover)))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment