Created
March 28, 2021 15:11
-
-
Save flodolo/34c3ff2e02d8dde270001d300f22e050 to your computer and use it in GitHub Desktop.
Check for unused DTDs in mozilla-central
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/env python3 | |
# This Source Code Form is subject to the terms of the Mozilla Public | |
# License, v. 2.0. If a copy of the MPL was not distributed with this | |
# file, You can obtain one at http://mozilla.org/MPL/2.0/. | |
from collections import defaultdict | |
import re | |
import os | |
import sys | |
from compare_locales.parser import getParser | |
import hglib | |
# Maps each file path to the set of normalized paths it pulls in through
# "#include" directives; filled in as a side effect of getContent().
includes = defaultdict(set)

# A preprocessor-style "#include <path>" directive occupying its own line.
_INCLUDE_RE = re.compile(r"^#include\s+([^\s]*?)\s*$", flags=re.M)


def getContent(f, _active=None):
    """Return the text of *f* with every ``#include`` directive expanded.

    Include targets are resolved relative to the directory of the including
    file, normalized, recorded in the module-level ``includes`` mapping and
    expanded recursively.

    Args:
        f: Path of the file to read.
        _active: Internal. Set of paths currently being expanded higher up
            the include chain; callers should leave it unset.

    Returns:
        The expanded content, or ``""`` when *f* cannot be read or decoded.
        A directive that would re-enter a file already being expanded
        (circular include) is replaced by ``""`` rather than recursing until
        the interpreter's recursion limit is hit.
    """
    active = frozenset() if _active is None else _active
    if f in active:
        # Circular include: stop here instead of relying on RecursionError.
        return ""

    try:
        with open(f, encoding="utf-8") as handle:
            content = handle.read()
    except (OSError, UnicodeDecodeError):
        # Best effort: missing, unreadable or non-text files contribute
        # nothing to the scan.
        return ""

    base_dir = os.path.dirname(f)
    nested = active | {f}

    def inner(m):
        target = os.path.normpath(os.path.join(base_dir, m.group(1)))
        includes[f].add(target)
        return getContent(target, nested)

    return _INCLUDE_RE.sub(inner, content)
def getL10nUsage(f):
    """Scan *f* for entity references and chrome DTD URLs.

    The file is read through ``getContent``, so ``#include`` directives are
    expanded before the text is searched.

    Returns:
        A ``(f, keys, chromefiles)`` tuple. ``keys`` lists every name captured
        between ``&`` and ``;``; ``chromefiles`` lists every double-quoted
        string of the form ``chrome:…/locale/…dtd``. Both lists are in order
        of appearance and keep duplicates.
    """
    text = getContent(f)
    entity_pattern = re.compile("&(.*?);", flags=re.M)
    dtd_url_pattern = re.compile('"(chrome:.*?/locale/.*?dtd)"', flags=re.M)
    return (f, entity_pattern.findall(text), dtd_url_pattern.findall(text))
# Root of the local Mercurial checkout that gets scanned.
base_path = "/Users/flodolo/mozilla/mercurial/mozilla-unified/"

# Ask Mercurial for the tracked files; the last field of each manifest entry
# is the repository-relative path as bytes.
client = hglib.open(base_path)
files = [entry[-1].decode("utf-8") for entry in client.manifest()]
client.close()

# Every tracked DTD file.
dtds = [path for path in files if path.endswith(".dtd")]

# Markup files that may reference DTD entities, skipping test directories.
xmls = [
    path
    for path in files
    if path.endswith((".xul", ".xhtml", ".xml"))
    and not any(marker in path for marker in ("/test", "/reftest", "/crashtest"))
]

# One (file, entity keys, chrome DTD URLs) tuple per markup file.
data = [getL10nUsage(os.path.join(base_path, path)) for path in xmls]

# Group DTD paths by file name ...
n2f = defaultdict(list)
for dtd_path in dtds:
    n2f[os.path.basename(dtd_path)].append(dtd_path)

# ... and keep only the names that identify exactly one DTD in the tree, so a
# chrome URL can be mapped back to a file by its basename alone.
b2f = {name: paths[0] for name, paths in n2f.items() if len(paths) == 1}
# Cache of DTD path -> set of entity keys defined in that file. The sets are
# handed out by reference, so callers that mutate them change the cache too.
parsed = {}


def ensureParsed(f):
    """Return the set of entity keys defined in the DTD at *f*.

    The file is parsed with the compare-locales parser on first request and
    the resulting set is stored in the module-level ``parsed`` cache; later
    calls return that same set object.
    """
    try:
        return parsed[f]
    except KeyError:
        pass

    dtd_parser = getParser(f)
    dtd_parser.readFile(f)
    key_set = {entity.key for entity in dtd_parser.parse()}
    parsed[f] = key_set
    return key_set
# For each scanned markup file, strike the entities it references from every
# uniquely-named DTD it links to. The subtraction is done in place on the set
# held in the ``parsed`` cache, so what is left there is the unused keys.
for _, used_keys, chrome_refs in data:
    referenced = set(used_keys)
    for chrome_url in chrome_refs:
        dtd_path = b2f.get(os.path.basename(chrome_url))
        if not dtd_path:
            # Unknown or ambiguous DTD file name.
            continue
        remaining = ensureParsed(os.path.join(base_path, dtd_path))
        remaining -= referenced

# Report, per DTD in path order, the keys no markup file referenced.
for dtd_file in sorted(parsed):
    unused = parsed[dtd_file]
    if not unused:
        continue
    print(dtd_file)
    print("", " ".join(sorted(unused)))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment