Skip to content

Instantly share code, notes, and snippets.

@zbraniecki
Last active September 13, 2019 21:54
Show Gist options
  • Save zbraniecki/36d16cd796b35a2eacb47ecb609535cf to your computer and use it in GitHub Desktop.
Save zbraniecki/36d16cd796b35a2eacb47ecb609535cf to your computer and use it in GitHub Desktop.
find unused strings
import fnmatch
import os
import re
import subprocess
# Glob patterns of the localization file formats themselves. Currently not
# used by any function in this file.
exclude_formats = ["*.dtd", "*.properties"]

# Glob patterns handed to ripgrep (``-g``): only files matching one of them
# are searched for identifier usages.
include_formats = [
    "*.xul",
    "*.xhtml",
    "*.xml.in",
    "*.inc",
    "*.js",
    "*.jsm",
    "*.dtd",
]

# Directories, relative to the current working directory, that ripgrep
# searches for identifier usages.
directories = [
    "browser/",
    "toolkit/",
    "devtools/",
    "dom/",
    "docshell/",
    "mobile/",
    "layout/",
    "security/",
    "extensions/",
    "widget/",
]

# Identifiers that are never reported as unused, even when the search finds
# no occurrence of them.
id_whitelist = [
    "sharedLongDesc",
    "syncBrand.shortName.label",
    "syncBrand.fxAccount.label",
]
def presence_count(ids):
    """Look up, for each identifier, the files in which it occurs.

    One ``rg -c`` search is run per identifier. The search is limited to
    files matching the globs in ``include_formats`` and to the directories
    listed in ``directories``.

    Args:
        ids: Iterable of identifier strings. Each one is searched for as a
            literal string, not as a regular expression.

    Returns:
        A dict mapping every identifier to a ``{path: count}`` dict, where
        ``count`` is the number ripgrep printed for that file. The inner
        dict is empty when ripgrep reported no matching file.
    """
    result = {}
    for ident in ids:
        res = subprocess.run(
            _rg_count_command(ident), stdout=subprocess.PIPE, text=True
        )
        result[ident] = _parse_rg_counts(res.stdout)
    return result


def _rg_count_command(pattern):
    """Build the ``rg`` argument list that counts matches of *pattern*."""
    # -F: identifiers such as "a.b.label" must match literally; as a regex
    # the "." would match any character and hide unused strings.
    command = ["rg", "-c", "-F"]
    for glob in include_formats:
        command.extend(["-g", glob])
    # -e keeps an identifier starting with "-" from being read as an option.
    command.extend(["-e", pattern])
    command.extend(directories)
    return command


def _parse_rg_counts(output):
    """Turn ``path:count`` lines printed by ``rg -c`` into a dict."""
    counts = {}
    for line in output.splitlines():
        # Split on the LAST colon only: the path itself may contain ":".
        path, sep, count = line.rpartition(":")
        if not sep or not count.isdigit():
            # Not a "path:count" line; ignore it instead of crashing.
            continue
        counts[path] = int(count)
    return counts
def find_files(path, ext):
    """Return the paths below *path* whose base name matches *ext*.

    Pure-Python equivalent of ``find <path> -name <ext>``: the tree is walked
    with ``os.walk`` instead of spawning the external ``find`` program, so
    the function does not depend on that binary being installed.

    Args:
        path: Directory at which the search starts.
        ext: Shell-style glob (for example ``"*.dtd"``) that is compared,
            case-sensitively, against the base name of every entry.

    Returns:
        A list of matching paths, each prefixed with *path*. The list is
        empty when nothing matches or when *path* does not exist.
    """
    result = []
    # Like ``find``, test the starting point itself before descending.
    start_name = os.path.basename(os.path.normpath(path))
    if os.path.lexists(path) and fnmatch.fnmatchcase(start_name, ext):
        result.append(path)
    for dirpath, dirnames, filenames in os.walk(path):
        # ``find -name`` tests directories as well as files.
        for name in dirnames + filenames:
            if fnmatch.fnmatchcase(name, ext):
                result.append(os.path.join(dirpath, name))
    return result
def get_ids_from_xml(source):
    """Return the names of all ``&name;`` entity references in *source*.

    Args:
        source: Text of an XML-like document.

    Returns:
        A list of entity names, without the surrounding ``&`` and ``;``, in
        order of appearance. Repeated references are listed repeatedly.
    """
    # A name starts with a letter and may contain neither whitespace nor
    # another "&"; this keeps a stray ampersand ("AT&T rocks;") from
    # swallowing the text up to the next semicolon.
    re_entity = re.compile(r"&([a-zA-Z][^;\s&]*);")
    return re_entity.findall(source)
def get_ids_from_dtd(source):
    """Return the entities declared in a DTD as ``&name;`` reference strings.

    Args:
        source: Text of a DTD file.

    Returns:
        A list with one ``"&name;"`` string per ``<!ENTITY name ...>``
        declaration, in order of appearance.
    """
    # Raw string: "\s" is not a valid escape in a normal string literal.
    # The name must not start with "%", so parameter-entity declarations
    # ("<!ENTITY % name SYSTEM ...>") do not produce the bogus id "&%;".
    re_dtd = re.compile(r"<!ENTITY\s+([^\s%]\S*)\s")
    return ["&{};".format(name) for name in re_dtd.findall(source)]
def get_ids_from_ftl(source):
    """Return the message identifiers defined in a Fluent (``.ftl``) source.

    Args:
        source: Text of a Fluent file.

    Returns:
        A list of message identifiers in order of appearance. Lines that do
        not start with a letter (comments, ``-terms``, indented ``.attribute``
        lines) are not included.
    """
    # An identifier is a letter followed by letters, digits, "_" or "-".
    # Only spaces/tabs may separate it from "=", and there may be none at
    # all ("key=value"); "\s" is avoided because it also matches newlines.
    re_ftl = re.compile(r"^([a-zA-Z][a-zA-Z0-9_-]*)[ \t]*=", re.M)
    return re_ftl.findall(source)
def get_ids_from_properties(source):
    """Return the keys defined in a ``.properties`` source.

    Args:
        source: Text of a ``.properties`` file.

    Returns:
        A list of keys (the text before ``=`` on each ``key = value`` line)
        in order of appearance. Lines starting with ``#`` or ``!`` are
        comments and are not included.
    """
    # The first character must not open a comment, otherwise a disabled
    # entry such as "#key=value" would be reported as the key "#key".
    # "[ \t]*" instead of "\s*" keeps the match on a single line.
    re_prop = re.compile(r"^([^\s=#!][^\s=]*)[ \t]*=", re.M)
    return re_prop.findall(source)
def check_file(path, extract):
    """Print every identifier defined in *path* for which no usage is found.

    Args:
        path: Path of the localization file to check.
        extract: Callable that takes the file's text and returns the list of
            identifiers to look up (for example ``get_ids_from_dtd``).

    Returns:
        None. The path being scanned and each unused identifier are printed
        to standard output; identifiers in ``id_whitelist`` are never printed.
    """
    print("Scanning for path: {}".format(path))
    # Close the file deterministically and do not depend on the locale's
    # default encoding; localization sources are expected to be UTF-8.
    with open(path, encoding="utf-8") as source_file:
        raw_data = source_file.read()
    usage = presence_count(extract(raw_data))
    for ident, hits in usage.items():
        # An empty hit mapping means the search found the identifier nowhere.
        if not hits and ident not in id_whitelist:
            print("{}".format(ident))
def verify_xml_slice(source):
    """Print the usage counts of the entities referenced in *source*.

    The entity names are extracted with ``get_ids_from_xml`` and looked up
    with ``presence_count``; the resulting mapping is printed as-is.

    Args:
        source: XML text containing ``&name;`` entity references.
    """
    print(presence_count(get_ids_from_xml(source)))
# Script entry: check every DTD file below ./mobile and print the entity
# identifiers for which no usage was found.
files = find_files("./mobile", "*.dtd")
for path in files:
    check_file(path, get_ids_from_dtd)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment