zbraniecki · September 13, 2019 21:54
diff --git a/id-count.py b/id-count.py
 import subprocess
 import os
 import re

 # Glob patterns for file types meant to be left out of the search.
 # NOTE(review): in the visible code this list is only referenced from
 # commented-out lines in presence_count, so it currently has no effect.
 exclude_formats = [
    "*.dtd",
    "*.properties",
 ]

 # Glob patterns that presence_count hands to rg via "-g"; each one is passed
 # as its own "-g <glob>" pair.
 include_formats = [
    "*.xul",
    "*.xhtml",
    "*.xml.in",
    "*.inc",
    "*.js",
    "*.jsm",
    "*.dtd",
 ]

 # Paths that presence_count appends to the rg command line as search roots.
 # They are relative, so they resolve against the current working directory.
 directories = [
    "browser/",
    "toolkit/",
    "devtools/",
    "dom/",
    "docshell/",
    "mobile/",
    "layout/",
    "security/",
    "extensions/",
    "widget/",
 ]

 # Ids that check_file skips instead of printing when no occurrence is found.
 id_whitelist = [
    "sharedLongDesc",
    "syncBrand.shortName.label",
    "syncBrand.fxAccount.label",
 ]

 def presence_count(ids):
     """Count, per file, how often each id occurs in the searched tree.

     Runs ``rg -c`` once per id, restricted to the globs in
     ``include_formats`` and to the paths in ``directories``.

     Args:
         ids: Iterable of id strings. Each id is searched as a literal
             string, not as a regular expression.

     Returns:
         A dict mapping every id to a ``{path: count}`` dict with one entry
         per file rg reported. An id with no reported file maps to ``{}``.
     """
     # Globs and search roots are identical for every id, so assemble those
     # parts of the command line once. "-F" makes rg treat the id literally,
     # so "." or "(" inside an id is not interpreted as regex syntax.
     base_command = ["rg", "-c", "-F"]
     for glob in include_formats:
         base_command.extend(["-g", glob])
     search_roots = list(directories)

     result = {}
     for needle in ids:
         # "-e" keeps an id that starts with "-" from being read as an option.
         command = base_command + ["-e", needle] + search_roots
         res = subprocess.run(command, stdout=subprocess.PIPE, text=True)
         counts = {}
         for line in res.stdout.splitlines():
             # Split on the last colon only: the count is always the final
             # field, while the path itself may contain ":".
             path, count = line.rsplit(":", 1)
             counts[path] = int(count)
         result[needle] = counts

     return result

 def find_files(path, ext):
     """Return the paths below *path* whose name matches the pattern *ext*.

     Shells out to ``find <path> -name <ext>`` and returns one list entry
     per line of its standard output, in the order ``find`` printed them.
     """
     completed = subprocess.run(
         ["find", path, "-name", ext],
         stdout=subprocess.PIPE,
         text=True,
     )
     return completed.stdout.splitlines()

 def get_ids_from_xml(source):
     """Collect the names of entity references found in *source*.

     A reference is ``&`` followed by an ASCII letter, at least one more
     character other than ``;``, and a closing ``;``. The names are returned
     without the ``&`` and ``;`` delimiters, in order of appearance, with
     duplicates kept.
     """
     entity_reference = re.compile("&([a-zA-Z][^;]+);")
     # With a single capture group, findall already yields the bare names.
     return entity_reference.findall(source)

 def get_ids_from_dtd(source):
     """Extract the entities declared in DTD text as ``&name;`` references.

     Every ``<!ENTITY name ...>`` declaration contributes one ``"&name;"``
     string, in order of appearance. Parameter-entity declarations
     (``<!ENTITY % name ...>``) are skipped; without that check they would
     be reported as the bogus id ``"&%;"``.

     Args:
         source: Text of a DTD file.

     Returns:
         List of ``"&name;"`` strings.
     """
     # Raw string: "\s" in a normal string literal is an invalid escape.
     # The first name character must not be "%", which marks a parameter
     # entity rather than a general entity.
     entity_decl = re.compile(r"<!ENTITY\s+([^\s%][^\s]*)\s")
     return ["&{};".format(name) for name in entity_decl.findall(source)]

 def get_ids_from_ftl(source):
     """Extract message ids from Fluent (``.ftl``) text.

     An id is recognised at the start of a line: an ASCII letter, followed
     by any run of characters that are neither whitespace nor ``=``,
     followed by optional spaces or tabs and ``=``. This accepts
     ``key = value``, ``key=value`` and one-letter ids. Indented lines,
     comment lines and lines starting with ``-`` or ``.`` do not match.

     Args:
         source: Text of an ``.ftl`` file.

     Returns:
         List of ids in order of appearance.
     """
     # Raw string: "\s" in a normal string literal is an invalid escape.
     message_id = re.compile(r"^([a-zA-Z][^\s=]*)[ \t]*=", re.M)
     return message_id.findall(source)

 def get_ids_from_properties(source):
     """Extract keys from ``.properties`` text.

     A key is the run of characters at the start of a line, containing
     neither whitespace nor ``=``, that is followed by optional whitespace
     and ``=``. Lines starting with ``#`` or ``!`` are skipped, so a
     commented-out entry such as ``#foo=bar`` is not reported as the key
     ``#foo``.

     Args:
         source: Text of a ``.properties`` file.

     Returns:
         List of keys in order of appearance.
     """
     # Raw string: "\s" in a normal string literal is an invalid escape.
     # The negative lookahead rejects comment lines before the key is read.
     key_pattern = re.compile(r"^(?![#!])([^\s=]+)\s*=", re.M)
     return key_pattern.findall(source)

 def check_file(path, extract):
     """Print the ids from one file that occur nowhere in the searched tree.

     Reads the file at *path*, extracts ids with *extract*, looks them up
     with ``presence_count`` and prints every id without any occurrence,
     unless the id is listed in ``id_whitelist``.

     Args:
         path: Path of the file to scan.
         extract: Callable that takes the file's text and returns the ids
             to check.
     """
     print("Scanning for path: {}".format(path))
     # Use a context manager so the file handle is closed right away.
     with open(path) as source_file:
         raw_data = source_file.read()

     occurrences = presence_count(extract(raw_data))
     for entry, hits in occurrences.items():
         if hits:
             continue
         # get_ids_from_dtd yields "&name;" while id_whitelist stores bare
         # names, so compare the bare spelling as well as the id itself.
         if entry.startswith("&") and entry.endswith(";"):
             bare = entry[1:-1]
         else:
             bare = entry
         if entry in id_whitelist or bare in id_whitelist:
             continue
         print("{}".format(entry))


 def verify_xml_slice(source):
     """Print the occurrence counts of every entity referenced in *source*.

     The entity names come from ``get_ids_from_xml`` and the counts from
     ``presence_count``; the resulting dict is printed as-is.
     """
     print(presence_count(get_ids_from_xml(source)))

 # Script body: check every *.dtd file found below ./mobile and print the
 # entities that check_file finds no occurrence for.
 files = find_files("./mobile", "*.dtd")
 for dtd_path in files:
     check_file(dtd_path, get_ids_from_dtd)
	import subprocess
	import os
	import re

	# Glob patterns for file types meant to be left out of the search.
	# NOTE(review): in the visible code this list is only referenced from
	# commented-out lines in presence_count, so it currently has no effect.
	exclude_formats = [
	"*.dtd",
	"*.properties",
	]

	# Glob patterns that presence_count hands to rg via "-g"; each one is passed
	# as its own "-g <glob>" pair.
	include_formats = [
	"*.xul",
	"*.xhtml",
	"*.xml.in",
	"*.inc",
	"*.js",
	"*.jsm",
	"*.dtd",
	]

	# Paths that presence_count appends to the rg command line as search roots.
	# They are relative, so they resolve against the current working directory.
	directories = [
	"browser/",
	"toolkit/",
	"devtools/",
	"dom/",
	"docshell/",
	"mobile/",
	"layout/",
	"security/",
	"extensions/",
	"widget/",
	]

	# Ids that check_file skips instead of printing when no occurrence is found.
	id_whitelist = [
	"sharedLongDesc",
	"syncBrand.shortName.label",
	"syncBrand.fxAccount.label",
	]

	def presence_count(ids):
	    """Count, per file, how often each id occurs in the searched tree.

	    Runs ``rg -c`` once per id, restricted to the globs in
	    ``include_formats`` and to the paths in ``directories``.

	    Args:
	        ids: Iterable of id strings. Each id is searched as a literal
	            string, not as a regular expression.

	    Returns:
	        A dict mapping every id to a ``{path: count}`` dict with one entry
	        per file rg reported. An id with no reported file maps to ``{}``.
	    """
	    # Globs and search roots are identical for every id, so assemble those
	    # parts of the command line once. "-F" makes rg treat the id literally,
	    # so "." or "(" inside an id is not interpreted as regex syntax.
	    base_command = ["rg", "-c", "-F"]
	    for glob in include_formats:
	        base_command.extend(["-g", glob])
	    search_roots = list(directories)

	    result = {}
	    for needle in ids:
	        # "-e" keeps an id that starts with "-" from being read as an option.
	        command = base_command + ["-e", needle] + search_roots
	        res = subprocess.run(command, stdout=subprocess.PIPE, text=True)
	        counts = {}
	        for line in res.stdout.splitlines():
	            # Split on the last colon only: the count is always the final
	            # field, while the path itself may contain ":".
	            path, count = line.rsplit(":", 1)
	            counts[path] = int(count)
	        result[needle] = counts

	    return result

	def find_files(path, ext):
	    """Return the paths below *path* whose name matches the pattern *ext*.

	    Shells out to ``find <path> -name <ext>`` and returns one list entry
	    per line of its standard output, in the order ``find`` printed them.
	    """
	    completed = subprocess.run(
	        ["find", path, "-name", ext],
	        stdout=subprocess.PIPE,
	        text=True,
	    )
	    return completed.stdout.splitlines()

	def get_ids_from_xml(source):
	    """Collect the names of entity references found in *source*.

	    A reference is ``&`` followed by an ASCII letter, at least one more
	    character other than ``;``, and a closing ``;``. The names are returned
	    without the ``&`` and ``;`` delimiters, in order of appearance, with
	    duplicates kept.
	    """
	    entity_reference = re.compile("&([a-zA-Z][^;]+);")
	    # With a single capture group, findall already yields the bare names.
	    return entity_reference.findall(source)

	def get_ids_from_dtd(source):
	    """Extract the entities declared in DTD text as ``&name;`` references.

	    Every ``<!ENTITY name ...>`` declaration contributes one ``"&name;"``
	    string, in order of appearance. Parameter-entity declarations
	    (``<!ENTITY % name ...>``) are skipped; without that check they would
	    be reported as the bogus id ``"&%;"``.

	    Args:
	        source: Text of a DTD file.

	    Returns:
	        List of ``"&name;"`` strings.
	    """
	    # Raw string: "\s" in a normal string literal is an invalid escape.
	    # The first name character must not be "%", which marks a parameter
	    # entity rather than a general entity.
	    entity_decl = re.compile(r"<!ENTITY\s+([^\s%][^\s]*)\s")
	    return ["&{};".format(name) for name in entity_decl.findall(source)]

	def get_ids_from_ftl(source):
	    """Extract message ids from Fluent (``.ftl``) text.

	    An id is recognised at the start of a line: an ASCII letter, followed
	    by any run of characters that are neither whitespace nor ``=``,
	    followed by optional spaces or tabs and ``=``. This accepts
	    ``key = value``, ``key=value`` and one-letter ids. Indented lines,
	    comment lines and lines starting with ``-`` or ``.`` do not match.

	    Args:
	        source: Text of an ``.ftl`` file.

	    Returns:
	        List of ids in order of appearance.
	    """
	    # Raw string: "\s" in a normal string literal is an invalid escape.
	    message_id = re.compile(r"^([a-zA-Z][^\s=]*)[ \t]*=", re.M)
	    return message_id.findall(source)

	def get_ids_from_properties(source):
	    """Extract keys from ``.properties`` text.

	    A key is the run of characters at the start of a line, containing
	    neither whitespace nor ``=``, that is followed by optional whitespace
	    and ``=``. Lines starting with ``#`` or ``!`` are skipped, so a
	    commented-out entry such as ``#foo=bar`` is not reported as the key
	    ``#foo``.

	    Args:
	        source: Text of a ``.properties`` file.

	    Returns:
	        List of keys in order of appearance.
	    """
	    # Raw string: "\s" in a normal string literal is an invalid escape.
	    # The negative lookahead rejects comment lines before the key is read.
	    key_pattern = re.compile(r"^(?![#!])([^\s=]+)\s*=", re.M)
	    return key_pattern.findall(source)

	def check_file(path, extract):
	    """Print the ids from one file that occur nowhere in the searched tree.

	    Reads the file at *path*, extracts ids with *extract*, looks them up
	    with ``presence_count`` and prints every id without any occurrence,
	    unless the id is listed in ``id_whitelist``.

	    Args:
	        path: Path of the file to scan.
	        extract: Callable that takes the file's text and returns the ids
	            to check.
	    """
	    print("Scanning for path: {}".format(path))
	    # Use a context manager so the file handle is closed right away.
	    with open(path) as source_file:
	        raw_data = source_file.read()

	    occurrences = presence_count(extract(raw_data))
	    for entry, hits in occurrences.items():
	        if hits:
	            continue
	        # get_ids_from_dtd yields "&name;" while id_whitelist stores bare
	        # names, so compare the bare spelling as well as the id itself.
	        if entry.startswith("&") and entry.endswith(";"):
	            bare = entry[1:-1]
	        else:
	            bare = entry
	        if entry in id_whitelist or bare in id_whitelist:
	            continue
	        print("{}".format(entry))


	def verify_xml_slice(source):
	    """Print the occurrence counts of every entity referenced in *source*.

	    The entity names come from ``get_ids_from_xml`` and the counts from
	    ``presence_count``; the resulting dict is printed as-is.
	    """
	    print(presence_count(get_ids_from_xml(source)))

	# Script body: check every *.dtd file found below ./mobile and print the
	# entities that check_file finds no occurrence for.
	files = find_files("./mobile", "*.dtd")
	for path in files:
	    check_file(path, get_ids_from_dtd)