Last active
September 6, 2019 17:56
-
-
Save zbraniecki/422e08e23318ddf558de478c7cec8bc5 to your computer and use it in GitHub Desktop.
AWFY scripts
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from arewefluentyet import data | |
import os | |
import subprocess | |
import json | |
from datetime import date, datetime, timedelta | |
# Adjust this path to your local check-out of the gh-pages branch.
REPO = "/Users/zbraniecki/projects/fluent/arewefluentyet.com/gh-pages"
# Local Mercurial check-out that the `hg` commands below are run against and
# from which the l10n.toml configuration is read.
MC = "/Users/zbraniecki/projects/mozilla-unified"
# Directory inside REPO that holds progress.json and snapshot.json.
DATA_PATH = os.path.join(REPO, "./data")
# NOTE(review): not referenced by the code visible in this script; presumably
# the intended spacing between collected data points -- confirm before relying
# on it.
FREQUENCY = timedelta(days=7)
def read_progress_data():
    """Load the accumulated progress entries from ``progress.json``.

    Returns:
        The parsed JSON content of ``DATA_PATH/progress.json``. When the file
        does not exist, a warning is printed and an empty list is returned so
        the caller can start a fresh history.
    """
    path = os.path.join(DATA_PATH, "progress.json")
    if not os.path.exists(path):
        print("Warning: \"{}\" doesn't exist. Creating a new one.".format(path))
        return []
    # Read through a context manager so the handle is closed before the
    # script later re-opens the same file for writing.
    with open(path) as progress_file:
        return json.load(progress_file)
def read_last_date(progress_data):
    """Return the date of the most recent progress entry.

    Args:
        progress_data: Sequence of entries, each with a ``"date"`` string of
            dash-separated integers (year-month-day).

    Returns:
        A ``datetime.date`` built from the last entry's ``"date"`` field, or
        ``None`` when ``progress_data`` is empty.
    """
    if len(progress_data) == 0:
        return None
    newest_entry = progress_data[-1]
    # The components are passed positionally to date(), i.e. year, month, day.
    components = [int(piece) for piece in newest_entry["date"].split("-")]
    return date(*components)
def pick_next_revision(last_date):
    """Return the revision hash to collect data for next.

    Args:
        last_date: Date of the latest recorded entry; currently ignored.

    Returns:
        A fixed revision hash string.
    """
    # Hardcoded until there is a way to calculate it from `last_date`.
    next_revision = "f594a688b3c42ce85e3382ef6de943fb8494ab16"
    return next_revision
def get_current_revision():
    """Return the output of ``hg id`` with the ``{id}`` template for ``MC``.

    Returns:
        The command's standard output decoded as UTF-8.

    Raises:
        subprocess.CalledProcessError: If ``hg`` exits with a non-zero status.
    """
    command = ['hg', 'id', MC, '-T{id}']
    raw_output = subprocess.check_output(command)
    return raw_output.decode('utf-8')
def get_revision_date(rev):
    """Return the ``{date|shortdate}`` template output of ``hg id`` for ``rev``.

    Args:
        rev: Revision identifier passed to ``hg id -r``.

    Returns:
        The command's standard output decoded as UTF-8.

    Raises:
        subprocess.CalledProcessError: If ``hg`` exits with a non-zero status.
    """
    command = ['hg', 'id', MC, '-r', rev, '-T', '{date|shortdate}']
    raw_output = subprocess.check_output(command)
    short_date = raw_output.decode('utf-8')
    return short_date
def switch_to_revision(rev):
    """Update the ``MC`` working copy to ``rev`` via ``hg update -c``.

    Changes the process working directory to ``MC`` as a side effect and
    prints the value returned by ``subprocess.check_call``.

    Args:
        rev: Revision identifier passed to ``hg update -r``.

    Raises:
        subprocess.CalledProcessError: If ``hg`` exits with a non-zero status.
    """
    os.chdir(MC)
    command = ["hg", "update", "-c", "-r", rev]
    exit_status = subprocess.check_call(command)
    print(exit_status)
def extract_progress(dataset):
    """Flatten per-file counts and total them per file extension.

    Args:
        dataset: Iterable of mappings from file path to a numeric count.

    Returns:
        A tuple ``(entries, progress)`` where ``entries`` is a list of
        ``{"file": path, "count": count}`` dicts in iteration order and
        ``progress`` maps each extension (without the leading dot) to the
        sum of the counts of all files carrying that extension.
    """
    entries = []
    progress = {}
    for subset in dataset:
        for path, count in subset.items():
            entries.append({
                "file": path,
                "count": count
            })
            # splitext() yields "" for no extension and "." for a path that
            # ends in a bare dot; after dropping the dot both are empty.
            # Neither belongs to an extension bucket, so skip them instead of
            # raising KeyError on an empty key.
            ext = os.path.splitext(path)[1][1:]
            if not ext:
                continue
            progress[ext] = progress.get(ext, 0) + count
    return (entries, progress)
def update_data(progress_data, next_revision):
    """Collect data for ``next_revision`` and record it in ``progress_data``.

    If the working copy is not already at ``next_revision`` it is switched
    there for the duration of the collection and switched back afterwards,
    even when the collection fails.

    Args:
        progress_data: List of progress entries; a new entry with the keys
            ``"data"``, ``"date"`` and ``"revision"`` is appended in place.
        next_revision: Revision identifier to collect data for.

    Returns:
        A snapshot dict with the keys ``"date"``, ``"revision"`` and
        ``"data"`` (the per-file entries).
    """
    current_revision = get_current_revision()
    print("Your current revision is: {}.".format(current_revision))

    needs_switch = current_revision != next_revision
    if needs_switch:
        print("Updating data for revision: {}.".format(next_revision))
        switch_to_revision(next_revision)
    else:
        print("Collecting data for this revision.")

    try:
        aggregator = data.Aggregator(
            [os.path.join(MC, "browser/locales/l10n.toml")]
        )
        aggregator.load()
        result = aggregator.gather()
        rev_date = get_revision_date(next_revision)
    finally:
        # Restore the original revision even if gathering raised, so a
        # failed run does not leave the check-out on the wrong revision.
        if needs_switch:
            switch_to_revision(current_revision)

    (entries, progress) = extract_progress(result)

    progress_data.append({
        "data": progress,
        "date": rev_date,
        "revision": next_revision,
    })

    snapshot = {
        "date": rev_date,
        "revision": next_revision,
        "data": entries
    }
    return snapshot
# Script body: load the existing history, collect one more data point, then
# write both JSON files back into DATA_PATH.
progress_data = read_progress_data()
last_date = read_last_date(progress_data)
next_revision = pick_next_revision(last_date)
snapshot_data = update_data(progress_data, next_revision)

# Write through context managers so each file is flushed and closed as soon
# as it has been written.
with open(os.path.join(DATA_PATH, "progress.json"), "w") as progress_file:
    json.dump(
        progress_data, progress_file,
        indent=0,
        separators=(",", ": "),
        sort_keys=True,
    )

with open(os.path.join(DATA_PATH, "snapshot.json"), "w") as snapshot_file:
    json.dump(
        snapshot_data, snapshot_file,
        indent=0,
    )
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
import os | |
# Local check-out that contains the data files rewritten by this script.
REPO = "/Users/zbraniecki/projects/fluent/arewefluentyet.com/gh-pages"
# Both paths are appended to REPO with plain string concatenation, so the
# leading slash is required.
PROGRESS_JSON = "/data/progress.json"
SNAPSHOT_JSON = "/data/snapshot.json"
def normalize_path(path):
    """Strip everything up to and including ``mozilla-unified/`` from ``path``.

    Args:
        path: File path string.

    Returns:
        The part of ``path`` after the first ``"mozilla-unified/"``, or
        ``path`` unchanged when that marker does not occur.
    """
    marker = "mozilla-unified/"
    start = path.find(marker)
    if start == -1:
        # Without this guard find()'s -1 would silently chop the first
        # 15 characters off an already-relative path.
        return path
    return path[start + len(marker):]
# Load the existing progress history; the context manager closes the handle
# before the same file is re-opened for writing further down.
with open(REPO + PROGRESS_JSON) as progress_file:
    global_json = json.load(progress_file)

# Filled in from the final entry of the history while iterating.
last_entry = None
rev = None
date = None

for i, entry in enumerate(global_json):
    # Per-file counts live in the first element of the entry's "data".
    file_counts = entry['data'][0]
    if i == len(global_json) - 1:
        # Keep the per-file counts of the newest entry before they are
        # replaced by the per-extension totals below.
        last_entry = file_counts
        rev = entry['revision']
        date = entry['date']
    snap = {
        "dtd": 0,
        "inc": 0,
        "ini": 0,
        "ftl": 0,
        "properties": 0,
    }
    for path, value in file_counts.items():
        _, ext = os.path.splitext(path)
        if not ext:
            # NOTE(review): with no extension the whole path (minus its first
            # character) becomes the bucket key -- presumably meant for
            # dot-only names such as ".inc"; confirm against the data.
            ext = path
        ext = ext[1:]
        # An extension outside the five buckets above raises KeyError.
        snap[ext] += value
    entry['data'] = snap
# Turn the newest entry's per-file counts into the snapshot layout:
# a list of {"file", "count"} records with normalized paths.
entries = [
    {
        "file": normalize_path(file_path),
        "count": last_entry[file_path],
    }
    for file_path in last_entry
]
snapshot = {"date": date, "revision": rev, "data": entries}
# Write the migrated history and the snapshot back to disk. Context managers
# make sure each file is flushed and closed once written.
with open(REPO + PROGRESS_JSON, "w") as progress_file:
    json.dump(
        global_json, progress_file,
        indent=0,
        separators=(",", ": "),
        sort_keys=True,
    )

with open(REPO + SNAPSHOT_JSON, "w") as snapshot_file:
    json.dump(
        snapshot, snapshot_file,
        indent=0,
    )
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
import json | |
def collect_dtd(path):
    """Return the entity names declared in the DTD file at ``path``.

    Args:
        path: Path of the file to scan.

    Returns:
        A list with one string per ``<!ENTITY name `` occurrence, in file
        order. The name is the run of non-space characters that follows
        ``<!ENTITY `` and is terminated by a space.
    """
    # Close the file as soon as its content has been read.
    with open(path) as dtd_file:
        source = dtd_file.read()
    re_entity = re.compile(r"<!ENTITY ([^ ]+) ")
    # With a single capture group findall() already returns a list of names.
    return re_entity.findall(source)
def collect_used_dtds(path):
    """Return the ids of all ``"dtd"`` entries in the JSON file at ``path``.

    Args:
        path: Path of a JSON file whose top-level ``"data"`` value is a list
            of entries carrying ``"type"`` and ``"id"`` keys.

    Returns:
        A list of the ``"id"`` values of every entry whose ``"type"`` equals
        ``"dtd"``, in file order.
    """
    # Close the file as soon as the JSON has been parsed.
    with open(path) as snapshot_file:
        data = json.load(snapshot_file)
    return [entry["id"] for entry in data["data"] if entry["type"] == "dtd"]
# Print every entity declared in browser.dtd that the snapshot does not list
# as a used DTD id.
dtds = collect_dtd("/Users/zbraniecki/projects/mozilla-unified/browser/locales/en-US/chrome/browser/browser.dtd")
used_dtds = collect_used_dtds("/Users/zbraniecki/projects/fluent/arewefluentyet.com/snapshot.json")

# Build the lookup set once so each membership test is O(1) instead of
# scanning the whole list per declared entity.
# NOTE(review): assumes the ids are hashable (strings) -- confirm.
used_dtd_ids = set(used_dtds)
for key in dtds:
    if key not in used_dtd_ids:
        print(key)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment