Last active
January 2, 2023 14:14
-
-
Save flodolo/5bf943d88f9a5d2725789728aae077cd to your computer and use it in GitHub Desktop.
Get Mozilla region names as JSON
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/env python
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
from urllib.request import urlopen | |
import argparse | |
import json | |
import os | |
import sys | |
from compare_locales import parser | |
def parse_region_file(file_content, storage):
    """Extract region names from Fluent file content into *storage*.

    The content is parsed with the compare_locales parser registered for
    ".ftl" files. For every non-Junk entity that has a raw value, the value
    is stored in *storage* under a region code derived from the entity's
    string form: the "region-name-" text is removed and only the part
    before the first remaining "-" is kept.

    Args:
        file_content: Content of a regionNames.ftl file, as passed to the
            parser's ``readContents``.
        storage: Dictionary updated in place, mapping region code to the
            entity's raw value.

    Errors raised while parsing or iterating the entities are printed and
    not propagated; entries stored before the error are kept.
    """
    ftl_parser = parser.getParser(".ftl")
    ftl_parser.readContents(file_content)

    try:
        for item in ftl_parser.parse():
            # Skip unparsable chunks (Junk) and entities without a value.
            if isinstance(item, parser.Junk) or item.raw_val is None:
                continue

            # NOTE(review): str(item) is presumably the message ID, e.g.
            # "region-name-ad" -> "ad" — confirm against compare_locales.
            identifier = str(item)
            code = identifier.replace("region-name-", "").split("-")[0]
            storage[code] = item.raw_val
    except Exception as e:
        print("Error parsing remote file.")
        print(e)
def _fetch_url(url):
    """Download *url* and return the raw response body as bytes."""
    # The context manager closes the HTTP response even if read() raises.
    with urlopen(url) as response:
        return response.read()


def main():
    """Export region names for Firefox locales as one JSON file per locale.

    Command line:
        --path     Existing folder where the "<locale>.json" files are
                   written (required).
        locales    Optional list of locales to process. When omitted, the
                   list of locales is read from mozilla-central's
                   "all-locales" file, minus a few excluded outliers.

    en-US is always processed and used as the reference: any region missing
    from a locale is filled in with the en-US name. A locale whose file
    cannot be retrieved is reported and skipped. The script exits with an
    error message if the output folder does not exist, or if the list of
    locales or the en-US reference file cannot be read.
    """
    # Named arg_parser to avoid shadowing the module-level compare_locales
    # "parser" import.
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        "--path",
        required=True,
        dest="output_path",
        help="Path to output folder for JSON files",
    )
    arg_parser.add_argument("locales", nargs="*", help="Locales to process")
    args = arg_parser.parse_args()

    if not os.path.isdir(args.output_path):
        sys.exit(f"{args.output_path} does not exist.")

    if args.locales:
        # Honor the locales requested on the command line. en-US is always
        # processed separately below as the reference, so drop it here.
        locales = sorted(set(args.locales) - {"en-US"})
    else:
        # Get the list of locales shipping in Firefox (official versions)
        try:
            print("Getting list of Firefox locales")
            url = "https://hg.mozilla.org/mozilla-central/raw-file/tip/browser/locales/all-locales"
            names = (
                line.strip().decode("utf-8")
                for line in _fetch_url(url).splitlines()
            )
            # Remove outliers and blank lines, then sort
            excluded_locales = {"ca-valencia", "en-US", "ja-JP-mac"}
            locales = sorted({name for name in names if name} - excluded_locales)
        except Exception as e:
            sys.exit(f"Error reading the list of locales: {e}")

    regions = {}

    # Read region names from en-US
    try:
        print("Getting list of region names for: en-US")
        url = "https://hg.mozilla.org/mozilla-central/raw-file/tip/toolkit/locales/en-US/toolkit/intl/regionNames.ftl"
        file_content = _fetch_url(url)
        locale_regions = {}
        parse_region_file(file_content, locale_regions)
        regions["en-US"] = locale_regions
        reference_regions = set(locale_regions)
    except Exception as e:
        sys.exit(f"Error reading remote regionNames.ftl for en-US: {e}")

    # Extract data from all locales
    for locale in locales:
        try:
            print(f"Getting list of region names for: {locale}")
            url = f"https://hg.mozilla.org/l10n-central/{locale}/raw-file/default/toolkit/toolkit/intl/regionNames.ftl"
            file_content = _fetch_url(url)
            locale_regions = {}
            parse_region_file(file_content, locale_regions)
            regions[locale] = locale_regions

            # Use en-US values if a locale is missing translations
            for region in reference_regions - set(locale_regions):
                locale_regions[region] = regions["en-US"][region]
        except Exception as e:
            # Best effort: report the failure and continue with the next
            # locale.
            print(f"Error reading remote regionNames.ftl for {locale}: {e}")

    # Save JSON files
    for locale, locale_data in regions.items():
        output_file = os.path.join(args.output_path, f"{locale}.json")
        with open(output_file, "w", encoding="utf8") as f:
            json.dump(locale_data, f, indent=4, sort_keys=True, ensure_ascii=True)
# Run the export only when this file is executed as a script, so importing
# the module does not trigger any network access or file writes.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment