Skip to content

Instantly share code, notes, and snippets.

@flodolo
Last active January 2, 2023 14:14
Show Gist options
  • Save flodolo/5bf943d88f9a5d2725789728aae077cd to your computer and use it in GitHub Desktop.
Save flodolo/5bf943d88f9a5d2725789728aae077cd to your computer and use it in GitHub Desktop.
Get Mozilla region names as JSON
#! /usr/bin/env python
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
from urllib.request import urlopen
import argparse
import json
import os
import sys
from compare_locales import parser
def parse_region_file(file_content, storage):
    """Collect region names from Fluent content into ``storage``.

    Args:
        file_content: Content handed to the ``.ftl`` parser's
            ``readContents``; callers in this file pass the raw bytes
            of an HTTP response.
        storage: Dictionary updated in place, mapping a region code to
            the raw value of the matching entity.

    Errors raised while parsing or iterating are printed rather than
    propagated, so ``storage`` may end up only partially filled.
    """
    ftl_parser = parser.getParser(".ftl")
    ftl_parser.readContents(file_content)
    try:
        for item in ftl_parser.parse():
            # Skip Junk entries and entities that carry no value.
            if isinstance(item, parser.Junk) or item.raw_val is None:
                continue
            # The string form of the entity is expected to look like
            # "region-name-<code>[-suffix]"; keep only "<code>".
            identifier = str(item)
            code = identifier.replace("region-name-", "").split("-")[0]
            storage[code] = item.raw_val
    except Exception as e:
        print("Error parsing remote file.")
        print(e)
def _read_url(url):
    """Return the raw bytes served at ``url``, closing the response afterwards."""
    with urlopen(url) as response:
        return response.read()


def main():
    """Download region names and write one JSON file per locale.

    Command line:
        --path     Existing folder where the ``<locale>.json`` files are
                   written (required).
        locales    Optional locale codes. When given, only these locales
                   are processed; otherwise the list of locales is read
                   from mozilla-central's ``all-locales`` file.

    en-US is always downloaded and saved: it is the reference used to fill
    in region names missing from a locale. The script exits with an error
    message if the output folder does not exist, or if the locale list or
    the en-US file cannot be read. A failure on a single locale is printed
    and that locale is skipped.
    """
    # Named arg_parser so it does not shadow the file-level `parser` module
    # imported from compare_locales.
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        "--path",
        required=True,
        dest="output_path",
        help="Path to output folder for JSON files",
    )
    arg_parser.add_argument("locales", nargs="*", help="Locales to process")
    args = arg_parser.parse_args()

    if not os.path.isdir(args.output_path):
        sys.exit(f"{args.output_path} does not exist.")

    if args.locales:
        # Honor the locales requested on the command line. en-US is dropped
        # here because it is always processed separately as the reference.
        locales = sorted(set(args.locales) - {"en-US"})
    else:
        # Get the list of locales shipping in Firefox (official versions)
        try:
            print("Getting list of Firefox locales")
            url = "https://hg.mozilla.org/mozilla-central/raw-file/tip/browser/locales/all-locales"
            remote_locales = {
                line.strip().decode("utf-8")
                for line in _read_url(url).splitlines()
            }
            # A blank line in the list would otherwise become an empty locale
            remote_locales.discard("")
            # Remove outliers and sort
            excluded_locales = {"ca-valencia", "en-US", "ja-JP-mac"}
            locales = sorted(remote_locales - excluded_locales)
        except Exception as e:
            sys.exit(f"Error reading the list of locales: {e}")

    regions = {}

    # Read region names from en-US
    try:
        print("Getting list of region names for: en-US")
        url = "https://hg.mozilla.org/mozilla-central/raw-file/tip/toolkit/locales/en-US/toolkit/intl/regionNames.ftl"
        reference = {}
        parse_region_file(_read_url(url), reference)
        regions["en-US"] = reference
        reference_regions = set(reference)
    except Exception as e:
        sys.exit(f"Error reading remote regionNames.ftl for en-US: {e}")

    # Extract data from all locales
    for locale in locales:
        try:
            print(f"Getting list of region names for: {locale}")
            url = f"https://hg.mozilla.org/l10n-central/{locale}/raw-file/default/toolkit/toolkit/intl/regionNames.ftl"
            locale_regions = {}
            parse_region_file(_read_url(url), locale_regions)
            regions[locale] = locale_regions

            # Use en-US values if a locale is missing translations
            for code in reference_regions - set(locale_regions):
                locale_regions[code] = reference[code]
        except Exception as e:
            print(f"Error reading remote regionNames.ftl for {locale}: {e}")

    # Save JSON files
    for locale, locale_data in regions.items():
        output_file = os.path.join(args.output_path, f"{locale}.json")
        with open(output_file, "w", encoding="utf8") as f:
            json.dump(locale_data, f, indent=4, sort_keys=True, ensure_ascii=True)
# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment