Last active
January 26, 2024 05:30
-
-
Save lionel-rowe/6fd96f1c9fa34954fb986c9819b4f000 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Creates a small lookup module that resolves a human-readable language or
 * region name to its ISO codes by querying the Wikidata SPARQL endpoint.
 *
 * The returned `getInfoByName.language` / `getInfoByName.region` functions each
 * take a name and resolve to the best-matching entry, or `null` when the query
 * returns no rows. Successful and in-flight lookups are cached per kind, keyed
 * on the trimmed, lower-cased name; failed lookups are evicted so they can be
 * retried.
 *
 * @returns An object exposing `getInfoByName.language` and `getInfoByName.region`.
 */
function getLocaleInfoModule() {
	const endpoint = 'https://query.wikidata.org/sparql'
	const headers = { 'Accept': 'application/sparql-results+json' }

	type Kind = 'language' | 'region'

	// Numeric Wikidata property IDs, interpolated into the query as `wdt:P<id>`.
	// language: P218 = ISO 639-1 code, P220 = ISO 639-3 code
	// region:   P297 = ISO 3166-1 alpha-2 code, P298 = ISO 3166-1 alpha-3 code
	const propertyIds: Record<Kind, { iso2: number; iso3: number }> = {
		language: {
			iso2: 218,
			iso3: 220,
		},
		region: {
			iso2: 297,
			iso3: 298,
		},
	}

	type Result = {
		url: string
		name: string
		iso2?: string
		iso3?: string
		aliases: string[]
	}

	/** One row of a SPARQL JSON result: variable name -> bound term. */
	type Binding = Record<string, { value: string } | undefined>

	/** Canonical form used for cache keys and for ranking comparisons. */
	function normalize(s: string) {
		return s.trim().toLowerCase()
	}

	/**
	 * Builds the SPARQL query that finds items of the given kind whose English
	 * label or alias contains `name` (case-insensitively).
	 *
	 * `String.raw` keeps `\t` as a literal backslash-t so that it reaches the
	 * SPARQL parser as an escape sequence rather than a raw tab character.
	 * `JSON.stringify` is used to quote and escape the user-supplied name.
	 *
	 * NOTE(review): the `lang(?alias)` filter sits outside the OPTIONAL that
	 * binds `?alias`, so rows without an alias appear to be dropped — confirm
	 * whether that is intended before changing the query.
	 */
	function buildQuery(name: string, kind: Kind) {
		return String.raw`
			SELECT DISTINCT ?item ?name ?iso2 ?iso3 (group_concat(?alias; separator="\t") as ?aliases)
			WHERE {
				BIND(${JSON.stringify(name.trim())}@en AS ?q) .
				BIND(LCASE(?q) as ?query) .
				?item rdfs:label ?name .
				?item wdt:P${propertyIds[kind].iso3} ?iso3 .
				OPTIONAL { ?item wdt:P${propertyIds[kind].iso2} ?iso2 . }
				OPTIONAL { ?item skos:altLabel ?alias . }
				SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
				FILTER (lang(?alias) = "en") .
				FILTER (lang(?name) = "en") .
				FILTER(
					CONTAINS(LCASE(?name), ?query)
					|| CONTAINS(LCASE(?alias), ?query)
				) .
			}
			GROUP BY ?item ?name ?iso2 ?iso3 ?aliases
			LIMIT 1000
		`
	}

	/**
	 * Converts one SPARQL result row into a `Result`.
	 * `item` is exposed as `url`; `aliases` is split on the tab separator used
	 * by the query's `group_concat`. Optional codes are only set when bound.
	 */
	function toResult(binding: Binding): Result {
		const result: Result = {
			url: binding.item?.value ?? '',
			name: binding.name?.value ?? '',
			// An empty group_concat yields '', which would split into [''].
			aliases: (binding.aliases?.value ?? '').split('\t').filter((alias) => alias !== ''),
		}
		if (binding.iso2) result.iso2 = binding.iso2.value
		if (binding.iso3) result.iso3 = binding.iso3.value
		return result
	}

	/**
	 * Computes the ranking key for a candidate; lower values sort first and
	 * earlier positions take precedence over later ones.
	 */
	function rank(candidate: Result, normalized: string): number[] {
		const name = normalize(candidate.name)
		const aliases = candidate.aliases.map(normalize)
		const indexOrLast = (idx: number) => (idx === -1 ? Infinity : idx)

		return [
			// name matches input first
			name === normalized ? 0 : 1,
			// aliases include input first (earlier alias wins)
			indexOrLast(aliases.indexOf(normalized)),
			// name contains input first
			name.includes(normalized) ? 0 : 1,
			// aliases include one that contains input first (earlier alias wins)
			indexOrLast(aliases.findIndex((alias) => alias.includes(normalized))),
			// has ISO-2 first
			candidate.iso2 ? 0 : 1,
		]
	}

	/** Lexicographic comparison of two ranking keys of equal length. */
	function compareRanks(a: number[], b: number[]) {
		for (let i = 0; i < a.length; i++) {
			// Explicit inequality check so that Infinity vs Infinity is a tie
			// rather than the NaN produced by subtraction.
			if (a[i] !== b[i]) return a[i] < b[i] ? -1 : 1
		}
		return 0
	}

	/**
	 * Returns the best-ranked candidate for `name`, or `null` if there are none.
	 * On ties the earliest candidate wins, matching a stable sort.
	 */
	function pickBest(candidates: Result[], name: string): Result | null {
		const normalized = normalize(name)
		let best: Result | null = null
		let bestRank: number[] = []

		for (const candidate of candidates) {
			const candidateRank = rank(candidate, normalized)
			if (best === null || compareRanks(candidateRank, bestRank) < 0) {
				best = candidate
				bestRank = candidateRank
			}
		}
		return best
	}

	/**
	 * Creates a cached lookup function for one kind of entity.
	 *
	 * @param kind Which set of Wikidata properties to query.
	 * @returns A function mapping a name to a promise of the best match or `null`.
	 *   The promise rejects with an `Error` if the HTTP request fails, returns a
	 *   non-2xx status, or yields a body without `results.bindings`.
	 */
	function getLocaleInfo(kind: Kind) {
		const cache = new Map<string, Promise<Result | null>>()

		async function lookup(name: string): Promise<Result | null> {
			const url = new URL(endpoint)
			url.searchParams.set('query', buildQuery(name, kind))

			const res = await fetch(url.href, { headers })
			if (!res.ok) {
				throw new Error(`Wikidata query failed: ${res.status} ${res.statusText}`)
			}

			const data = (await res.json()) as { results?: { bindings?: unknown } } | null
			const bindings = data?.results?.bindings
			if (!Array.isArray(bindings)) {
				throw new Error('Wikidata query returned an unexpected response shape')
			}

			return pickBest((bindings as Binding[]).map(toResult), name)
		}

		return (name: string): Promise<Result | null> => {
			// The query trims the name and matches on its lower-cased form, so
			// inputs differing only in case or outer whitespace share a cache entry.
			const key = normalize(name)
			const cached = cache.get(key)
			if (cached) return cached

			const promise = lookup(name)
			cache.set(key, promise)
			// Do not keep failures: a transient error must not poison the cache.
			// The caller still observes the rejection through `promise` itself.
			promise.catch(() => {
				if (cache.get(key) === promise) cache.delete(key)
			})
			return promise
		}
	}

	const getInfoByName = {
		language: getLocaleInfo('language'),
		region: getLocaleInfo('region'),
	}

	return { getInfoByName }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment