Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save trunet/7154d763211e71a3d0483fa97153f949 to your computer and use it in GitHub Desktop.
Save trunet/7154d763211e71a3d0483fa97153f949 to your computer and use it in GitHub Desktop.
Function to fix and match ISO 3166 country names with ISO 4217 currency country names
import re
# Sample pairs used to exercise reformat_country(): each key is a raw name to
# be reformatted, each value is the spelling it should match once upper-cased.
# (Per the original note, these come from the ISO 4217 XML and the ISO 3166
# JSON listings.)
LIST_OF_COUNTRIES = {
    "KOREA (THE DEMOCRATIC PEOPLE’S REPUBLIC OF)": "Korea, Democratic People's Republic of",
    "KOREA (THE REPUBLIC OF)": "Korea, Republic of",
    "UNITED STATES OF AMERICA (THE)": "United States of America",
    "CONGO (THE DEMOCRATIC REPUBLIC OF THE)": "Congo, Democratic Republic of the",
    "BOLIVIA (PLURINATIONAL STATE OF)": "Bolivia, Plurinational State of",
    "COCOS (KEELING) ISLANDS (THE)": "Cocos (Keeling) Islands",
    "VIRGIN ISLANDS (BRITISH)": "Virgin Islands (British)",
    "VIRGIN ISLANDS (U.S.)": "Virgin Islands (U.S.)",
    "TÜRKİYE": "Türkiye",
    "SINT MAARTEN (DUTCH PART)": "Sint Maarten (Dutch part)",
    "SAINT MARTIN (FRENCH PART)": "Saint Martin (French part)",
    "FALKLAND ISLANDS (THE) [MALVINAS]": "Falkland Islands (Malvinas)",
    "HEARD ISLAND AND McDONALD ISLANDS": "Heard Island and McDonald Islands",
}
def _parenthetical_to_suffix(match: "re.Match") -> str:
    """Rewrite one " (QUALIFIER)" match as ", QUALIFIER" or keep it as is."""
    inner = match.group(1)
    # Only multi-word qualifiers become a comma suffix. Qualifiers that
    # contain the standalone word PART ("DUTCH PART", "FRENCH PART") stay in
    # parentheses; comparing whole words keeps "DEPARTMENT" from matching.
    if ' ' in inner and 'PART' not in inner.split():
        return ', ' + inner
    return ' (' + inner + ')'


def reformat_country(name: str) -> str:
    """Return *name* upper-cased and rewritten into comma-qualified form.

    The transformation, in order:

    1. Replace the typographic apostrophe with "'" and dotted capital "İ"
       with "I", then upper-case the whole string.
    2. Drop every standalone "(THE)".
    3. Turn " (THE QUALIFIER)" into ", QUALIFIER".
    4. Turn "[QUALIFIER]" into " (QUALIFIER)".
    5. Turn remaining multi-word " (QUALIFIER)" groups into ", QUALIFIER",
       except qualifiers containing the word PART, which stay parenthesised.

    Args:
        name: Country name to reformat, e.g. "KOREA (THE REPUBLIC OF)".

    Returns:
        The reformatted upper-case name, e.g. "KOREA, REPUBLIC OF".
    """
    # Strange characters replacement
    name = name.replace('’', "'").replace('İ', 'I').upper()

    # A "(THE)" sitting between two words must leave one space behind,
    # otherwise the neighbouring words would be glued together.
    name = re.sub(r'(?<=\w)\s*\(THE\)\s*(?=\w)', ' ', name)
    # Any other standalone "(THE)" (trailing, leading, or before a bracket)
    # is removed together with its surrounding whitespace.
    name = re.sub(r'\s*\(THE\)\s*', '', name)

    # "(THE ...)" as part of a multi-word phrase becomes a comma suffix.
    name = re.sub(r' \(THE\s+([\w\s\']+)\)', r', \1', name)

    # Square brackets are normalised to a parenthesised group first so the
    # final pass can treat them like any other parenthetical.
    name = re.sub(r'\[([\w\s\']+)\]', r' (\1)', name)

    # Multi-word parentheticals become comma suffixes; single-word ones and
    # "... PART" qualifiers are left untouched.
    return re.sub(r' \(([\w\s\'\.]+)\)', _parenthetical_to_suffix, name)
# Self-check: for every sample pair print the raw name, its reformatted form,
# the expected upper-cased value, and whether the two are equal.
for raw_name, expected_name in LIST_OF_COUNTRIES.items():
    actual = reformat_country(raw_name)
    expected = expected_name.upper()
    print(f"{raw_name} -> {actual} == {expected}({actual == expected})")
@trunet
Copy link
Author

trunet commented Jul 31, 2024

Output:

KOREA (THE DEMOCRATIC PEOPLE’S REPUBLIC OF) -> KOREA, DEMOCRATIC PEOPLE'S REPUBLIC OF == KOREA, DEMOCRATIC PEOPLE'S REPUBLIC OF(True)
KOREA (THE REPUBLIC OF) -> KOREA, REPUBLIC OF == KOREA, REPUBLIC OF(True)
UNITED STATES OF AMERICA (THE) -> UNITED STATES OF AMERICA == UNITED STATES OF AMERICA(True)
CONGO (THE DEMOCRATIC REPUBLIC OF THE) -> CONGO, DEMOCRATIC REPUBLIC OF THE == CONGO, DEMOCRATIC REPUBLIC OF THE(True)
BOLIVIA (PLURINATIONAL STATE OF) -> BOLIVIA, PLURINATIONAL STATE OF == BOLIVIA, PLURINATIONAL STATE OF(True)
COCOS (KEELING) ISLANDS (THE) -> COCOS (KEELING) ISLANDS == COCOS (KEELING) ISLANDS(True)
VIRGIN ISLANDS (BRITISH) -> VIRGIN ISLANDS (BRITISH) == VIRGIN ISLANDS (BRITISH)(True)
VIRGIN ISLANDS (U.S.) -> VIRGIN ISLANDS (U.S.) == VIRGIN ISLANDS (U.S.)(True)
TÜRKİYE -> TÜRKIYE == TÜRKIYE(True)
SINT MAARTEN (DUTCH PART) -> SINT MAARTEN (DUTCH PART) == SINT MAARTEN (DUTCH PART)(True)
SAINT MARTIN (FRENCH PART) -> SAINT MARTIN (FRENCH PART) == SAINT MARTIN (FRENCH PART)(True)
FALKLAND ISLANDS (THE) [MALVINAS] -> FALKLAND ISLANDS (MALVINAS) == FALKLAND ISLANDS (MALVINAS)(True)
HEARD ISLAND AND McDONALD ISLANDS -> HEARD ISLAND AND MCDONALD ISLANDS == HEARD ISLAND AND MCDONALD ISLANDS(True)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment