Created
July 31, 2024 04:26
-
-
Save trunet/7154d763211e71a3d0483fa97153f949 to your computer and use it in GitHub Desktop.
Function to fix and match ISO 3166 country names with ISO 4217 currency country names
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
# Sample cases: country names as they appear in the ISO 4217 XML (keys) mapped
# to the corresponding names from the ISO 3166 JSON (values).
# Each key is passed through reformat_country() and the result is compared
# against the upper-cased value.
LIST_OF_COUNTRIES = {
    "KOREA (THE DEMOCRATIC PEOPLE’S REPUBLIC OF)": "Korea, Democratic People's Republic of",
    "KOREA (THE REPUBLIC OF)": "Korea, Republic of",
    "UNITED STATES OF AMERICA (THE)": "United States of America",
    "CONGO (THE DEMOCRATIC REPUBLIC OF THE)": "Congo, Democratic Republic of the",
    "BOLIVIA (PLURINATIONAL STATE OF)": "Bolivia, Plurinational State of",
    "COCOS (KEELING) ISLANDS (THE)": "Cocos (Keeling) Islands",
    "VIRGIN ISLANDS (BRITISH)": "Virgin Islands (British)",
    "VIRGIN ISLANDS (U.S.)": "Virgin Islands (U.S.)",
    "TÜRKİYE": "Türkiye",
    "SINT MAARTEN (DUTCH PART)": "Sint Maarten (Dutch part)",
    "SAINT MARTIN (FRENCH PART)": "Saint Martin (French part)",
    "FALKLAND ISLANDS (THE) [MALVINAS]": "Falkland Islands (Malvinas)",
    "HEARD ISLAND AND McDONALD ISLANDS": "Heard Island and McDonald Islands",
}
# Patterns are compiled once at import time; all of them run on the
# already upper-cased name.
_STANDALONE_THE_RE = re.compile(r'\s*\(THE\)\s*')
_LEADING_THE_RE = re.compile(r' \(THE\s+([\w\s\']+)\)')
_BRACKETED_RE = re.compile(r'\s*\[([\w\s\']+)\]')
_PARENTHESISED_RE = re.compile(r' \(([\w\s\'\.]+)\)')


def _drop_standalone_the(match):
    """Return the replacement text for one standalone "(THE)" match."""
    # The match includes the surrounding whitespace. At either end of the
    # name nothing should remain; in the middle one space must survive so
    # the neighbouring words are not glued together.
    touches_edge = match.start() == 0 or match.end() == len(match.string)
    return '' if touches_edge else ' '


def _rewrite_parenthetical(match):
    """Turn a multi-word parenthetical into a ", ..." suffix."""
    inner = match.group(1)
    # Single-word qualifiers such as "(BRITISH)" or "(U.S.)" and the
    # "... PART" qualifiers keep their parentheses.
    if ' ' in inner and 'PART' not in inner:
        return ', ' + inner
    return match.group(0)


def reformat_country(name: str) -> str:
    """Rewrite an ISO 4217 country name into the upper-cased ISO 3166 style.

    The steps, in order:

    1. Replace the typographic apostrophe with ``'`` and the dotted capital
       ``İ`` with ``I``, then upper-case the whole name.
    2. Remove a standalone ``(THE)``.
    3. Turn ``X (THE Y Z)`` into ``X, Y Z``.
    4. Turn a square-bracketed part ``[Y]`` into `` (Y)``.
    5. Turn a remaining multi-word parenthetical ``X (Y Z)`` into ``X, Y Z``,
       unless it contains ``PART``.

    Args:
        name: Country name as spelled in the ISO 4217 list.

    Returns:
        The upper-cased, reformatted name, e.g.
        ``"KOREA (THE REPUBLIC OF)"`` -> ``"KOREA, REPUBLIC OF"``.
    """
    # Strange characters replacement
    name = name.replace('’', "'").replace('İ', 'I').upper()

    # Remove standalone "(THE)" without fusing the words around it
    name = _STANDALONE_THE_RE.sub(_drop_standalone_the, name)

    # Remove "(THE" when it's part of a multi-word phrase
    name = _LEADING_THE_RE.sub(r', \1', name)

    # Square brackets become parentheses; any whitespace in front of the
    # bracket is absorbed so exactly one space precedes the "(".
    name = _BRACKETED_RE.sub(r' (\1)', name)

    # Replace parentheses with commas for multi-word parentheses
    return _PARENTHESISED_RE.sub(_rewrite_parenthetical, name)
# For every sample, print the original name, its reformatted version, the
# expected upper-cased ISO 3166 name, and whether the two are equal.
for country, iso_3166_name in LIST_OF_COUNTRIES.items():
    # Compute each side once instead of re-running the conversion and the
    # dictionary lookup for every placeholder in the message.
    reformatted = reformat_country(country)
    expected = iso_3166_name.upper()
    print(f"{country} -> {reformatted} == {expected}({reformatted == expected})")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Output: