Last active
November 11, 2023 17:08
-
-
Save 2torus/b8152914ca6c6bb08ec3c6df4895b9a1 to your computer and use it in GitHub Desktop.
Compute EU membership on a given date with Python by using data from Wikipedia
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
from datetime import date | |
from typing import List | |
from functools import cache | |
from warnings import warn | |
import re | |
# Live Wikipedia article listing the member states of the European Union.
MEMBERSHIP_URL = 'https://en.wikipedia.org/wiki/Member_state_of_the_European_Union'
# Permanent link to one fixed revision (``oldid``) of the same article.
MEMBERSHIP_URL_PERM_LINK = 'https://en.wikipedia.org/w/index.php?title=Member_state_of_the_European_Union&oldid=1184106841'
# Date the pinned revision's data is considered current "as of".
CANONICAL_WIKI_DATE = date(year=2023, month=11, day=11)
# Effective date of the Maastricht Treaty; default ``founding_date`` of eu_members.
TREATY_OF_MAASTRICHT_EFFECTIVE_DATE = date(year=1993, month=11, day=1)
def _clean_names(df):
    """Strip bracketed footnote markers such as ``[a]`` from the ``Country`` column.

    The frame is modified in place and nothing is returned.

    Args:
        df: DataFrame with a string-valued ``Country`` column.
    """
    # A raw string avoids the invalid-escape warning that '\[' triggers.
    # The negated character class removes each marker individually instead of
    # greedily swallowing everything between the first '[' and the last ']'.
    df.loc[:, 'Country'] = df['Country'].str.replace(r'\[[^\]]*\]', '', regex=True)
def _split_founders_joiners(df):
    """Split a membership table into founding members and later joiners.

    Rows whose ``Accession`` text starts with ``Founder`` are returned as
    founders with that column left untouched. For every other row the
    ``Accession`` text is parsed into ``datetime.date`` values.

    Args:
        df: DataFrame with a string-valued ``Accession`` column. It is not
            modified by this function.

    Returns:
        A ``(founders, joiners)`` tuple of DataFrames.
    """
    # na=False keeps the mask strictly boolean, so a missing accession value
    # cannot break the ``~`` negation below; such rows are treated as joiners.
    is_founder = df['Accession'].str.startswith('Founder', na=False)
    founders = df[is_founder]
    # Work on an explicit copy and replace the whole column: writing into a
    # filtered frame with ``.loc`` raises SettingWithCopyWarning and forces
    # the parsed dates into the existing text column.
    joiners = df[~is_founder].copy()
    joiners['Accession'] = pd.to_datetime(joiners['Accession']).dt.date
    return founders, joiners
@cache
def _wiki_data(canonical=False):
    """Download and parse the EU membership tables from Wikipedia.

    Results are memoised per ``canonical`` value by ``functools.cache``.

    Args:
        canonical: When true, read the pinned revision at
            ``MEMBERSHIP_URL_PERM_LINK``; otherwise read the live article
            at ``MEMBERSHIP_URL``.

    Returns:
        ``((remain_founders, remain_joiners), (exited_founders, exited_joiners))``.
        Each element is a DataFrame with ``Country`` and ``Accession``
        columns; the two ``exited_*`` frames also carry ``Withdrawal`` as
        ``datetime.date`` values.

    Raises:
        IndexError: If no table with the expected columns is found.
    """
    url = MEMBERSHIP_URL_PERM_LINK if canonical else MEMBERSHIP_URL
    tables = pd.read_html(url, match='Population', parse_dates=True)

    # Former members: first table that has both Accession and Withdrawal.
    exited = [
        table for table in tables
        if 'Accession' in table.columns and 'Withdrawal' in table.columns
    ][0]
    # Copy the column subset so the assignments below act on an independent
    # frame (no SettingWithCopyWarning), and replace the column outright
    # rather than writing dates into the existing text column.
    exited = exited[['Country', 'Accession', 'Withdrawal']].copy()
    exited['Withdrawal'] = pd.to_datetime(exited['Withdrawal']).dt.date
    _clean_names(exited)
    exited_founders, exited_joiners = _split_founders_joiners(exited)

    # Current members: first table that has Accession but no Withdrawal.
    remain = [
        table for table in tables
        if 'Accession' in table.columns and 'Withdrawal' not in table.columns
    ][0]
    remain = remain[['Country', 'Accession']].copy()
    _clean_names(remain)
    remain_founders, remain_joiners = _split_founders_joiners(remain)

    return ((remain_founders, remain_joiners), (exited_founders, exited_joiners))
def eu_members(dt: date, founding_date: date = TREATY_OF_MAASTRICHT_EFFECTIVE_DATE, canonical: bool = False) -> List[str]:
    """
    Returns the sorted list of EU members on a given date using Wikipedia data from https://en.wikipedia.org/w/index.php?title=Member_state_of_the_European_Union
    Useful in cases when data is tabulated both for the EU and its member states, e.g. https://www.census.gov/foreign-trade/balance/.

    dt - the date for which membership is computed
    founding_date - defaults to the Maastricht Treaty effective date, see https://en.wikipedia.org/wiki/Maastricht_Treaty;
                    any earlier ``dt`` yields an empty list
    canonical - use a frozen (permanent-link) revision of the Wikipedia page captured when this code was written;
                results are reproducible but may be outdated, and a warning is issued when ``dt`` is later than that revision

    >>> eu_members(date(1992, 1, 1))
    []
    >>> eu_members(date(1998, 1, 1))
    ['Austria', 'Belgium', 'Denmark', 'Finland', 'France', 'Germany', 'Greece', 'Ireland', 'Italy', 'Luxembourg', 'Netherlands', 'Portugal', 'Spain', 'Sweden', 'United Kingdom']
    >>> eu_members(date(2019, 1, 1))
    ['Austria', 'Belgium', 'Bulgaria', 'Croatia', 'Cyprus', 'Czech Republic', 'Denmark', 'Estonia', 'Finland', 'France', 'Germany', 'Greece', 'Hungary', 'Ireland', 'Italy', 'Latvia', 'Lithuania', 'Luxembourg', 'Malta', 'Netherlands', 'Poland', 'Portugal', 'Romania', 'Slovakia', 'Slovenia', 'Spain', 'Sweden', 'United Kingdom']
    >>> eu_members(date(2023, 1, 1))
    ['Austria', 'Belgium', 'Bulgaria', 'Croatia', 'Cyprus', 'Czech Republic', 'Denmark', 'Estonia', 'Finland', 'France', 'Germany', 'Greece', 'Hungary', 'Ireland', 'Italy', 'Latvia', 'Lithuania', 'Luxembourg', 'Malta', 'Netherlands', 'Poland', 'Portugal', 'Romania', 'Slovakia', 'Slovenia', 'Spain', 'Sweden']
    """
    # The module constant is upper-case; the lower-case spelling used before
    # raised NameError whenever canonical=True.
    if canonical and dt > CANONICAL_WIKI_DATE:
        warn(f'Using Wikipedia data as of {CANONICAL_WIKI_DATE}, might be outdated')

    # Nothing to look up before the founding date, so skip the download.
    if dt < founding_date:
        return []

    # Forward ``canonical`` so the pinned revision is actually used; passed
    # positionally so the cache key is the same for every caller.
    (remain_founders, remain_joiners), (exited_founders, exited_joiners) = _wiki_data(canonical)

    # Founders that never left are members on every date from founding on.
    staying_founders = remain_founders['Country'].tolist()
    # Founders that later left count only until their withdrawal date
    # (same inclusive bound as for the exited joiners below).
    departed_founders = exited_founders[exited_founders['Withdrawal'] >= dt]['Country'].tolist()
    # Joiners count from their accession date onwards...
    staying_joiners = remain_joiners[remain_joiners['Accession'] <= dt]['Country'].tolist()
    # ...and, if they later left, only until their withdrawal date.
    was_member = (exited_joiners['Accession'] <= dt) & (exited_joiners['Withdrawal'] >= dt)
    departed_joiners = exited_joiners[was_member]['Country'].tolist()

    return sorted([*staying_founders, *departed_founders, *staying_joiners, *departed_joiners])
if __name__ == "__main__":
    # Executed as a script: run the doctests embedded in this module.
    from doctest import testmod
    testmod()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment