Skip to content

Instantly share code, notes, and snippets.

@2torus
Last active November 11, 2023 17:08
Show Gist options
  • Save 2torus/b8152914ca6c6bb08ec3c6df4895b9a1 to your computer and use it in GitHub Desktop.
Save 2torus/b8152914ca6c6bb08ec3c6df4895b9a1 to your computer and use it in GitHub Desktop.
Compute EU membership on a given date with Python by using data from Wikipedia
import pandas as pd
from datetime import date
from typing import List
from functools import cache
from warnings import warn
import re
# Live Wikipedia article; the tables scraped from it change whenever the article is edited.
MEMBERSHIP_URL = 'https://en.wikipedia.org/wiki/Member_state_of_the_European_Union'
# The same article pinned to a single revision (``oldid``), so the scraped tables stay reproducible.
MEMBERSHIP_URL_PERM_LINK = 'https://en.wikipedia.org/w/index.php?title=Member_state_of_the_European_Union&oldid=1184106841'
# Date associated with the pinned revision; membership changes after it are not reflected in that snapshot.
CANONICAL_WIKI_DATE = date(2023, 11, 11)
# Date the Treaty of Maastricht took effect; used as the default EU founding date by ``eu_members``.
TREATY_OF_MAASTRICHT_EFFECTIVE_DATE = date(1993, 11, 1)
def _clean_names(df):
    """Strip bracketed footnote markers from the ``Country`` column, in place.

    Cells scraped from Wikipedia carry bracketed references such as
    ``"Cyprus[a]"``. Every bracketed group is removed and the surrounding
    whitespace is trimmed so the names compare and sort cleanly.

    Args:
        df: DataFrame with a string ``Country`` column. It is modified in
            place; the function returns ``None``.
    """
    # Raw string: "\[" inside a normal literal is an invalid escape sequence.
    # "[^\]]*" keeps each match within one pair of brackets, so text sitting
    # between two separate markers is preserved instead of being swallowed.
    cleaned = df['Country'].str.replace(r'\[[^\]]*\]', '', regex=True).str.strip()
    df.loc[:, 'Country'] = cleaned
def _split_founders_joiners(df):
    """Split a membership table into founders and later joiners.

    A row counts as a founder when its ``Accession`` cell starts with the
    text ``"Founder"``; every other row is treated as a joiner whose
    ``Accession`` cell holds a parseable date.

    Args:
        df: DataFrame with an ``Accession`` column. It is not modified.

    Returns:
        A ``(founders, joiners)`` tuple of independent DataFrames.
        ``founders`` keeps the original ``Accession`` values; in ``joiners``
        the column is converted to ``datetime.date`` objects.
    """
    # astype(str) + na=False: missing or non-text cells simply count as
    # "not a founder" instead of putting NaN into the boolean mask.
    is_founder = df['Accession'].astype(str).str.startswith('Founder', na=False)

    # Explicit copies so the conversion below never writes through to ``df``
    # and does not trigger chained-assignment warnings.
    founders = df[is_founder].copy()
    joiners = df[~is_founder].copy()

    # Replace the column wholesale: the values change type (text -> date).
    joiners['Accession'] = pd.to_datetime(joiners['Accession']).dt.date
    return founders, joiners
@cache
def _wiki_data(canonical=False):
    """Download and normalise the EU membership tables from Wikipedia.

    The result is memoised per ``canonical`` value, so the page is fetched at
    most once per process for each variant. The returned DataFrames are the
    cached objects themselves; callers should treat them as read-only.

    Args:
        canonical: When true, read the pinned revision
            (``MEMBERSHIP_URL_PERM_LINK``); otherwise read the live article
            (``MEMBERSHIP_URL``).

    Returns:
        ``((remain_founders, remain_joiners), (exited_founders, exited_joiners))``
        where "remain" comes from the table without a ``Withdrawal`` column
        and "exited" from the table that has one.

    Raises:
        IndexError: If an expected table is not present on the page.
    """
    url = MEMBERSHIP_URL_PERM_LINK if canonical else MEMBERSHIP_URL
    tables = pd.read_html(url, match='Population', parse_dates=True)

    def first_table(with_withdrawal, description):
        # Pick the first table that has 'Accession' and whose possession of a
        # 'Withdrawal' column matches what is asked for.
        for table in tables:
            columns = table.columns
            if 'Accession' in columns and ('Withdrawal' in columns) == with_withdrawal:
                return table
        raise IndexError(f'no {description} table found at {url}')

    # .copy() gives each frame its own data, so the in-place clean-up below
    # is not a chained assignment on a slice of the scraped table.
    exited = first_table(True, 'withdrawn-member')[['Country', 'Accession', 'Withdrawal']].copy()
    exited['Withdrawal'] = pd.to_datetime(exited['Withdrawal']).dt.date
    _clean_names(exited)
    exited_founders, exited_joiners = _split_founders_joiners(exited)

    remain = first_table(False, 'current-member')[['Country', 'Accession']].copy()
    _clean_names(remain)
    remain_founders, remain_joiners = _split_founders_joiners(remain)

    return ((remain_founders, remain_joiners), (exited_founders, exited_joiners))
def eu_members(dt: date, founding_date = TREATY_OF_MAASTRICHT_EFFECTIVE_DATE, canonical=False) -> List[str]:
    """
    Returns the list of EU members on a given date using Wikipedia data from https://en.wikipedia.org/w/index.php?title=Member_state_of_the_European_Union
    Useful in cases when data is tabulated both for EU and its member states, e.g. https://www.census.gov/foreign-trade/balance/.

    dt - the date to evaluate membership on
    founding_date - defaults to the Maastricht Treaty effective date, see https://en.wikipedia.org/wiki/Maastricht_Treaty;
        for any earlier date the result is an empty list and no data is fetched
    canonical - use a frozen Wikipedia revision captured when this code was written; reproducible, but may be outdated

    Returns the country names sorted alphabetically. A state that withdrew is still counted on its withdrawal date.

    >>> eu_members(date(1992, 1, 1))
    []
    >>> eu_members(date(1998, 1, 1))
    ['Austria', 'Belgium', 'Denmark', 'Finland', 'France', 'Germany', 'Greece', 'Ireland', 'Italy', 'Luxembourg', 'Netherlands', 'Portugal', 'Spain', 'Sweden', 'United Kingdom']
    >>> eu_members(date(2019, 1, 1))
    ['Austria', 'Belgium', 'Bulgaria', 'Croatia', 'Cyprus', 'Czech Republic', 'Denmark', 'Estonia', 'Finland', 'France', 'Germany', 'Greece', 'Hungary', 'Ireland', 'Italy', 'Latvia', 'Lithuania', 'Luxembourg', 'Malta', 'Netherlands', 'Poland', 'Portugal', 'Romania', 'Slovakia', 'Slovenia', 'Spain', 'Sweden', 'United Kingdom']
    >>> eu_members(date(2023, 1, 1))
    ['Austria', 'Belgium', 'Bulgaria', 'Croatia', 'Cyprus', 'Czech Republic', 'Denmark', 'Estonia', 'Finland', 'France', 'Germany', 'Greece', 'Hungary', 'Ireland', 'Italy', 'Latvia', 'Lithuania', 'Luxembourg', 'Malta', 'Netherlands', 'Poland', 'Portugal', 'Romania', 'Slovakia', 'Slovenia', 'Spain', 'Sweden']
    """
    if canonical and dt > CANONICAL_WIKI_DATE:
        warn(f'Using Wikipedia data as of {CANONICAL_WIKI_DATE}, might be outdated', stacklevel=2)

    # Before the union existed there is nothing to look up, so skip the fetch.
    if dt < founding_date:
        return []

    # Forward the flag so the pinned revision is actually used when requested.
    (remain_founders, remain_joiners), (exited_founders, exited_joiners) = _wiki_data(bool(canonical))

    # Founders were members from the start; one that later left counts only
    # up to and including its withdrawal date.
    founders_still_in = exited_founders['Withdrawal'] >= dt
    # Joiners count from their accession date onwards.
    joined_and_stayed = remain_joiners['Accession'] <= dt
    joined_not_yet_left = (exited_joiners['Accession'] <= dt) & (exited_joiners['Withdrawal'] >= dt)

    members = [
        *remain_founders['Country'],
        *exited_founders.loc[founders_still_in, 'Country'],
        *remain_joiners.loc[joined_and_stayed, 'Country'],
        *exited_joiners.loc[joined_not_yet_left, 'Country'],
    ]
    return sorted(members)
if __name__ == "__main__":
    # Executed as a script: run the examples embedded in this module's
    # docstrings. Examples that reach the data-loading path need network access.
    from doctest import testmod

    testmod()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment