Created
December 19, 2022 17:47
-
-
Save kylrth/b75dbf2dbc410bb7b636baa8938e660e to your computer and use it in GitHub Desktop.
some population and immigration data sleuthing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd
def get_historical_rates(country: str, path: str = "ourworldindata.csv") -> pd.DataFrame:
    """Return the yearly growth rates recorded for one country.

    The CSV at *path* must contain the columns ``Entity``, ``Code``, ``Year``
    and the two long "Growth rate ..." / "Natural growth rate ..." columns
    renamed below. Any other columns are passed through unchanged.

    Args:
        country: Value to match exactly against the ``Entity`` column.
        path: Location of the CSV export. Defaults to ``ourworldindata.csv``
            in the current working directory.

    Returns:
        A frame indexed by ``year`` with the columns ``total_rate`` and
        ``natural_rate``. If no row matches *country* the frame is empty.
    """
    # https://ourworldindata.org/grapher/population-growth-rate-with-and-without-migration
    df = pd.read_csv(path)
    return (
        df[df["Entity"] == country]
        # Entity/Code are constant after the filter, so they carry no information.
        .drop(columns=["Entity", "Code"])
        .rename(
            columns={
                "Year": "year",
                "Growth rate - Sex: all - Age: all - Variant: estimates": "total_rate",
                "Natural growth rate - Sex: all - Age: all - Variant: estimates": "natural_rate",
            }
        )
        .set_index("year")
    )
def idx_and_value(s: pd.Series, i: int) -> tuple:
    """Return the ``(index label, value)`` pair at integer position *i* of *s*.

    Args:
        s: Series to read from.
        i: Zero-based position; negative values count from the end.

    Returns:
        A 2-tuple of the index label and the value stored at that position.

    Raises:
        IndexError: If *i* is outside the bounds of *s*.
    """
    return s.index[i], s.iloc[i]
def main():
    """Print population-growth and migration statistics from local CSV files.

    Reads ``ourworldindata.csv`` (through ``get_historical_rates``),
    ``world_pop.csv`` and ``world_nat_pop.csv`` from the current working
    directory and writes four reports to stdout:

    * the years of highest and lowest natural growth for three countries,
    * current rates for a fixed list of countries, sorted by immigration,
    * the ten countries with the lowest ``immigration`` value,
    * the ten countries with the highest ``natural_rate`` value.

    Raises:
        KeyError: If any country of the fixed list is missing from the joined
            table (``.loc`` with a list of labels requires all of them).
    """
    historical = {
        "US": get_historical_rates("United States"),
        "India": get_historical_rates("India"),
        "Malaysia": get_historical_rates("Malaysia"),
    }

    # https://en.wikipedia.org/wiki/List_of_countries_by_population_growth_rate
    # https://en.wikipedia.org/wiki/List_of_sovereign_states_by_natural_increase
    growth = pd.read_csv("world_pop.csv", index_col="country")
    # Only rows whose CIA figure is dated 2021 are used.
    total_rate = growth.loc[growth["cia_year"] == 2021, "cia_percent"]

    natural = pd.read_csv("world_nat_pop.csv", index_col="country")
    # NOTE(review): the division by 10 presumably converts a per-1000 rate to
    # percent so it is comparable with cia_percent -- confirm against the data.
    natural_rate = (natural["natural_increase_rate"] / 10).rename("natural_rate")

    # The inner join keeps only countries present in both sources.
    world = pd.DataFrame({"total_rate": total_rate}).join(natural_rate, how="inner")
    # Whatever growth is not natural increase is attributed to migration.
    world["immigration"] = world["total_rate"] - world["natural_rate"]

    # print out interesting stats
    for country, rates in historical.items():
        s = rates["natural_rate"]
        year, rate = idx_and_value(s, s.argmax())
        print(f"highest natural growth in {country}: {rate}%, in {year}")
        year, rate = idx_and_value(s, s.argmin())
        print(f"lowest natural growth in {country}: {rate}%, in {year}")

    selected = [
        "Australia",
        "Canada",
        "France",
        "Germany",
        "Greece",
        "India",
        "Italy",
        "Japan",
        "New Zealand",
        "South Korea",
        "Sweden",
        "United Kingdom",
        "United States",
    ]
    print("\ncurrent stats for select countries, sorted by immigration rate:")
    print(world.loc[selected].sort_values("immigration"))

    print("\ncountries experiencing the highest rates of attrition:")
    # Ascending order puts the most negative immigration values first.
    print(world.sort_values(by="immigration", ascending=True).head(10))

    print("\ncountries experiencing the highest natural growth:")
    print(world.sort_values(by="natural_rate", ascending=False).head(10))
if __name__ == "__main__":
    # Raise pandas' row display limit so frames of up to 500 rows print in full.
    pd.set_option("display.max_rows", 500)
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment