Skip to content

Instantly share code, notes, and snippets.

@thomasahle
Created August 19, 2024 00:53
Show Gist options
  • Select an option

  • Save thomasahle/4ae43a49ba691a9d3a19257e0a5d7131 to your computer and use it in GitHub Desktop.

Select an option

Save thomasahle/4ae43a49ba691a9d3a19257e0a5d7131 to your computer and use it in GitHub Desktop.
import geopandas as gpd
import matplotlib.pyplot as plt
import pandas as pd
import matplotlib.colors as mcolors
from shapely.geometry import Polygon, MultiPolygon
import shapely.ops
import os.path
def get_text_color(fill_color):
    """Choose a label color that stays legible on top of ``fill_color``.

    The fill color is converted to RGBA with ``matplotlib.colors.to_rgba``
    and its red, green and blue components are combined into a single
    weighted brightness value (weights 0.299, 0.587 and 0.114). The alpha
    component is ignored.

    Args:
        fill_color: Any color specification accepted by
            ``matplotlib.colors.to_rgba``.

    Returns:
        ``"black"`` when the weighted brightness is above 0.75,
        otherwise ``"white"``.
    """
    red, green, blue, _alpha = mcolors.to_rgba(fill_color)
    luma = 0.299 * red + 0.587 * green + 0.114 * blue
    if luma > 0.75:
        return "black"
    return "white"
def _load_birth_rate_change(data_path, start_year, end_year):
    """Return a frame of "Country Code" and percentage "birth_rate_change"."""
    # skiprows=4: the header row is expected on the fifth line of the CSV.
    data = pd.read_csv(data_path, skiprows=4)
    # A country missing either year cannot be compared, so it is dropped.
    complete = data[
        ["Country Name", "Country Code", start_year, end_year]
    ].dropna()
    change = (
        (complete[end_year] - complete[start_year]) / complete[start_year]
    ) * 100
    # .assign builds a new frame instead of writing a column into a
    # filtered selection of `data`.
    return complete[["Country Code"]].assign(birth_rate_change=change)


def _load_continent_shapes(shapefile_path, continent):
    """Return the shapefile rows whose CONTINENT equals ``continent``."""
    world = gpd.read_file(shapefile_path)
    # Russia is moved to Asia before filtering, so it is drawn on the Asia
    # map and left off the Europe map.
    world.loc[world["NAME"] == "Russia", "CONTINENT"] = "Asia"
    return world[world["CONTINENT"] == continent]


def _label_large_countries(ax, region):
    """Write the percentage change onto every sufficiently large country."""
    # The largest country's area is the yardstick for both the size
    # threshold and the font scaling; it does not change per row, so it is
    # computed once instead of twice per iteration.
    max_area = region["geometry"].area.max()
    base_font_size = 8  # Font size for the smallest labelled countries
    scale_factor = 5  # Extra points added for the largest country

    for _, row in region.iterrows():
        change = row["birth_rate_change"]
        if pd.isna(change):
            continue
        geometry = row["geometry"]
        # Skip countries too small to fit the text (threshold is tunable).
        if not geometry.area > 0.2 * max_area:
            continue

        # For multi-part countries, place the label inside the largest part.
        if isinstance(geometry, MultiPolygon):
            poly = max(geometry.geoms, key=lambda part: part.area)
        else:
            poly = geometry
        point = shapely.ops.polylabel(poly)

        font_size = base_font_size + scale_factor * (geometry.area / max_area)
        label = dict(
            text=f"{change:.2f}%",
            horizontalalignment="center",
            fontsize=font_size,
        )
        # A faint, slightly offset black copy acts as a drop shadow under
        # the white label.
        ax.annotate(
            **label, color="black", xy=(point.x + 0.1, point.y - 0.1), alpha=0.25
        )
        ax.annotate(**label, color="white", xy=(point.x, point.y))


def _ranking_text(region, top_n=5):
    """Return (names_text, values_text) listing the top and bottom countries."""
    ascending = (
        region[["NAME", "birth_rate_change"]]
        .dropna()
        .sort_values(by="birth_rate_change")
    )
    if len(ascending) <= 2 * top_n:
        # Too few countries for separate top/bottom lists: head() and tail()
        # would overlap and repeat countries, so list each one once.
        groups = [ascending[::-1]]
    else:
        # Largest changes first, then the most negative ones, both in
        # descending order.
        groups = [ascending.tail(top_n)[::-1], ascending.head(top_n)[::-1]]

    names = ["\n".join(name + ":" for name in group["NAME"]) for group in groups]
    values = [
        "\n".join(f"{value:.2f}%" for value in group["birth_rate_change"])
        for group in groups
    ]
    # The "..." separator line in the names column is matched by an empty
    # line in the values column so the rows stay aligned.
    return "\n...\n".join(names), "\n\n".join(values)


def plot_birth_rate_change(
    continent="Europe",
    data_path="path_to_your_file.csv",
    shapefile_path="path_to_shapefile.shp",
    start_year="2012",
    end_year="2022",
):
    """Show a choropleth of the percentage change in birth rate for a continent.

    The CSV at ``data_path`` is reduced to the percentage change between the
    ``start_year`` and ``end_year`` columns, joined to the country shapes of
    ``continent`` and drawn with a red-yellow-green colormap centered on
    zero. Large countries get their value written on the map, and a ranking
    of the biggest increases and decreases is printed beside it. The figure
    is displayed with ``plt.show()``; nothing is returned.

    Args:
        continent: Value of the shapefile's ``CONTINENT`` column to plot.
        data_path: Path to the CSV with "Country Name", "Country Code" and
            one column per year.
        shapefile_path: Path to a countries shapefile providing ``NAME``,
            ``CONTINENT`` and ``ISO_A3`` columns.
        start_year: Column label of the baseline year (a string, because it
            is used to select a CSV column).
        end_year: Column label of the comparison year.
    """
    changes = _load_birth_rate_change(data_path, start_year, end_year)

    # Left join keeps every country of the continent; those without data end
    # up with NaN and are drawn in light grey.
    # NOTE(review): some shapefile releases reportedly carry "-99" in ISO_A3
    # for a few countries, which would leave them unmatched - confirm against
    # the shapefile in use.
    region = _load_continent_shapes(shapefile_path, continent).merge(
        changes,
        left_on="ISO_A3",
        right_on="Country Code",
        how="left",
    )

    # Color scale: red for negative, green for positive, centered on zero.
    # The limits are pushed out to at least -1/+1 so that zero always lies
    # strictly between them, even when every change has the same sign or
    # there is no data at all (max()/min() would then be NaN).
    observed = region["birth_rate_change"]
    has_data = observed.notna().any()
    lower = min(observed.min(), -1) if has_data else -1
    upper = max(observed.max(), 1) if has_data else 1
    norm = mcolors.TwoSlopeNorm(vmin=lower, vcenter=0, vmax=upper)
    colormap = plt.cm.RdYlGn

    _, ax = plt.subplots(1, 1, figsize=(15, 10))
    region.boundary.plot(ax=ax, linewidth=1, color="white")
    region.plot(
        column="birth_rate_change",
        ax=ax,
        legend=True,
        legend_kwds={
            "label": "Birth Rate Change",
            "orientation": "vertical",
            "shrink": 0.7,  # To adjust the size of the color bar
            "aspect": 30,  # Aspect ratio of the color bar
            "pad": 0.02,  # Padding between plot and color bar
            "extend": "both",
            "format": lambda x, _: f"{x:.0f}%",
        },
        cmap=colormap,
        norm=norm,
        missing_kwds={"color": "lightgrey"},
    )

    _label_large_countries(ax, region)

    ax.set_title(
        f"Change in Birth Rate (%) from {start_year} to {end_year} in {continent}",
        fontsize=20,
    )
    ax.set_axis_off()

    names_text, values_text = _ranking_text(region)

    # Axes-relative anchor of the ranking: names are left-aligned at x0 and
    # values right-aligned at x1. South America uses its own offsets.
    x0, x1, y = 0.0, 0.35, 0.44
    if continent == "South America":
        x0 = -0.5
        x1 = 0.1

    ax.text(
        x=x0,
        y=y,
        s=names_text,
        fontsize=12,
        ha="left",
        va="top",
        transform=ax.transAxes,
        linespacing=1.5,
    )
    ax.text(
        x=x1,
        y=y,
        s=values_text,
        fontsize=12,
        ha="right",
        va="top",
        transform=ax.transAxes,
        linespacing=1.5,
    )
    ax.text(
        x=0.9,
        y=0,
        s="Source: World Bank, Birth rate, crude",
        fontsize=8,
        ha="right",
        va="bottom",
        transform=ax.transAxes,
    )
    plt.show()
# Input files, expected under the current user's ~/Downloads directory.
data_path = os.path.expanduser(
    "~/Downloads/API_SP.DYN.CBRT.IN_DS2_en_csv_v2_3435688/API_SP.DYN.CBRT.IN_DS2_en_csv_v2_3435688.csv"
)
shapefile_path = os.path.expanduser("~/Downloads/ne_110m_admin_0_countries/ne_110m_admin_0_countries.shp")

# Show one figure per continent, using the function's default year range.
for continent in ("Africa", "Europe", "Asia", "North America", "South America", "Oceania"):
    plot_birth_rate_change(continent, data_path=data_path, shapefile_path=shapefile_path)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment