Created
August 19, 2024 00:53
-
-
Save thomasahle/4ae43a49ba691a9d3a19257e0a5d7131 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import geopandas as gpd | |
| import matplotlib.pyplot as plt | |
| import pandas as pd | |
| import matplotlib.colors as mcolors | |
| from shapely.geometry import Polygon, MultiPolygon | |
| import shapely.ops | |
| import os.path | |
def get_text_color(fill_color):
    """
    Pick a readable label color ("black" or "white") for a given fill color.

    The fill is converted to RGBA and a brightness value is computed as a
    weighted sum of the red, green and blue channels (0.299, 0.587, 0.114);
    the alpha channel is ignored. Fills with brightness above 0.75 get black
    text, everything else gets white text.
    """
    red, green, blue = mcolors.to_rgba(fill_color)[:3]
    luma = 0.299 * red + 0.587 * green + 0.114 * blue
    if luma > 0.75:
        return "black"
    return "white"
def _label_anchor(geometry):
    """Return a label point inside *geometry* (its largest part if multi-part)."""
    if isinstance(geometry, MultiPolygon):
        geometry = max(geometry.geoms, key=lambda part: part.area)
    return shapely.ops.polylabel(geometry)


def _ranking_text(ranked, limit=5):
    """Build the (names, values) text columns summarising the biggest movers.

    *ranked* must hold "NAME" and "birth_rate_change" columns sorted in
    ascending order of change. Both returned strings have the same number of
    lines so they can be drawn side by side. When there are more than
    ``2 * limit`` rows, the top and bottom *limit* are shown separated by an
    ellipsis line; otherwise every row is listed once, so that no country
    appears in both halves.
    """

    def names(frame):
        return "\n".join(f"{name}:" for name in frame["NAME"])

    def values(frame):
        return "\n".join(f"{value:.2f}%" for value in frame["birth_rate_change"])

    if len(ranked) <= 2 * limit:
        everything = ranked[::-1]
        return names(everything), values(everything)

    top = ranked.tail(limit)[::-1]
    bottom = ranked.head(limit)[::-1]
    return (
        f"{names(top)}\n...\n{names(bottom)}",
        f"{values(top)}\n\n{values(bottom)}",
    )


def plot_birth_rate_change(
    continent="Europe",
    data_path="path_to_your_file.csv",
    shapefile_path="path_to_shapefile.shp",
    start_year="2012",
    end_year="2022",
):
    """Plot a choropleth of the percentage change in birth rate for a continent.

    Args:
        continent: Value of the shapefile's "CONTINENT" column to display.
        data_path: CSV with "Country Name", "Country Code" and one column per
            year; the first four lines of the file are skipped.
        shapefile_path: Shapefile providing "NAME", "CONTINENT", "ISO_A3" and
            geometry for each country.
        start_year: Label of the baseline year column (str or int).
        end_year: Label of the comparison year column (str or int).

    Raises:
        ValueError: If no country of *continent* has data for both years.

    The figure is displayed with ``plt.show()``; nothing is returned.
    """
    # Year columns are addressed by their string label; accept ints as well.
    start_year, end_year = str(start_year), str(end_year)

    # Load the birth-rate table and keep only countries with both years present.
    data = pd.read_csv(data_path, skiprows=4)
    data_filtered = data[
        ["Country Name", "Country Code", start_year, end_year]
    ].dropna()

    # Percentage change from start_year to end_year.
    data_filtered["birth_rate_change"] = (
        (data_filtered[end_year] - data_filtered[start_year])
        / data_filtered[start_year]
    ) * 100

    world = gpd.read_file(shapefile_path)
    # Russia is reassigned to Asia so it is drawn on the Asia map only.
    world.loc[world["NAME"] == "Russia", "CONTINENT"] = "Asia"
    world_continent = world[world["CONTINENT"] == continent]

    # Left merge keeps countries without data; they are drawn in grey below.
    # NOTE(review): if the shapefile carries placeholder ISO_A3 codes for some
    # countries they will not match and will show as missing -- verify.
    world_continent = world_continent.merge(
        data_filtered[["Country Code", "birth_rate_change"]],
        left_on="ISO_A3",
        right_on="Country Code",
        how="left",
    )

    changes = world_continent["birth_rate_change"]
    if changes.isna().all():
        raise ValueError(
            f"No birth rate data for {continent!r} between "
            f"{start_year} and {end_year}"
        )

    # Diverging scale centred on zero: green for increases, red for decreases.
    # The -1 / 1 floor keeps vmin < vcenter < vmax when all changes share a sign.
    norm = mcolors.TwoSlopeNorm(
        vmin=min(changes.min(), -1), vcenter=0, vmax=max(changes.max(), 1)
    )
    color_map = plt.cm.RdYlGn

    _, ax = plt.subplots(1, 1, figsize=(15, 10))
    world_continent.boundary.plot(ax=ax, linewidth=1, color="white")
    world_continent.plot(
        column="birth_rate_change",
        ax=ax,
        legend=True,
        legend_kwds={
            "label": "Birth Rate Change",
            "orientation": "vertical",
            "shrink": 0.7,  # To adjust the size of the color bar
            "aspect": 30,  # Aspect ratio of the color bar
            "pad": 0.02,  # Padding between plot and color bar
            "extend": "both",
            "format": lambda x, _: f"{x:.0f}%",
        },
        cmap=color_map,
        norm=norm,
        missing_kwds={"color": "lightgrey"},
    )

    # Label each country that is large enough to hold the text. The largest
    # area is loop-invariant, so it is computed once up front.
    largest_area = world_continent["geometry"].area.max()
    base_font_size = 8  # Base font size for smaller countries
    scale_factor = 5  # Extra points added for the largest country
    for _, row in world_continent.iterrows():
        if pd.isna(row["birth_rate_change"]):
            continue
        area = row["geometry"].area
        if area <= 0.2 * largest_area:  # Adjust the threshold as needed
            continue

        point = _label_anchor(row["geometry"])
        label_style = dict(
            text=f"{row['birth_rate_change']:.2f}%",
            horizontalalignment="center",
            fontsize=base_font_size + scale_factor * (area / largest_area),
        )
        # White text over a faint, slightly offset dark copy (drop shadow).
        ax.annotate(
            **label_style,
            color="black",
            xy=(point.x + 0.1, point.y - 0.1),
            alpha=0.25,
        )
        ax.annotate(**label_style, color="white", xy=(point.x, point.y))

    ax.set_title(
        f"Change in Birth Rate (%) from {start_year} to {end_year} in {continent}",
        fontsize=20,
    )
    ax.set_axis_off()

    # Summary of the largest increases and decreases, drawn as two columns.
    ranked = (
        world_continent[["NAME", "birth_rate_change"]]
        .dropna()
        .sort_values(by="birth_rate_change")
    )
    names_text, values_text = _ranking_text(ranked)

    x0, x1, y = 0.0, 0.35, 0.44
    if continent == "South America":
        x0 = -0.5
        x1 = 0.1

    # Country names, left-aligned at x0.
    ax.text(
        x=x0,
        y=y,
        s=names_text,
        fontsize=12,
        ha="left",
        va="top",
        transform=ax.transAxes,
        linespacing=1.5,
    )
    # Matching values, right-aligned at x1.
    ax.text(
        x=x1,
        y=y,
        s=values_text,
        fontsize=12,
        ha="right",
        va="top",
        transform=ax.transAxes,
        linespacing=1.5,
    )
    ax.text(
        x=0.9,
        y=0,
        s="Source: World Bank, Birth rate, crude",
        fontsize=8,
        ha="right",
        va="bottom",
        transform=ax.transAxes,
    )
    plt.show()
# Default locations of the downloaded birth-rate CSV and countries shapefile.
data_path = os.path.expanduser(
    "~/Downloads/API_SP.DYN.CBRT.IN_DS2_en_csv_v2_3435688/"
    "API_SP.DYN.CBRT.IN_DS2_en_csv_v2_3435688.csv"
)
shapefile_path = os.path.expanduser(
    "~/Downloads/ne_110m_admin_0_countries/ne_110m_admin_0_countries.shp"
)

# Only draw the maps when run as a script, so importing this module does not
# read files or open plot windows.
if __name__ == "__main__":
    for continent in (
        "Africa",
        "Europe",
        "Asia",
        "North America",
        "South America",
        "Oceania",
    ):
        plot_birth_rate_change(
            continent=continent, data_path=data_path, shapefile_path=shapefile_path
        )
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment