thomasahle · August 19, 2024 00:53
diff --git a/continents.py b/continents.py
 import geopandas as gpd
 import matplotlib.pyplot as plt
 import pandas as pd
 import matplotlib.colors as mcolors
 from shapely.geometry import Polygon, MultiPolygon
 import shapely.ops
 import os.path


 def get_text_color(fill_color):
     """
     Pick a readable text color ("black" or "white") for a given fill color.

     The fill is converted with ``mcolors.to_rgba`` and its brightness is taken
     as the weighted sum 0.299*R + 0.587*G + 0.114*B (alpha is ignored).
     Fills brighter than 0.75 get black text; everything else gets white.
     """
     red, green, blue = mcolors.to_rgba(fill_color)[:3]
     luma = 0.299 * red + 0.587 * green + 0.114 * blue
     if luma > 0.75:
         return "black"
     return "white"


 def plot_birth_rate_change(
     continent="Europe",
     data_path="path_to_your_file.csv",
     shapefile_path="path_to_shapefile.shp",
     start_year="2012",
     end_year="2022",
 ):
     """
     Plot the percentage change in birth rate for the countries of one continent.

     Reads the CSV at ``data_path`` (skipping its first four rows), computes the
     percentage change between the ``start_year`` and ``end_year`` columns,
     joins it onto the shapefile's countries by three-letter code and displays
     a choropleth with ``plt.show()``. Countries without data are drawn in light
     grey, the largest countries get their value printed on the map, and the
     highest and lowest changes are listed as text beside the map.

     Args:
         continent: Value of the shapefile's ``CONTINENT`` column to plot.
             Russia is reassigned to "Asia" before filtering.
         data_path: CSV with "Country Name", "Country Code" and one column
             per year.
         shapefile_path: Shapefile with ``NAME``, ``CONTINENT``, ``ISO_A3`` and
             geometry columns.
         start_year: Label of the baseline year column (str or int).
         end_year: Label of the comparison year column (str or int).

     Returns:
         None. The figure is shown as a side effect.

     Raises:
         KeyError: If one of the required CSV columns is missing.
     """
     # read_csv yields string column labels, so accept integer years as well.
     start_year, end_year = str(start_year), str(end_year)

     # Load the World Bank data
     data = pd.read_csv(data_path, skiprows=4)

     # Keep only countries that have a value for both years. The explicit copy
     # makes the column assignment below operate on an independent frame.
     data_filtered = (
         data[["Country Name", "Country Code", start_year, end_year]]
         .dropna()
         .copy()
     )

     # Percentage change relative to the start year. A zero baseline is masked
     # to NaN so the division cannot yield +/-inf; such countries are then
     # treated as "no data" instead of distorting the color scale.
     baseline = data_filtered[start_year].where(data_filtered[start_year] != 0)
     data_filtered["birth_rate_change"] = (
         (data_filtered[end_year] - baseline) / baseline
     ) * 100

     # Load the world shapefile
     world = gpd.read_file(shapefile_path)
     # Modify the continent assignment for Russia
     world.loc[world["NAME"] == "Russia", "CONTINENT"] = "Asia"

     # Filter the map to only show the specified continent
     world_continent = world[world["CONTINENT"] == continent]

     # NOTE(review): Natural Earth country files are known to carry the
     # placeholder "-99" in ISO_A3 for some countries (reportedly France and
     # Norway, among others), which would silently drop them from the merge.
     # Fall back to ADM0_A3 for those rows when that column exists; confirm
     # against the shapefile in use.
     join_code = world_continent["ISO_A3"]
     if "ADM0_A3" in world_continent.columns:
         join_code = join_code.where(
             join_code != "-99", world_continent["ADM0_A3"]
         )

     # Merge the world map with the birth rate data
     world_continent = world_continent.assign(join_code=join_code).merge(
         data_filtered[["Country Code", "birth_rate_change"]],
         left_on="join_code",
         right_on="Country Code",
         how="left",
     )

     changes = world_continent["birth_rate_change"]
     has_data = bool(changes.notna().any())

     # Color scale centered on zero: green for positive, red for negative.
     # The limits are widened to at least +/-1 so both slopes exist; when no
     # country has data min()/max() are NaN and the +/-1 defaults are used.
     min_value, max_value = changes.min(), changes.max()
     norm = mcolors.TwoSlopeNorm(
         vmin=min(min_value, -1) if pd.notna(min_value) else -1,
         vcenter=0,
         vmax=max(max_value, 1) if pd.notna(max_value) else 1,
     )
     color_map = plt.cm.RdYlGn

     # Plot the data on a map
     _, ax = plt.subplots(1, 1, figsize=(15, 10))
     world_continent.boundary.plot(ax=ax, linewidth=1, color="white")
     if has_data:
         world_continent.plot(
             column="birth_rate_change",
             ax=ax,
             legend=True,
             legend_kwds={
                 "label": "Birth Rate Change",
                 "orientation": "vertical",
                 "shrink": 0.7,  # To adjust the size of the color bar
                 "aspect": 30,  # Aspect ratio of the color bar
                 "pad": 0.02,  # Padding between plot and color bar
                 "extend": "both",
                 "format": lambda x, _: f"{x:.0f}%",
             },
             cmap=color_map,
             norm=norm,
             missing_kwds={"color": "lightgrey"},
         )
     else:
         # Nothing to color by: draw every country in the "missing" color.
         world_continent.plot(ax=ax, color="lightgrey")

     # Add the percentage change label on each country if it's large enough.
     # The largest area is the same for every row, so compute it once.
     largest_area = world_continent["geometry"].area.max()
     for _, row in world_continent.iterrows():
         change = row["birth_rate_change"]
         geometry = row["geometry"]
         if pd.isna(change):
             continue
         # Only countries above 20% of the largest area get a label
         # (adjust the threshold as needed).
         if geometry.area <= 0.2 * largest_area:
             continue

         # Place the text at the interior label point of the country's
         # largest polygon.
         if isinstance(geometry, MultiPolygon):
             poly = max(geometry.geoms, key=lambda part: part.area)
         else:
             poly = geometry
         point = shapely.ops.polylabel(poly)

         # Font size: 8 pt base plus up to 5 pt, scaled by the country's
         # share of the largest area.
         font_size = 8 + 5 * (geometry.area / largest_area)

         # Choose black or white text depending on how bright the country's
         # fill color is, so labels stay readable on light fills too.
         fill_color = color_map(norm(change))
         text_color = get_text_color(fill_color)

         label = dict(
             text=f"{change:.2f}%",
             horizontalalignment="center",
             fontsize=font_size,
         )
         # Faint offset copy as a drop shadow, then the label itself.
         ax.annotate(
             **label,
             color="black",
             xy=(point.x + 0.1, point.y - 0.1),
             alpha=0.25,
         )
         ax.annotate(**label, color=text_color, xy=(point.x, point.y))

     # Title and other customizations
     ax.set_title(
         f"Change in Birth Rate (%) from {start_year} to {end_year} in {continent}",
         fontsize=20,
     )
     ax.set_axis_off()

     # Sort the countries by birth rate change (ascending)
     ranked = (
         world_continent[["NAME", "birth_rate_change"]]
         .dropna()
         .sort_values(by="birth_rate_change")
     )

     # The 5 largest changes, shown in descending order.
     top = ranked.tail(5)[::-1]
     # The 5 smallest changes, also descending. Rows already listed in `top`
     # are excluded so a continent with fewer than 10 countries does not show
     # the same country twice.
     bottom = ranked.iloc[:-5].head(5)[::-1]

     # Prepare the text columns: names on the left, values on the right. The
     # two sections are separated by "..." in the names column and by a blank
     # line in the values column so that the rows stay aligned.
     sections = [part for part in (top, bottom) if not part.empty]
     names_text = "\n...\n".join(
         "\n".join(f"{name}:" for name in part["NAME"]) for part in sections
     )
     values_text = "\n\n".join(
         "\n".join(f"{value:.2f}%" for value in part["birth_rate_change"])
         for part in sections
     )

     x0, x1, y = 0.0, 0.35, 0.44
     if continent == "South America":
         x0 = -0.5
         x1 = 0.1

     # Add the labels to the bottom left corner of the plot with increased
     # line spacing. First the country names...
     ax.text(
         x=x0,
         y=y,
         s=names_text,
         fontsize=12,
         ha="left",
         va="top",
         transform=ax.transAxes,
         linespacing=1.5,
     )

     # ...then the corresponding values, right-aligned next to them.
     ax.text(
         x=x1,
         y=y,
         s=values_text,
         fontsize=12,
         ha="right",
         va="top",
         transform=ax.transAxes,
         linespacing=1.5,
     )

     ax.text(
         x=0.9,
         y=0,
         s="Source: World Bank, Birth rate, crude",
         fontsize=8,
         ha="right",
         va="bottom",
         transform=ax.transAxes,
     )

     plt.show()


 # Input files, both expected under the user's Downloads directory:
 # the World Bank CSV export and the country shapefile.
 data_path = os.path.expanduser(
     "~/Downloads/API_SP.DYN.CBRT.IN_DS2_en_csv_v2_3435688/API_SP.DYN.CBRT.IN_DS2_en_csv_v2_3435688.csv"
 )
 shapefile_path = os.path.expanduser(
     "~/Downloads/ne_110m_admin_0_countries/ne_110m_admin_0_countries.shp"
 )

 # Show one map per continent, using the function's default years.
 for continent in (
     "Africa",
     "Europe",
     "Asia",
     "North America",
     "South America",
     "Oceania",
 ):
     plot_birth_rate_change(
         shapefile_path=shapefile_path,
         data_path=data_path,
         continent=continent,
     )
	import geopandas as gpd
	import matplotlib.pyplot as plt
	import pandas as pd
	import matplotlib.colors as mcolors
	from shapely.geometry import Polygon, MultiPolygon
	import shapely.ops
	import os.path


	def get_text_color(fill_color):
	    """
	    Pick a readable text color ("black" or "white") for a given fill color.

	    The fill is converted with ``mcolors.to_rgba`` and its brightness is taken
	    as the weighted sum 0.299*R + 0.587*G + 0.114*B (alpha is ignored).
	    Fills brighter than 0.75 get black text; everything else gets white.
	    """
	    red, green, blue = mcolors.to_rgba(fill_color)[:3]
	    luma = 0.299 * red + 0.587 * green + 0.114 * blue
	    if luma > 0.75:
	        return "black"
	    return "white"


	def plot_birth_rate_change(
	    continent="Europe",
	    data_path="path_to_your_file.csv",
	    shapefile_path="path_to_shapefile.shp",
	    start_year="2012",
	    end_year="2022",
	):
	    """
	    Plot the percentage change in birth rate for the countries of one continent.

	    Reads the CSV at ``data_path`` (skipping its first four rows), computes the
	    percentage change between the ``start_year`` and ``end_year`` columns,
	    joins it onto the shapefile's countries by three-letter code and displays
	    a choropleth with ``plt.show()``. Countries without data are drawn in light
	    grey, the largest countries get their value printed on the map, and the
	    highest and lowest changes are listed as text beside the map.

	    Args:
	        continent: Value of the shapefile's ``CONTINENT`` column to plot.
	            Russia is reassigned to "Asia" before filtering.
	        data_path: CSV with "Country Name", "Country Code" and one column
	            per year.
	        shapefile_path: Shapefile with ``NAME``, ``CONTINENT``, ``ISO_A3`` and
	            geometry columns.
	        start_year: Label of the baseline year column (str or int).
	        end_year: Label of the comparison year column (str or int).

	    Returns:
	        None. The figure is shown as a side effect.

	    Raises:
	        KeyError: If one of the required CSV columns is missing.
	    """
	    # read_csv yields string column labels, so accept integer years as well.
	    start_year, end_year = str(start_year), str(end_year)

	    # Load the World Bank data
	    data = pd.read_csv(data_path, skiprows=4)

	    # Keep only countries that have a value for both years. The explicit copy
	    # makes the column assignment below operate on an independent frame.
	    data_filtered = (
	        data[["Country Name", "Country Code", start_year, end_year]]
	        .dropna()
	        .copy()
	    )

	    # Percentage change relative to the start year. A zero baseline is masked
	    # to NaN so the division cannot yield +/-inf; such countries are then
	    # treated as "no data" instead of distorting the color scale.
	    baseline = data_filtered[start_year].where(data_filtered[start_year] != 0)
	    data_filtered["birth_rate_change"] = (
	        (data_filtered[end_year] - baseline) / baseline
	    ) * 100

	    # Load the world shapefile
	    world = gpd.read_file(shapefile_path)
	    # Modify the continent assignment for Russia
	    world.loc[world["NAME"] == "Russia", "CONTINENT"] = "Asia"

	    # Filter the map to only show the specified continent
	    world_continent = world[world["CONTINENT"] == continent]

	    # NOTE(review): Natural Earth country files are known to carry the
	    # placeholder "-99" in ISO_A3 for some countries (reportedly France and
	    # Norway, among others), which would silently drop them from the merge.
	    # Fall back to ADM0_A3 for those rows when that column exists; confirm
	    # against the shapefile in use.
	    join_code = world_continent["ISO_A3"]
	    if "ADM0_A3" in world_continent.columns:
	        join_code = join_code.where(
	            join_code != "-99", world_continent["ADM0_A3"]
	        )

	    # Merge the world map with the birth rate data
	    world_continent = world_continent.assign(join_code=join_code).merge(
	        data_filtered[["Country Code", "birth_rate_change"]],
	        left_on="join_code",
	        right_on="Country Code",
	        how="left",
	    )

	    changes = world_continent["birth_rate_change"]
	    has_data = bool(changes.notna().any())

	    # Color scale centered on zero: green for positive, red for negative.
	    # The limits are widened to at least +/-1 so both slopes exist; when no
	    # country has data min()/max() are NaN and the +/-1 defaults are used.
	    min_value, max_value = changes.min(), changes.max()
	    norm = mcolors.TwoSlopeNorm(
	        vmin=min(min_value, -1) if pd.notna(min_value) else -1,
	        vcenter=0,
	        vmax=max(max_value, 1) if pd.notna(max_value) else 1,
	    )
	    color_map = plt.cm.RdYlGn

	    # Plot the data on a map
	    _, ax = plt.subplots(1, 1, figsize=(15, 10))
	    world_continent.boundary.plot(ax=ax, linewidth=1, color="white")
	    if has_data:
	        world_continent.plot(
	            column="birth_rate_change",
	            ax=ax,
	            legend=True,
	            legend_kwds={
	                "label": "Birth Rate Change",
	                "orientation": "vertical",
	                "shrink": 0.7,  # To adjust the size of the color bar
	                "aspect": 30,  # Aspect ratio of the color bar
	                "pad": 0.02,  # Padding between plot and color bar
	                "extend": "both",
	                "format": lambda x, _: f"{x:.0f}%",
	            },
	            cmap=color_map,
	            norm=norm,
	            missing_kwds={"color": "lightgrey"},
	        )
	    else:
	        # Nothing to color by: draw every country in the "missing" color.
	        world_continent.plot(ax=ax, color="lightgrey")

	    # Add the percentage change label on each country if it's large enough.
	    # The largest area is the same for every row, so compute it once.
	    largest_area = world_continent["geometry"].area.max()
	    for _, row in world_continent.iterrows():
	        change = row["birth_rate_change"]
	        geometry = row["geometry"]
	        if pd.isna(change):
	            continue
	        # Only countries above 20% of the largest area get a label
	        # (adjust the threshold as needed).
	        if geometry.area <= 0.2 * largest_area:
	            continue

	        # Place the text at the interior label point of the country's
	        # largest polygon.
	        if isinstance(geometry, MultiPolygon):
	            poly = max(geometry.geoms, key=lambda part: part.area)
	        else:
	            poly = geometry
	        point = shapely.ops.polylabel(poly)

	        # Font size: 8 pt base plus up to 5 pt, scaled by the country's
	        # share of the largest area.
	        font_size = 8 + 5 * (geometry.area / largest_area)

	        # Choose black or white text depending on how bright the country's
	        # fill color is, so labels stay readable on light fills too.
	        fill_color = color_map(norm(change))
	        text_color = get_text_color(fill_color)

	        label = dict(
	            text=f"{change:.2f}%",
	            horizontalalignment="center",
	            fontsize=font_size,
	        )
	        # Faint offset copy as a drop shadow, then the label itself.
	        ax.annotate(
	            **label,
	            color="black",
	            xy=(point.x + 0.1, point.y - 0.1),
	            alpha=0.25,
	        )
	        ax.annotate(**label, color=text_color, xy=(point.x, point.y))

	    # Title and other customizations
	    ax.set_title(
	        f"Change in Birth Rate (%) from {start_year} to {end_year} in {continent}",
	        fontsize=20,
	    )
	    ax.set_axis_off()

	    # Sort the countries by birth rate change (ascending)
	    ranked = (
	        world_continent[["NAME", "birth_rate_change"]]
	        .dropna()
	        .sort_values(by="birth_rate_change")
	    )

	    # The 5 largest changes, shown in descending order.
	    top = ranked.tail(5)[::-1]
	    # The 5 smallest changes, also descending. Rows already listed in `top`
	    # are excluded so a continent with fewer than 10 countries does not show
	    # the same country twice.
	    bottom = ranked.iloc[:-5].head(5)[::-1]

	    # Prepare the text columns: names on the left, values on the right. The
	    # two sections are separated by "..." in the names column and by a blank
	    # line in the values column so that the rows stay aligned.
	    sections = [part for part in (top, bottom) if not part.empty]
	    names_text = "\n...\n".join(
	        "\n".join(f"{name}:" for name in part["NAME"]) for part in sections
	    )
	    values_text = "\n\n".join(
	        "\n".join(f"{value:.2f}%" for value in part["birth_rate_change"])
	        for part in sections
	    )

	    x0, x1, y = 0.0, 0.35, 0.44
	    if continent == "South America":
	        x0 = -0.5
	        x1 = 0.1

	    # Add the labels to the bottom left corner of the plot with increased
	    # line spacing. First the country names...
	    ax.text(
	        x=x0,
	        y=y,
	        s=names_text,
	        fontsize=12,
	        ha="left",
	        va="top",
	        transform=ax.transAxes,
	        linespacing=1.5,
	    )

	    # ...then the corresponding values, right-aligned next to them.
	    ax.text(
	        x=x1,
	        y=y,
	        s=values_text,
	        fontsize=12,
	        ha="right",
	        va="top",
	        transform=ax.transAxes,
	        linespacing=1.5,
	    )

	    ax.text(
	        x=0.9,
	        y=0,
	        s="Source: World Bank, Birth rate, crude",
	        fontsize=8,
	        ha="right",
	        va="bottom",
	        transform=ax.transAxes,
	    )

	    plt.show()


	# Input files, both expected under the user's Downloads directory:
	# the World Bank CSV export and the country shapefile.
	data_path = os.path.expanduser(
	    "~/Downloads/API_SP.DYN.CBRT.IN_DS2_en_csv_v2_3435688/API_SP.DYN.CBRT.IN_DS2_en_csv_v2_3435688.csv"
	)
	shapefile_path = os.path.expanduser(
	    "~/Downloads/ne_110m_admin_0_countries/ne_110m_admin_0_countries.shp"
	)

	# Show one map per continent, using the function's default years.
	for continent in (
	    "Africa",
	    "Europe",
	    "Asia",
	    "North America",
	    "South America",
	    "Oceania",
	):
	    plot_birth_rate_change(
	        shapefile_path=shapefile_path,
	        data_path=data_path,
	        continent=continent,
	    )
No results found