Created
August 18, 2015 23:27
-
-
Save naught101/e7073751c0fa6d6bb6e6 to your computer and use it in GitHub Desktop.
Population projections
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*-
"""
Created on Mon Aug 17 10:53:51 2015
Population projections with low-discrepancy sequence based colour scheme.
http://www.storytellingwithdata.com/blog/2015/8/3/visualization-challenge-world-population-forecast
@author: naught101
"""
import pandas as pd | |
import seaborn as sns | |
import pylab as plt | |
import sobol_seq as ss | |
import numpy as np | |
import matplotlib as mpl | |
def rank_argsort(array):
    """Return the 0-based rank of every element of ``array``.

    ``array`` must provide an ``argsort()`` method whose result also
    provides ``argsort()`` (e.g. a NumPy array or a pandas Series).

    The first ``argsort`` yields the permutation that sorts the data;
    applying ``argsort`` to that permutation inverts it, which gives each
    element's position in ascending sorted order (0 = smallest).
    """
    ordering = array.argsort()
    return ordering.argsort()
# Load country data
# DataFrame.from_csv was removed in pandas 1.0. read_csv with index_col=0 and
# parse_dates=True reproduces its defaults (first CSV column becomes the index).
cntry_data_wide = pd.read_csv(
    './UN population forecasts from Economist charts - Country.csv',
    index_col=0, parse_dates=True)
# Reshape from wide (one column per year) to long (one row per country-year):
# every column that is not an id column becomes a 'year' / 'population' pair.
cntry_data = pd.melt(
    cntry_data_wide,
    id_vars=['Variant', 'Region', 'Country', 'Notes', 'Country code'],
    var_name='year', value_name='population')
# Calculate means over time - for sorting later
# (.ix was removed in pandas 1.0; plain column selection is equivalent here.)
cntry_means = cntry_data[['Country', 'population']].groupby('Country').mean()
# Attach each country's region, looked up from the wide table by country name.
cntry_means = cntry_means.join(
    cntry_data_wide[['Country', 'Region']].set_index('Country'))
# Rank countries within their region, 0 = largest mean population.
# transform() returns a result aligned with cntry_means' own index, whereas
# apply() may prepend the group key to the index in newer pandas versions,
# which would break the column assignment. astype(int) keeps the ranks usable
# as array indices further down.
cntry_means['ranks'] = (
    cntry_means.groupby('Region')['population']
    .transform(lambda x: len(x) - 1 - rank_argsort(x))
    .astype(int))
rgn_means = cntry_data[['Region', 'population']].groupby('Region').mean()
# Rank regions, 0 = largest mean. The offset is derived from the number of
# regions instead of the previous hard-coded 5, which was only correct for
# exactly six regions.
rgn_means['ranks'] = len(rgn_means) - 1 - rank_argsort(rgn_means['population'])
# generate Sobol-sorted colours
# Evenly spaced base hues with fixed saturation and value of 0.8. One more hue
# than there are regions is generated; rows are picked by region rank below.
hues = np.linspace(0, 1, rgn_means.shape[0] + 1, endpoint=False)
region_hsv = np.array([[hue, 0.8, 0.8] for hue in hues])
# One 3-D Sobol point per within-region rank, shifted by -0.5 and scaled per
# HSV channel (broadcasting replaces the former per-row lambda).
# NOTE(review): assuming the Sobol points lie in [0, 1), this is at most
# +/-0.05 in hue and +/-0.1 in saturation/value - confirm against sobol_seq.
n_perturbations = int(cntry_means['ranks'].max()) + 1
colour_perturbations = ss.i4_sobol_generate(3, n_perturbations)
colour_perturbations = (colour_perturbations - 0.5) * [0.1, 0.2, 0.2]

# Region colours: the base HSV row selected by each region's rank, as hex.
region_rank = rgn_means['ranks'].values.astype(int)
rgn_means['colour'] = [
    mpl.colors.rgb2hex(rgb)
    for rgb in mpl.colors.hsv_to_rgb(region_hsv[region_rank, :])]

# Country colours: the base colour of the country's region plus the
# perturbation selected by the country's within-region rank. np.mod wraps the
# sum back into [0, 1) so hsv_to_rgb receives valid input.
# (.ix was removed in pandas 1.0; .loc performs the same label lookup.)
country_region_rank = (
    rgn_means.loc[cntry_means['Region'], 'ranks'].values.astype(int))
country_rank = cntry_means['ranks'].values.astype(int)
country_hsv = np.mod(
    region_hsv[country_region_rank, :] + colour_perturbations[country_rank, :],
    1)
cntry_means['colour'] = [
    mpl.colors.rgb2hex(rgb) for rgb in mpl.colors.hsv_to_rgb(country_hsv)]
# Attach each country's rank and colour to the long-format rows.
# cntry_means is indexed by Country, so Series.map looks every row's country
# up directly; this avoids relying on the row order and fresh index of a merge
# result lining up with cntry_data's index.
cntry_data['ranks'] = cntry_data['Country'].map(cntry_means['ranks'])
cntry_data['colours'] = cntry_data['Country'].map(cntry_means['colour'])
# DataFrame.sort was removed in pandas 0.20; sort_values is its replacement.
cntry_data = cntry_data.sort_values(['Region', 'year', 'ranks'])
# Plot data
# Countries ordered by region, then by within-region rank (0 = largest mean
# population). DataFrame.sort was removed in pandas 0.20, so sort_values is
# used, and the sorted frame is computed once for both the country order and
# the matching colour order.
ordered_means = cntry_means.sort_values(['Region', 'ranks'])
countries = list(ordered_means.index)
x = np.arange(2015, 2101)
# One row per year, one column per country.
# (.ix was removed in pandas 1.0; column selection / .loc are equivalent.)
y = cntry_data[['Country', 'year', 'population']].pivot(
    index='year', columns='Country', values='population')
# NOTE(review): the axis label says billions, so the CSV presumably stores
# thousands of people - confirm against the data file.
y = y.loc[:, countries] / 1e6
colours = list(ordered_means['colour'])
fbk = {'lw': 0.0, 'edgecolor': None}

# Plot!
plt.stackplot(x, y.T, colors=colours, **fbk)
plt.xlim((2015, 2100))
plt.ylabel('Population - billions', fontsize=14)
plt.xlabel('Year', fontsize=14)
plt.title('UN population projections 2015-2100', fontsize=16, fontweight='bold')
# Legend: one patch per region, in reverse index (region name) order.
# The old argument-less DataFrame.sort(ascending=False) sorted by index labels,
# so sort_index is the faithful replacement.
patches = [
    mpl.patches.Patch(color=row['colour'], label=region)
    for region, row in rgn_means.sort_index(ascending=False).iterrows()]
plt.legend(handles=patches, loc='upper left')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment