Created
August 18, 2015 23:27
Revisions
-
naught101 created this gist
Aug 18, 2015 .There are no files selected for viewing
# -*- coding: utf-8 -*-
"""
Created on Mon Aug 17 10:53:51 2015

Population projections with low-discrepancy sequence based colour scheme.

http://www.storytellingwithdata.com/blog/2015/8/3/visualization-challenge-world-population-forecast

@author: naught101
"""

import pandas as pd
import seaborn as sns
import pylab as plt
import sobol_seq as ss
import numpy as np
import matplotlib as mpl


def rank_argsort(array):
    """Return the 0-based ascending rank of every element of *array*.

    Applying ``argsort`` twice maps each element to the position it would
    occupy in the sorted sequence (smallest value -> 0).
    """
    return array.argsort().argsort()


# Load country data.  ``index_col=0, parse_dates=True`` are the defaults of
# the removed ``DataFrame.from_csv`` that this call replaces.
cntry_data_wide = pd.read_csv(
    './UN population forecasts from Economist charts - Country.csv',
    index_col=0, parse_dates=True)

# Wide -> long: one row per (country, year).
cntry_data = pd.melt(
    cntry_data_wide,
    ['Variant', 'Region', 'Country', 'Notes', 'Country code'],
    var_name='year', value_name='population')

# Calculate means over time - for sorting later
cntry_means = cntry_data[['Country', 'population']].groupby('Country').mean()
cntry_means = cntry_means.join(
    cntry_data_wide[['Country', 'Region']].set_index('Country'))

# Rank countries within their region, largest mean population -> rank 0.
# ``transform`` keeps the result aligned with cntry_means' own index; the
# cast guarantees integer ranks, since they are used as array indices below.
cntry_means['ranks'] = (
    cntry_means.groupby('Region')['population']
    .transform(lambda pop: len(pop) - 1 - rank_argsort(pop.to_numpy()))
    .astype(int)
)

# Rank regions the same way (largest -> 0).  The region count is derived
# from the data instead of being hard-coded.
rgn_means = cntry_data[['Region', 'population']].groupby('Region').mean()
rgn_means['ranks'] = (
    len(rgn_means) - 1 - rank_argsort(rgn_means['population'].to_numpy()))

# generate Sobol-sorted colours
# One evenly spaced base hue per region (plus one spare slot), at fixed
# saturation and value.
hues = np.linspace(0, 1, rgn_means.shape[0] + 1, endpoint=False)
region_hsv = np.array([[hue, 0.8, 0.8] for hue in hues])

# One 3-D Sobol point per within-region rank, centred on zero and scaled to
# a small (h, s, v) offset around the region's base colour.
colour_perturbations = ss.i4_sobol_generate(
    3, int(cntry_means['ranks'].max()) + 1)
colour_perturbations = (colour_perturbations - 0.5) * np.array([0.1, 0.2, 0.2])

region_ranks = rgn_means['ranks'].to_numpy()
rgn_means['colour'] = [
    mpl.colors.rgb2hex(rgb)
    for rgb in mpl.colors.hsv_to_rgb(region_hsv[region_ranks, :])
]

# Country colour = its region's base colour + the perturbation for its rank,
# wrapped back into [0, 1).
country_region_ranks = rgn_means.loc[cntry_means['Region'], 'ranks'].to_numpy()
country_ranks = cntry_means['ranks'].to_numpy()
country_hsv = np.mod(
    region_hsv[country_region_ranks, :]
    + colour_perturbations[country_ranks, :],
    1)
cntry_means['colour'] = [
    mpl.colors.rgb2hex(rgb) for rgb in mpl.colors.hsv_to_rgb(country_hsv)
]

# Copy each country's rank and colour onto the long-format rows.
# NOTE: assumes every country appears exactly once in cntry_means.
cntry_data['ranks'] = cntry_data['Country'].map(cntry_means['ranks'])
cntry_data['colours'] = cntry_data['Country'].map(cntry_means['colour'])
cntry_data = cntry_data.sort_values(['Region', 'year', 'ranks'])

# Plot data
# Stack order: grouped by region, largest country first within each region.
cntry_means_sorted = cntry_means.sort_values(['Region', 'ranks'])
countries = list(cntry_means_sorted.index)

x = np.arange(2015, 2101)
y = cntry_data[['Country', 'year', 'population']].pivot(
    index='year', columns='Country', values='population')
y = y.loc[:, countries] / 1e6

colours = cntry_means_sorted['colour']

fbk = {'lw': 0.0, 'edgecolor': None}

# Plot!
plt.stackplot(x, y.T, colors=colours, **fbk)
plt.xlim((2015, 2100))
plt.ylabel('Population - billions', fontsize=14)
plt.xlabel('Year', fontsize=14)
plt.title('UN population projections 2015-2100', fontsize=16,
          fontweight='bold')

# One legend entry per region, in reverse index order so the legend reads
# top-down in the same order as the stacked bands.
patches = [
    mpl.patches.Patch(color=row['colour'], label=region)
    for region, row in rgn_means.sort_index(ascending=False).iterrows()
]
plt.legend(handles=patches, loc='upper left')