Skip to content

Instantly share code, notes, and snippets.

@naught101
Created August 18, 2015 23:27

Revisions

  1. naught101 created this gist Aug 18, 2015.
    77 changes: 77 additions & 0 deletions pop_projections.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,77 @@
    # -*- coding: utf-8 -*-
    """
    Created on Mon Aug 17 10:53:51 2015
    Population projections with low-discrepancy sequence based colour scheme.
    http://www.storytellingwithdata.com/blog/2015/8/3/visualization-challenge-world-population-forecast
    @author: naught101
    """

    import pandas as pd
    import seaborn as sns
    import pylab as plt
    import sobol_seq as ss
    import numpy as np
    import matplotlib as mpl


    def rank_argsort(array):
        """Return the 0-based ascending rank of every element of *array*.

        The smallest element gets rank 0 and the largest gets ``len(array) - 1``.
        Works on anything exposing ``argsort()`` (numpy arrays, pandas Series);
        the result has the same type as ``array.argsort()`` returns, so a
        Series keeps its index.

        Ties are broken by whatever order ``argsort`` happens to produce.
        """
        # argsort gives "which position holds the k-th smallest value";
        # applying it again inverts that permutation into "rank of position i".
        order = array.argsort()
        ranks = order.argsort()
        return ranks


    # Load country data
    # DataFrame.from_csv was deprecated and later removed from pandas;
    # read_csv with index_col=0 / parse_dates=True reproduces its defaults
    # (first column becomes the index).
    cntry_data_wide = pd.read_csv(
        './UN population forecasts from Economist charts - Country.csv',
        index_col=0, parse_dates=True)
    # Reshape wide -> long: every column that is not an id column is treated
    # as a year, giving one row per (country, year) with its population.
    cntry_data = pd.melt(
        cntry_data_wide,
        id_vars=['Variant', 'Region', 'Country', 'Notes', 'Country code'],
        var_name='year', value_name='population')

    # Calculate means over time - for sorting later
    # (.loc replaces the removed .ix indexer; selection is by column label.)
    cntry_means = cntry_data.loc[:, ['Country', 'population']].groupby('Country').mean()
    cntry_means = cntry_means.join(cntry_data_wide[['Country', 'Region']].set_index('Country'))
    # Rank countries within their region, 0 = largest mean population.
    # group_keys=False keeps the result indexed by Country only, so it aligns
    # with cntry_means on assignment regardless of pandas version.
    cntry_means['ranks'] = (
        cntry_means.groupby('Region', group_keys=False)['population']
        .apply(lambda x: len(x) - 1 - rank_argsort(x)))
    rgn_means = cntry_data.loc[:, ['Region', 'population']].groupby('Region').mean()
    # Rank regions the same way. The offset is derived from the number of
    # regions rather than hard-coded as 5, which was only correct for
    # exactly six regions.
    rgn_means['ranks'] = len(rgn_means) - 1 - rank_argsort(rgn_means['population'])

    # generate Sobol-sorted colours
    # Evenly spaced base hues (one more slot than there are regions), all with
    # saturation and value fixed at 0.8.
    hues = np.linspace(0, 1, rgn_means.shape[0] + 1, endpoint=False)
    region_hsv = np.array([[hue, 0.8, 0.8] for hue in hues])

    # One 3-D Sobol point per within-region rank, centred on zero and scaled
    # per channel (hue x0.1, saturation x0.2, value x0.2). Assumes the generator
    # returns an (n, 3) array, as the original axis-1 row-wise scaling did.
    colour_perturbations = ss.i4_sobol_generate(3, cntry_means['ranks'].max() + 1)
    colour_perturbations = (colour_perturbations - 0.5) * np.array([0.1, 0.2, 0.2])

    # Region colour: the base HSV row selected by the region's rank, as a hex string.
    region_ranks = rgn_means['ranks'].values
    rgn_means['colour'] = [
        mpl.colors.rgb2hex(rgb)
        for rgb in mpl.colors.hsv_to_rgb(region_hsv[region_ranks, :])]

    # Country colour: its region's base HSV plus the perturbation for the
    # country's within-region rank; mod 1 wraps every channel back into [0, 1).
    # (.loc replaces the removed .ix indexer for the label lookup.)
    cntry_region_ranks = rgn_means.loc[cntry_means['Region'], 'ranks'].values
    cntry_hsv = np.mod(
        region_hsv[cntry_region_ranks, :]
        + colour_perturbations[cntry_means['ranks'].values, :], 1)
    cntry_means['colour'] = [
        mpl.colors.rgb2hex(rgb) for rgb in mpl.colors.hsv_to_rgb(cntry_hsv)]

    # Attach each country's within-region rank and colour to the long table.
    # A keyed join on 'Country' (against cntry_means' index) keeps the rows of
    # cntry_data in place and cannot misalign them, unlike merging into a fresh
    # frame and assigning the result back by position.
    cntry_data = cntry_data.join(
        cntry_means[['ranks', 'colour']].rename(columns={'colour': 'colours'}),
        on='Country')
    # DataFrame.sort was removed from pandas; sort_values is its replacement.
    cntry_data = cntry_data.sort_values(['Region', 'year', 'ranks'])

    # Plot data
    # Order countries by region, then by within-region rank, so that bands of
    # the same region are stacked next to each other. Sort once and reuse the
    # result for both the column order and the matching colours.
    cntry_sorted = cntry_means.sort_values(['Region', 'ranks'])
    countries = list(cntry_sorted.index)
    x = np.arange(2015, 2101)
    # One row per year, one column per country.
    y = cntry_data.pivot(index='year', columns='Country', values='population')
    # NOTE(review): the axis label says billions, so the source figures are
    # presumably in thousands - confirm against the CSV.
    y = y.loc[:, countries] / 1e6
    colours = cntry_sorted['colour']
    # Extra keyword arguments for stackplot: no outlines between the bands.
    fbk = {'lw': 0.0, 'edgecolor': None}

    # Plot!
    # stackplot expects one row per series, hence the transpose.
    plt.stackplot(x, y.T, colors=colours, **fbk)
    plt.xlim((2015, 2100))
    plt.ylabel('Population - billions', fontsize=14)
    plt.xlabel('Year', fontsize=14)
    plt.title('UN population projections 2015-2100', fontsize=16, fontweight='bold')

    # Legend: one patch per region in its base colour. Regions are listed in
    # descending index order, the reverse of the ascending region order used
    # for stacking. (sort_index replaces the removed DataFrame.sort, which
    # sorted by index when no columns were given.)
    patches = [
        mpl.patches.Patch(color=row['colour'], label=region)
        for region, row in rgn_means.sort_index(ascending=False).iterrows()]
    plt.legend(handles=patches, loc='upper left')