Skip to content

Instantly share code, notes, and snippets.

@zufanka
Last active February 2, 2022 16:49
Show Gist options
  • Select an option

  • Save zufanka/e652b0723681fd3760fa8de5c9e71d1f to your computer and use it in GitHub Desktop.

Select an option

Save zufanka/e652b0723681fd3760fa8de5c9e71d1f to your computer and use it in GitHub Desktop.

Imports needed

import pandas as pd
import numpy as np

from IPython.display import Markdown, display

def printmd(string):
    """Show *string* in the notebook output, rendered as Markdown."""
    rendered = Markdown(string)
    display(rendered)
    
# Loosen pandas' notebook display limits so wide text columns, long frames
# and many columns are shown in full, and format floats with thousands
# separators and two decimals.
pd.set_option("max_colwidth", 500)
pd.set_option("display.max_rows", 999)
pd.set_option("display.max_columns", 100)
pd.set_option("display.float_format", '{:,.2f}'.format)

Review some sample data

# Three random rows, transposed so that each column of `df` becomes a row.
df.sample(n=3).transpose()

Number of unique values per column, sorted ascending

# Report how many distinct values each column of `df` holds, listing the
# columns with the fewest distinct values first.
printmd('Unique values (sorted on least unique values):')

# Column name -> number of distinct values (NaN counts as a value, because
# Series.unique() includes it).
uvalues = {col: len(df[col].unique()) for col in df.columns}

# The original sorted `uv`, a name that is never defined, so the cell raised
# NameError; the dictionary built above is the intended input.
# A plain loop is used because printmd() is called only for its side effect.
for col, n_unique in sorted(uvalues.items(), key=lambda item: item[1]):
    printmd(f'**{col}** : {n_unique}')

Colorful pivot counting categorical variables

def color_table(v, dataquantiles, colors):
    """Return the CSS style string for a cell value based on quantile bins.

    The value is placed into the first bin whose upper bound it does not
    exceed; values above the last bound get the last colour.

    Parameters
    ----------
    v : scalar
        The cell value to colour.
    dataquantiles : list
        Upper bounds of the bins (quantiles computed from the data), in
        ascending order.  Any length is supported.
    colors : list
        CSS colour strings, one per bin.  Must contain exactly one more
        entry than ``dataquantiles`` (the extra one is for values above the
        last bound).

    Returns
    -------
    str or None
        ``'color:white;background-color:<colour>'`` for the matching bin.
        ``None`` when the lengths do not match (a message is printed), or
        when ``v`` cannot be ordered against the bounds (e.g. ``v`` is NaN).
    """
    if len(dataquantiles) + 1 != len(colors):
        print("Quantiles and colors lenght mismatch!")
        return

    base = 'color:white;background-color:'

    # NaN compares False with everything; such cells stay unstyled.
    if v != v:
        return None

    # Walk the bins from lowest to highest and stop at the first upper bound
    # that contains v.  zip() ignores the extra (last) colour here.
    for upper, color in zip(dataquantiles, colors):
        if v <= upper:
            return f'{base}{color}'

    # Above every bound (or there are no bounds at all): last colour.
    if len(dataquantiles) == 0 or v > dataquantiles[-1]:
        return f'{base}{colors[-1]}'

    # Only reachable when the bounds themselves are not comparable (NaN).
    return None

def colorful_pivot_count(df, index, columns, values, decimals):
    """Pivot two categorical variables, count a third, and colour the cells.

    Parameters
    ----------
    df : pandas.DataFrame
        Source data.
    index : str
        Column whose categories become the rows of the pivot.
    columns : str
        Column whose categories become the columns of the pivot.
    values : str
        Column whose non-null entries are counted per (index, columns) pair.
    decimals : int
        Number of decimals used when formatting the displayed counts.

    Returns
    -------
    pandas.io.formats.style.Styler
        The pivot table, with every cell coloured by ``color_table``
        according to the 10/25/50/75/90/99 % quantiles of all counts.
    """
    # Count non-null `values` per (index, columns) pair, then reshape to a
    # wide table.  Each pair occurs once after the groupby, so pivot_table's
    # aggregation only sees a single value per cell.
    counts = df.groupby([index, columns]).count()[values].reset_index()
    pivot = pd.pivot_table(counts, index=index, columns=columns, values=values)

    quantiles = [10, 25, 50, 75, 90, 99]
    colors = ["#E7DAE6", "#CFB5CD", "#B690B3", "#9E6B9A", "#7B5178", "#634060","#312030"]

    # Bin edges are taken over all cells; NaN (missing combinations) ignored.
    dataquantiles = [np.nanquantile(pivot.values, q / 100) for q in quantiles]

    styler = pivot.style.format(f'{{:.{decimals}f}}')

    # Styler.applymap was renamed to Styler.map in pandas 2.1 and the old
    # name is deprecated; use whichever the installed pandas provides.
    cellwise = styler.map if hasattr(styler, "map") else styler.applymap
    return cellwise(lambda v: color_table(v, dataquantiles, colors))
    
# Example: count "Intervention" entries per Country (rows) and Year
# (columns), displayed without decimals.
index, columns, values = "Country", "Year", "Intervention"
decimals = 0

colorful_pivot_count(df, index=index, columns=columns, values=values, decimals=decimals)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment