import pandas as pd
import numpy as np
from IPython .display import Markdown , display
def printmd(string):
    """Show ``string`` as Markdown by passing it through IPython's ``display``."""
    rendered = Markdown(string)
    display(rendered)
# pandas display configuration: wide text columns, many rows/columns shown,
# and floats printed with thousands separators and two decimals.
pd.options.display.max_colwidth = 500
pd.set_option("display.max_rows", 999)
pd.set_option("display.max_columns", 100)
pd.set_option("display.float_format", '{:,.2f}'.format)
# Unique values sorted on frequency
# Report the number of distinct values held by each column of ``df``, listing
# the columns with the fewest distinct values first.
# NOTE(review): ``df`` is not defined in this part of the file; it is assumed
# to be a pandas DataFrame created earlier -- confirm.
printmd('Unique values (sorted on least unique values):')

# len(Series.unique()) is used on purpose: it counts NaN as a value of its own.
uvalues = {x: len(df[x].unique()) for x in df.columns}

# A plain loop replaces the former list comprehension, which was evaluated only
# for its side effects and iterated an undefined name (``uv``).
for column_name, n_unique in sorted(uvalues.items(), key=lambda item: item[1]):
    # The closing ``**`` must directly follow the name, otherwise Markdown
    # does not treat it as the end of the bold span.
    printmd(f'**{column_name}** : {n_unique} ')
# Colorful pivot counting categorical variables
def color_table(v, dataquantiles, colors):
    '''
    Return the CSS declaration that colors one cell according to its value.

    v             : the cell value to classify
    dataquantiles : quantiles from the data as list; expected in ascending
                    order, they act as the upper bounds of the color bands
    colors        : colors to color as list; must hold exactly one more entry
                    than ``dataquantiles`` (the last color is used for values
                    above the highest quantile)

    Returns a string of the form
    ``'color:white;background-color: <color> '``, or ``None`` when the list
    lengths do not match (a message is printed in that case) or when ``v``
    cannot be ordered against the quantiles (e.g. NaN).

    Works for any number of quantiles, not only six.
    '''
    n_quantiles = len(dataquantiles)
    if n_quantiles + 1 != len(colors):
        print("Quantiles and colors length mismatch!")
        return None

    base = 'color:white;background-color:'

    # The first quantile that is >= v decides the band; zip stops at the
    # shorter list, so the extra last color is never consumed here.
    for upper_bound, color in zip(dataquantiles, colors):
        if v <= upper_bound:
            return f'{base} {color} '

    # Above every quantile (or no quantiles were given): use the last color.
    if n_quantiles == 0 or v > dataquantiles[-1]:
        return f'{base} {colors[-1]} '

    # Every comparison was False (NaN): leave the cell unstyled.
    return None
def colorful_pivot_count (df , index , columns , values , decimals ):
'''
for categorical variables
pivot two variables and count some values
color according to calculated quantiles
'''
pivot = pd .pivot_table (df .groupby ([index , columns ]).count ()[values ].reset_index (), index = index , columns = columns , values = values )
quantiles = [10 , 25 , 50 , 75 , 90 , 99 ]
colors = ["#E7DAE6" , "#CFB5CD" , "#B690B3" , "#9E6B9A" , "#7B5178" , "#634060" ,"#312030" ]
dataquantiles = []
for q in quantiles :
dataquantiles .append (np .nanquantile (pivot .values , q / 100 ))
return pivot .style .format (f'{{:.{ decimals } f}}' ).applymap (lambda v : color_table (v , dataquantiles , colors ))
# Example: count "Intervention" entries per "Country" and "Year", shown
# without decimals.
index, columns, values, decimals = "Country", "Year", "Intervention", 0
colorful_pivot_count(
    df=df,
    index=index,
    columns=columns,
    values=values,
    decimals=decimals,
)