Skip to content

Instantly share code, notes, and snippets.

@rrosasl
Created November 25, 2020 18:05
Show Gist options
  • Save rrosasl/1c8425053a0f7e9801691ad22a1d7159 to your computer and use it in GitHub Desktop.
Save rrosasl/1c8425053a0f7e9801691ad22a1d7159 to your computer and use it in GitHub Desktop.
SankeyDiagram
def genSankey(df, cat_cols=(), value_cols='', title='Sankey Diagram'):
    """Build a Plotly-style Sankey figure dict from consecutive category columns.

    Adapted from
    https://gist.github.com/ken333135/09f8793fff5a6df28558b17e516f91ab

    Each adjacent pair of columns in ``cat_cols`` contributes source -> target
    links, and ``value_cols`` is summed per (source, target) pair.

    Args:
        df: DataFrame containing every column in ``cat_cols`` plus
            ``value_cols``.
        cat_cols: Ordered column names, one per Sankey level (at least two).
        value_cols: Name of the single column holding the link weights.
        title: Text placed in the layout title.

    Returns:
        ``{'data': [sankey_trace], 'layout': layout}`` built from plain dicts.
        The link ``source``/``target``/``value`` entries are pandas Series.

    Raises:
        ValueError: If fewer than two columns are given in ``cat_cols``.
    """
    cat_cols = list(cat_cols)
    if len(cat_cols) < 2:
        raise ValueError(
            'genSankey needs at least two columns in cat_cols to form links'
        )

    # One colour per level (column); levels beyond the palette reuse it
    # cyclically instead of raising IndexError.
    colorPalette = ['#4B8BBE', '#306998', '#FFE873', '#FFD43B', '#646464']

    # Map each distinct label to the colour of the first level it appears in.
    # Building labels and colours together keeps the two lists the same length
    # even when a label occurs in several columns, and first-seen ordering
    # makes the node order reproducible between runs.
    labelColors = {}
    for level, catCol in enumerate(cat_cols):
        levelColor = colorPalette[level % len(colorPalette)]
        for label in df[catCol].unique():
            labelColors.setdefault(label, levelColor)
    labelList = list(labelColors)
    colorList = list(labelColors.values())

    # transform df into source-target pairs, one frame per adjacent column pair
    pairFrames = []
    for srcCol, dstCol in zip(cat_cols, cat_cols[1:]):
        # copy so that renaming the columns never touches a view of df
        pairDf = df[[srcCol, dstCol, value_cols]].copy()
        pairDf.columns = ['source', 'target', 'count']
        pairFrames.append(pairDf)
    sourceTargetDf = pd.concat(pairFrames)
    sourceTargetDf = (
        sourceTargetDf.groupby(['source', 'target'])
        .agg({'count': 'sum'})
        .reset_index()
    )

    # add node indices for each source-target pair (dict lookup, not list.index)
    labelIndex = {label: pos for pos, label in enumerate(labelList)}
    sourceTargetDf['sourceID'] = [labelIndex[x] for x in sourceTargetDf['source']]
    sourceTargetDf['targetID'] = [labelIndex[x] for x in sourceTargetDf['target']]

    # creating the sankey diagram
    data = dict(
        type='sankey',
        node=dict(
            pad=15,
            thickness=20,
            line=dict(
                color="black",
                width=0.5,
            ),
            label=labelList,
            color=colorList,
        ),
        link=dict(
            source=sourceTargetDf['sourceID'],
            target=sourceTargetDf['targetID'],
            value=sourceTargetDf['count'],
        ),
    )
    layout = dict(
        title=title,
        font=dict(
            size=16,
        ),
    )
    fig = dict(data=[data], layout=layout)
    return fig
# Build the Sankey figure dict for the per-round vote data and wrap it in a
# plotly Figure for display.
sankey_title = 'Vote by Ranking'
sankey_fig = genSankey(
    df_sankey,
    cat_cols=col_rounds,
    value_cols='value',
    title=sankey_title,
)
fig = go.Figure(sankey_fig)
fig.update_layout(width=1200)

# Caption the left-most and right-most ends of the diagram; both captions
# share the same y position and have no arrow.
for x_pos, caption in ((0, "First round"), (1, "Final round")):
    fig.add_annotation(x=x_pos, y=1.1, showarrow=False, text=caption)

fig.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment