This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# set colors (13 named colours)
cmap = [
    'white', 'red', 'orange', 'yellow', 'green', 'blue',
    'indigo', 'violet', 'purple', 'grey', 'pink',
    'brown', 'black',
]
# Figure sized 6 wide by 18 tall.
f, ax = plt.subplots(figsize=(6, 18))
# Work on a de-duplicated copy of bookDf with one row per 'Date': the end
# date of book A is also the start date of book B, so that date would
# otherwise appear twice. bookDf itself is left untouched.
df = bookDf.drop_duplicates(subset=['Date'])
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Adding needed columns, all derived from bookDf['Date'].
# Keep an untouched copy of the original date values.
bookDf['DateOrig'] = bookDf['Date']
# Day of the month.
bookDf['Day'] = bookDf['Date'].apply(lambda x: x.day)
# Abbreviated month name ('%b') and abbreviated weekday name ('%a').
bookDf['Month'] = bookDf['Date'].apply(lambda x: dt.datetime.strftime(x, '%b'))
bookDf['DOW'] = bookDf['Date'].apply(lambda x: dt.datetime.strftime(x, '%a'))
# Numeric month, and numeric weekday (weekday(): Monday is 0).
bookDf['Month_num'] = bookDf['Date'].apply(lambda x: x.month)
bookDf['DOW_num'] = bookDf['Date'].apply(lambda x: x.weekday())
# Week of the year as an int ('%W': weeks start on Monday; days before the
# first Monday of the year fall in week 0).
bookDf['Week_num'] = bookDf['Date'].apply(lambda x: int(dt.datetime.strftime(x, '%W')))
#add proxy for different colours
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Create a df with one 'Date' row per day, from 1 January to 31 December
# of `year` (both inclusive).
dateList = pd.DataFrame(
    list(date_generator(dt.datetime(year, 1, 1, 0, 0, 0),
                        dt.datetime(year, 12, 31, 0, 0, 0))),
    columns=['Date'],
)
# Cast the column to object dtype.
# NOTE(review): presumably so these dates match object-typed dates elsewhere
# in the notebook - confirm against the code that joins on 'Date'.
dateList['Date'] = dateList['Date'].astype('O')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def date_generator(from_date, to_date):
    """Yield consecutive days from ``from_date`` through ``to_date``.

    Both end points are inclusive. Nothing is yielded when ``from_date``
    is later than ``to_date``.

    Args:
        from_date: First value to yield; must support ``<=`` against
            ``to_date`` and ``+ datetime.timedelta``.
        to_date: Last value that may be yielded.

    Yields:
        ``from_date``, then each value one day later, while the value is
        not past ``to_date``.
    """
    one_day = dt.timedelta(days=1)
    current = from_date
    while current <= to_date:
        yield current
        current = current + one_day
# Accumulators for a new two-column df:
# col1: Title, col2: Date (the date read)
Title = []
Date = []
for index,row in booksv2.iterrows(): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd
import datetime as dt

# Load the book log; the code below reads its 'Start' and 'End' columns.
booksv2 = pd.read_csv('book1.csv')
# Parse both date columns from day/month/year text (format '%d/%m/%Y').
booksv2['Start'] = booksv2['Start'].apply(lambda x: dt.datetime.strptime(str(x), '%d/%m/%Y'))
booksv2['End'] = booksv2['End'].apply(lambda x: dt.datetime.strptime(str(x), '%d/%m/%Y'))
# Earliest and latest date found across both columns.
min_date = min(list(booksv2['Start']) + list(booksv2['End']))
max_date = max(list(booksv2['Start']) + list(booksv2['End']))
#year = min_date.year
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd
import plotly
# NOTE(review): plotly 4 moved `plotly.plotly` to the separate chart-studio
# package, and `py` is not used in this snippet - confirm the installed
# plotly version before relying on this import.
import plotly.plotly as py

# Build the Sankey figure from the four category-level columns of `df`,
# using the 'count' column for the values.
fig = genSankey(
    df,
    cat_cols=['lvl1', 'lvl2', 'lvl3', 'lvl4'],
    value_cols='count',
    title='Word Etymology',
)
# Render the figure with plotly's offline plotter, with validation disabled.
plotly.offline.plot(fig, validate=False)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def genSankey(df,cat_cols=[],value_cols='',title='Sankey Diagram'): | |
# maximum of 6 value cols -> 6 colors | |
colorPalette = ['#4B8BBE','#306998','#FFE873','#FFD43B','#646464'] | |
labelList = [] | |
colorNumList = [] | |
for catCol in cat_cols: | |
labelListTemp = list(set(df[catCol].values)) | |
colorNumList.append(len(labelListTemp)) | |
labelList = labelList + labelListTemp | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
app.layout = html.Div([ | |
html.Div(dcc.Graph(id='Graph',figure=fig)), | |
html.Div(className='row', children=[ | |
html.Div([html.H2('Overall Data'), | |
html.P('Num of nodes: ' + str(len(G.nodes))), | |
html.P('Num of edges: ' + str(len(G.edges)))], | |
className='three columns'), | |
html.Div([ | |
html.H2('Selected Data'), | |
html.Div(id='selected-data'), |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
fig = go.Figure(data=[edge_trace, node_trace], | |
layout=go.Layout( | |
title='<br>Network Graph of '+str(num_nodes)+' rules', | |
titlefont=dict(size=16), | |
showlegend=False, | |
hovermode='closest', | |
margin=dict(b=20,l=5,r=5,t=40), | |
annotations=[ dict( | |
showarrow=False, | |
xref="paper", yref="paper", |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Find the top 5 words of each class in the dataset: one [code, words] pair
# per entry of vect_data.index.
top_words = [[code, find_top_words(code, 5)] for code in vect_data.index]
# Show the list of top words (bare expression: displayed as notebook output).
top_words
NewerOlder