Last active
February 21, 2021 09:20
-
-
Save philippschmalen/cabfd63699a61d160016112c25a5c885 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""
This is an example of the app.py file for a Dash app deployed on Heroku.
It creates http://esg-trending-test.herokuapp.com/

Follow the steps on https://dash.plotly.com/deployment
to deploy the app with Heroku.

Note:
    Put your data into the same folder or subfolder where `app.py` lives.
    I used a conda virtual environment, but installed all required packages
    with `pip install` instead of `conda install`. This ensures that the
    requirements.txt is readable by the Heroku Dyno.

Author: Philipp Schmalen
License: MIT
"""
import os | |
import pandas as pd | |
# plotly | |
import plotly.graph_objects as go | |
from plotly.subplots import make_subplots | |
# dash | |
import dash | |
import dash_core_components as dcc | |
import dash_html_components as html | |
#~ --------------- SETTINGS --------------- ~
# Directory containing the input CSV; passed to load_data() in the pipeline below.
DATA_DIR = './data/' # <WHERE YOUR DATA LIVES>
# File name of the Google Trends "related queries" export, located inside DATA_DIR.
FILENAME = 'related_queries_210218_2021.csv' # <NAME OF THE FILE>.csv
#~ ---------------------------------------- ~
#~ --------------- DATA PREPARATION --------------- ~ | |
def load_data(data_dir, filename):
    """Read a CSV file into a DataFrame.

    Input
        data_dir: directory that contains the file
        filename: name of the CSV file inside `data_dir`
    Return
        df: dataframe with the parsed contents of the file
    """
    # Use the arguments rather than the module-level settings, so the
    # function loads whatever file the caller actually asks for.
    return pd.read_csv(os.path.join(data_dir, filename))
def prepare_data(df):
    """ Feature engineering for Google trends 'related queries'

    Expects the columns 'ranking', 'value' and 'query'. The input frame is
    left untouched; all derived columns are added to an internal copy.

    Added columns
        value_total: sum of `value` within each ranking group
        value_normalized: min-max scaled `value` within each ranking group
            (NaN for a group whose values are all equal, since max == min)
        value_normalized_total: sum of `value_normalized` within each group
        labels: `query` with spaces replaced by '<br>' (line breaks in the plot)
        ranking_label: display name of the ranking ('Evergreens' / 'Trending')

    Input
        df: dataframe
    Return
        dfr, dft: dataframes with rising search interest (r) and top search interest (t)
    """
    # Work on a copy so the caller's dataframe does not silently gain columns.
    out = df.copy()

    # feature engineering: totals and normalize
    by_ranking = out.groupby('ranking')['value']
    group_min = by_ranking.transform('min')
    group_max = by_ranking.transform('max')
    out['value_total'] = by_ranking.transform('sum')
    out['value_normalized'] = (out['value'] - group_min) / (group_max - group_min)
    out['value_normalized_total'] = (
        out.groupby('ranking')['value_normalized'].transform('sum')
    )

    # labelling: literal (non-regex) replacement; missing queries stay missing
    out['labels'] = out['query'].str.replace(' ', '<br>', regex=False)
    out['ranking_label'] = out['ranking'].replace(
        {'top': 'Evergreens', 'rising': 'Trending'}
    )

    # rankings: top (t) and rising (r)
    dfr = out[out['ranking'] == 'rising']
    dft = out[out['ranking'] == 'top']
    return dfr, dft
#~ --------------- PIPELINE --------------- ~
# Load the CSV named in the settings and split it into the rising (dfr)
# and top (dft) query frames.
dfr, dft = prepare_data(load_data(DATA_DIR, FILENAME))
#~ --------------- PLOT --------------- ~
# Two stacked treemaps: rising queries in row 1, top queries in row 2.
fig = make_subplots(
    rows=2,
    cols=1,
    specs=[[{"type": "domain"}], [{"type": "domain"}]],
)

for row, frame in enumerate((dfr, dft), start=1):
    fig.add_trace(
        go.Treemap(
            labels=frame['labels'],
            parents=frame['ranking_label'],
            values=frame['value_normalized'],
        ),
        row=row,
        col=1,
    )

# Tight margins around the figure.
fig.update_layout(margin={'t': 10, 'b': 10, 'r': 10, 'l': 10})

# Shared appearance for both treemaps.
fig.update_traces(
    opacity=1,
    textposition='middle center',
    textfont={'family': "Arial", 'size': 18},
    hoverinfo="label",
    tiling={'squarifyratio': 1, 'pad': 0},
    marker={'depthfade': False},
)
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']

app = dash.Dash(
    __name__,
    external_stylesheets=external_stylesheets,
)
# Module-level handle on the underlying server object
# (presumably what the Heroku web process imports -- confirm against the Procfile).
server = app.server

# Page layout: headline, tagline, and the treemap figure built above.
app.layout = html.Div(
    children=[
        html.H1(children='Trending topics in sustainable finance'),
        html.Div(children='\nKnow what people search\n'),
        dcc.Graph(id='example-graph', figure=fig),
    ],
)

# Only start the development server when the file is executed directly.
if __name__ == '__main__':
    app.run_server(debug=True)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment