Skip to content

Instantly share code, notes, and snippets.

@philippschmalen
Last active February 21, 2021 09:20
Show Gist options
  • Save philippschmalen/cabfd63699a61d160016112c25a5c885 to your computer and use it in GitHub Desktop.
Save philippschmalen/cabfd63699a61d160016112c25a5c885 to your computer and use it in GitHub Desktop.
"""
This is an example of the app.py file for a dash app deployed on Heroku.
It creates http://esg-trending-test.herokuapp.com/
Follow the steps on https://dash.plotly.com/deployment
to deploy the app with Heroku.
Note:
Put your data into the same folder or subfolder where `app.py` lives.
I used a conda virtual environment, but installed all required packages
with `pip install` instead of `conda install`. This ensures that the
requirements.txt is readable by the Heroku Dyno.
Author: Philipp Schmalen
License: MIT
"""
import os
import pandas as pd
# plotly
import plotly.graph_objects as go
from plotly.subplots import make_subplots
# dash
import dash
import dash_core_components as dcc
import dash_html_components as html
#~ --------------- SETTINGS --------------- ~
# Location of the input CSV. Both values are handed to load_data() in the
# PIPELINE section. DATA_DIR is a relative path, so it is resolved against
# the working directory the app is started from.
DATA_DIR = './data/' # <WHERE YOUR DATA LIVES> (directory containing the CSV)
FILENAME = 'related_queries_210218_2021.csv' # <NAME OF THE FILE>.csv (file inside DATA_DIR)
#~ ---------------------------------------- ~
#~ --------------- DATA PREPARATION --------------- ~
def load_data(data_dir, filename):
    """ Read a CSV file into a dataframe

    Input
        data_dir: directory that contains the file
        filename: name of the CSV file inside `data_dir`
    Return
        df: dataframe with the contents of the file
    """
    # Build the path from the arguments rather than from the module-level
    # DATA_DIR/FILENAME, so the function loads what the caller asks for.
    return pd.read_csv(os.path.join(data_dir, filename))
def prepare_data(df):
    """ Feature engineering for Google trends 'related queries'

    Input
        df: dataframe with the columns 'ranking', 'query' and 'value';
            it is left unmodified
    Return
        dfr, dft: dataframes with rising search interest (r) and top search interest (t),
            each with the added columns 'value_total', 'value_normalized',
            'value_normalized_total', 'labels' and 'ranking_label'
    """
    # work on a copy so the new columns do not leak into the caller's dataframe
    df = df.copy()

    # feature engineering: totals and normalize
    grouped = df.groupby(['ranking'])['value']  # group values by ranking
    group_min = grouped.transform('min')
    group_max = grouped.transform('max')
    df['value_total'] = grouped.transform('sum')  # total sum
    # min-max scaling within each ranking; a ranking whose values are all
    # equal has a zero range and therefore yields NaN (0/0)
    df['value_normalized'] = (df['value'] - group_min) / (group_max - group_min)
    # total sum of normalized values
    df['value_normalized_total'] = (
        df.groupby(['ranking'])['value_normalized'].transform('sum')
    )

    # labelling: one word per line; the vectorised string method keeps
    # missing queries as missing instead of raising on a non-string value
    df['labels'] = df['query'].str.replace(' ', '<br>', regex=False)
    df['ranking_label'] = df['ranking'].replace({'top': 'Evergreens', 'rising': 'Trending'})

    # rankings: top (t) and rising (r)
    dfr, dft = df.query('ranking == "rising"'), df.query('ranking == "top"')
    return dfr, dft
#~ --------------- PIPELINE --------------- ~
# Load the CSV named in SETTINGS, then split it into the
# rising (dfr) and top (dft) dataframes used by the plot.
dfr, dft = prepare_data(load_data(DATA_DIR, FILENAME))
#~ --------------- PLOT --------------- ~
# Two stacked treemaps in one figure: rising queries in the first row,
# top queries in the second. Treemaps need "domain"-type subplot cells.
fig = make_subplots(
    rows=2,
    cols=1,
    specs=[[{"type": "domain"}], [{"type": "domain"}]],
)

# row 1: rising search interest
fig.add_trace(
    go.Treemap(
        labels=dfr['labels'],
        parents=dfr['ranking_label'],
        values=dfr['value_normalized'],
    ),
    row=1,
    col=1,
)

# row 2: top search interest
fig.add_trace(
    go.Treemap(
        labels=dft['labels'],
        parents=dft['ranking_label'],
        values=dft['value_normalized'],
    ),
    row=2,
    col=1,
)

# same narrow margin on every side
fig.update_layout(margin={'t': 10, 'b': 10, 'r': 10, 'l': 10})

# shared appearance of both treemaps
fig.update_traces(
    opacity=1,
    textposition='middle center',
    textfont=dict(family="Arial", size=18),
    hoverinfo="label",
    tiling=dict(squarifyratio=1, pad=0),
    marker=dict(depthfade=False),
)
#~ --------------- APP --------------- ~
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']

app = dash.Dash(__name__, external_stylesheets=external_stylesheets)
# Kept as a module-level name; presumably what the deployment looks up
# (see the Dash deployment guide linked in the module docstring).
server = app.server

# Page layout: heading, tagline, then the treemap figure.
app.layout = html.Div([
    html.H1('Trending topics in sustainable finance'),
    html.Div('''
Know what people search
'''),
    dcc.Graph(id='example-graph', figure=fig),
])

# Start the development server only when the file is executed directly.
if __name__ == '__main__':
    app.run_server(debug=True)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment