Created
October 6, 2023 20:35
-
-
Save addshore/c6be24d02b841bbddaa279eb68aec5bd to your computer and use it in GitHub Desktop.
COVID-19 Wikipedia pageview spikes, 2019-2022 (plotly functions)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Take a copy of allThePandasFiltered (defined outside this excerpt) under the
# name dfToPlot. NOTE(review): presumably a pandas object - confirm upstream.
dfToPlot = allThePandasFiltered.copy()
def do_pyplot( data, title, logY=False, unstackAndSort=False, grid=False, trends=False, plotMax=False, ylim=False ):
    """Draw ``data`` on a new matplotlib figure and return ``(fig, ax)``.

    Switches the pandas plotting backend to matplotlib, creates a figure sized
    by the module-level ``plotSize``, and labels the axes "Date"/"Pageviews".

    NOTE(review): the indentation of this function was reconstructed from a
    whitespace-stripped copy. In particular, confirm that the ``trends`` and
    ``plotMax`` handling belongs to the non-stacked (``else``) branch and that
    the first tick formatter belongs to the ``logY`` branch.

    Args:
        data: Object to plot; it must support ``.plot``, and ``.unstack()``
            when ``unstackAndSort`` is truthy. Presumably a pandas
            Series/DataFrame indexed by date - confirm against callers.
        title: Title passed to ``plt.title``.
        logY: When truthy, use a log y-axis, relabel it "Pageviews (log)" and
            format ticks as comma-grouped integers.
        unstackAndSort: When truthy, unstack ``data``, order the columns by
            ascending column maximum and draw a stacked area chart. A value
            greater than 1 keeps only that many trailing columns (the ones
            with the largest maxima).
        grid: Passed straight to ``ax.grid``.
        trends: When truthy, overlay one rolling mean per key of the
            module-level ``trendLines`` mapping.
        plotMax: When truthy, annotate the maximum value of ``data``.
        ylim: When truthy, passed to ``ax.set_ylim``.

    Returns:
        The ``(fig, ax)`` pair created by ``plt.subplots``.
    """
    # Global pandas option: stays in effect after this function returns.
    pd.options.plotting.backend = "matplotlib"
    fig,ax = plt.subplots(figsize=plotSize)
    plt.title(title)
    ax.set_xlabel("Date")
    ax.set_ylabel("Pageviews")
    if ylim:
        ax.set_ylim(ylim)
    if(logY):
        ax.set_yscale('log')
        ax.set_ylabel("Pageviews (log)")
        # Show tick values as comma-grouped integers.
        ax.get_yaxis().set_major_formatter(
            mtick.FuncFormatter(lambda x, p: format(int(x), ',')))
    ax.grid(grid)
    if unstackAndSort:
        data = data.unstack()
        # Reorder columns by their maximum, smallest first.
        data = data[data.max().sort_values(ascending=True).index]
        # Only get the top ones if requested
        if unstackAndSort > 1:
            data = data.iloc[:,-unstackAndSort:]
        data.plot.area(ax=ax, stacked=True)
        # Reverse the legend entries relative to the column (plot) order.
        handles, labels = ax.get_legend_handles_labels()
        ax.legend(reversed(handles), reversed(labels), title=None)
        ax.get_yaxis().set_major_formatter(
            mtick.FuncFormatter(lambda x, p: format(int(x), ',')))
    else:
        # No ax= is passed here; NOTE(review): presumably this lands on the
        # axes created above via pyplot's current-axes state - confirm.
        data.plot(label='daily')
        if trends:
            # Each trendLines key is used as the rolling window; its value
            # supplies the legend label and line style.
            for index in trendLines:
                data.rolling(window=index).mean().plot(label=trendLines[index]['label'], linestyle=trendLines[index]['style'])
            ax.legend()
        if plotMax:
            maxIndex, maxValue = data.idxmax(), data.max()
            # Place the label 1% above the peak so it does not sit on the line.
            ax.annotate(format(int(maxValue), ','), xy=(maxIndex, maxValue), xytext=(maxIndex, maxValue+(maxValue/100)))
    return fig, ax
def add_plotly_common(fig):
    """Add the shared event markers, toolbar buttons and sizing to ``fig``.

    For every entry of the module-level ``events`` sequence a dashed,
    annotated marker is drawn at ``event['date']`` with ``event['text']`` as
    its label. A row of buttons is then attached (select/deselect all traces,
    linear/log y-axis), the legend is forced on and the figure is sized to
    2000x1000.

    Args:
        fig: plotly figure, modified in place.
    """
    # https://plotly.com/python-api-reference/generated/plotly.graph_objects.Figure.html?highlight=add_vline#plotly.graph_objects.Figure.add_vline
    # A zero-width vrect (x0 == x1) stands in for add_vline because of
    # https://community.plotly.com/t/error-if-adding-annotation-to-add-vline-on-plotly-express-when-x-is-a-datetime/52497
    for marker in events:
        when = marker['date']
        fig.add_vrect(
            x0=when,
            x1=when,
            line_width=1,
            line_dash="dash",
            line_color="lightgrey",
            annotation_text=marker['text'],
            annotation_textangle=-90,
            annotation_font={"color": "grey", "size": 12},
        )

    # (label, method, args) for each toolbar button.
    # Trace visibility toggles: https://stackoverflow.com/a/69847069/4746236
    # Axis scale toggles: https://stackoverflow.com/a/61237669/4746236
    button_specs = (
        ("Deselect All", "restyle", ["visible", "legendonly"]),
        ("Select All", "restyle", ["visible", True]),
        ("Linear Scale", "relayout", [{'yaxis.type': 'linear'}]),
        ("Log Scale", "relayout", [{'yaxis.type': 'log'}]),
    )
    toolbar_buttons = [
        {"args": args, "label": label, "method": method}
        for label, method, args in button_specs
    ]

    button_bar = {
        "type": "buttons",
        "direction": "left",
        "buttons": toolbar_buttons,
        "pad": {"r": 10, "t": 10},
        "showactive": False,
        "x": 1,
        "xanchor": "right",
        "y": 1.1,
        "yanchor": "top",
    }
    fig.update_layout({"updatemenus": [button_bar]})

    fig.update_layout(showlegend=True, width=2000, height=1000)
def add_plotly_title(fig, title):
    """Set the figure title, followed by a small-print data attribution.

    The subtitle lists the module-level ``lookupWikidataStatements`` entries
    (comma separated) and names the Wikimedia page view data source. It is
    rendered inside ``<sup>`` below ``title``.

    Args:
        fig: plotly figure, modified in place.
        title: Main heading text.
    """
    statements = ", ".join(lookupWikidataStatements)
    source_note = "Using current and previous page titles linked to Wikidata items in any of the following queries(" + statements + ")"
    pageview_note = "and the Wikimedia page view data https://meta.wikimedia.org/wiki/Research:Page_view (data points less than 1k views removed). By @addshore"

    heading = "".join(
        (title, "<br><sup>", source_note, "<br>", pageview_note, "</sup>")
    )
    # xref="paper" with x=0 anchors the title to the left edge of the plot area.
    fig.update_layout(title=go.layout.Title(text=heading, xref="paper", x=0))
def do_plotly_simple(data, title):
    """Plot ``data`` with the pandas plotly backend, then show and save it.

    Switches the global pandas plotting backend to plotly, plots ``data`` with
    relabelled axes, applies the shared decorations and title, and hands the
    figure to ``show_and_save_plotly_fig``.

    Args:
        data: Object whose ``.plot`` is dispatched to the plotly backend.
        title: Figure title; also used for the saved file name.
    """
    pd.options.plotting.backend = "plotly"

    axis_labels = {"index": "date", "value": "pageviews", "variable": ""}
    fig = data.plot(labels=axis_labels)

    add_plotly_common(fig)
    add_plotly_title(fig, title)
    show_and_save_plotly_fig(fig, title)
def do_plotly_group(rawData, groupBy, title):
    """Plot summed pageviews per date as one line per ``groupBy`` value.

    ``rawData`` is aggregated by ``('date', groupBy)``, summing the
    ``'pageviews'`` column, and sorted by date then pageviews. Each distinct
    ``groupBy`` value becomes its own scatter trace. The figure then receives
    the shared decorations and title, and is shown and saved.

    Args:
        rawData: Frame with ``'date'``, ``'pageviews'`` and ``groupBy`` columns.
        groupBy: Name of the column that defines the traces.
        title: Figure title; also used for the saved file name.
    """
    totals = (
        rawData.groupby(['date', groupBy])['pageviews']
        .sum()
        .reset_index()
        .sort_values(by=['date', 'pageviews'])
        .reset_index(drop=True)
    )

    fig = go.Figure()
    # Iterating a groupby yields (group name, sub-frame) pairs.
    for series_name, frame in totals.groupby(groupBy):
        # No area fill (fill='tozeroy') is applied to the traces.
        trace = go.Scatter(
            x=frame['date'],
            y=frame['pageviews'],
            name=series_name,
        )
        fig.add_trace(trace)

    add_plotly_common(fig)
    add_plotly_title(fig, title)
    show_and_save_plotly_fig(fig, title)
def show_and_save_plotly_fig(fig,title):
    """Display ``fig`` and save it as a standalone HTML file.

    The file is written to
    ``publish/wd-topic-pageviews/<topicTitle>/<startDay>_<endDay>/plotly_<title>.html``
    with spaces in ``title`` replaced by hyphens. ``topicTitle``, ``startDay``
    and ``endDay`` are module-level names defined outside this function.

    The target directory is created when missing, so the first export for a
    new topic or date range no longer fails on a missing directory.

    Args:
        fig: plotly figure (anything exposing ``show()`` and ``write_html()``).
        title: Human-readable title used to derive the file name.
    """
    # Local import keeps the function self-contained within this file.
    import os

    fig.show()

    out_dir = "publish/wd-topic-pageviews/" + topicTitle + "/" + startDay + "_" + endDay
    # write_html is not expected to create missing parent directories
    # (see plotly.io.write_html), so make sure the target directory exists.
    os.makedirs(out_dir, exist_ok=True)
    fig.write_html(out_dir + "/plotly_" + title.replace(" ","-") + ".html")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment