Created
June 26, 2020 00:11
-
-
Save slaporte/61c6653c341cba235bc91fd80d1b8ad7 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# coding: utf-8 | |
# In[1]: | |
import pymysql | |
import os | |
from pprint import pprint | |
from collections import Counter | |
from datetime import datetime | |
import matplotlib.pyplot as plot | |
# In[2]: | |
import sys | |
# Install the pinned plotly release using the interpreter that runs this
# notebook. The {sys.executable} placeholder is not an f-string: it is left
# for IPython's system() to expand from the notebook namespace.
get_ipython().system('{sys.executable} -m pip install plotly==4.8.1')
# In[3]: | |
# Database credentials are read from the environment; a missing variable
# raises KeyError here, before any connection attempt is made.
host = os.environ['MYSQL_HOST']
user = os.environ['MYSQL_USERNAME']
password = os.environ['MYSQL_PASSWORD']

# Module-level connection reused by every query in this script. No default
# database is chosen here; get_daily_edits() selects one itself.
conn = pymysql.connect(
    host=host,
    user=user,
    password=password,
)
# In[4]: | |
def get_daily_edits(title):
    """Count the revisions made per calendar day to one main-namespace page.

    Args:
        title: Page title. Spaces are replaced with underscores before the
            lookup.

    Returns:
        A dict with two parallel lists: ``'date'`` holds one ``datetime``
        (at midnight) for each day that had at least one revision, and
        ``'edits'`` holds the number of revisions on that day. Entries
        follow the query order, i.e. newest day first. Both lists are empty
        when no matching revision is found.
    """
    title = title.replace(' ', '_')
    with conn.cursor() as cur:
        cur.execute('use enwiki_p')
        # Pass the title as a bound parameter so the driver escapes it.
        # Interpolating it into the SQL text broke on titles containing an
        # apostrophe and allowed SQL injection.
        cur.execute(
            """
            SELECT *
            FROM revision JOIN page
            ON page.page_id = revision.rev_page
            WHERE page.page_namespace = 0 AND page.page_title = %s
            ORDER BY revision.rev_timestamp DESC
            """,
            (title,),
        )
        rows = cur.fetchall()
    # Column 4 is read as a bytes timestamp whose first eight characters are
    # YYYYMMDD -- presumably revision.rev_timestamp; TODO confirm the column
    # position, since SELECT * makes it depend on the table layout.
    day_counter = Counter(
        datetime.strptime(row[4][0:8].decode("utf-8"), '%Y%m%d')
        for row in rows
    )
    return {'date': list(day_counter.keys()), 'edits': list(day_counter.values())}
# In[5]: | |
# Article titles to chart, written with spaces (get_daily_edits converts
# them to underscores for the lookup).
candidates = [
    'Keisha Lance Bottoms',
    'Tammy Baldwin',
    'Karen Bass',
    'Val Demings',
    'Tammy Duckworth',
    'Kamala Harris',
    'Maggie Hassan',
    'Michelle Lujan Grisham',
    'Gina Raimondo',
    'Susan Rice',
    'Elizabeth Warren',
    'Gretchen Whitmer',
    'Michelle Obama',
]
# In[6]: | |
# Daily edit counts per candidate, keyed by the title as written in
# `candidates`; each value is the dict returned by get_daily_edits().
edits = {}
for candidate in candidates:
    edits[candidate] = get_daily_edits(candidate)
# In[10]: | |
# Using plotly.express
import plotly.express as px
import pandas as pd

# Left edge of the initially displayed date window; the range slider still
# lets the viewer reach older data.
window_start = datetime(2020, 1, 1)

# Draw one line chart of edits per day for each candidate.
for candidate in candidates:
    daily = edits[candidate]
    df = pd.DataFrame(daily, columns=['date', 'edits'])

    # Scale the y-axis to the busiest day inside the displayed window instead
    # of a fixed 80, which clipped higher peaks and wasted space on quiet
    # pages. Falls back to 1 when no edits fall inside the window.
    visible_counts = [
        count
        for day, count in zip(daily['date'], daily['edits'])
        if day >= window_start
    ]
    y_max = max(visible_counts, default=1)

    fig = px.line(df, x='date', y='edits')
    fig.update_layout(
        xaxis_range=[
            window_start.strftime('%Y-%m-%d'),
            datetime.today().strftime('%Y-%m-%d'),
        ],
        yaxis_range=[0, y_max],
        title_text=candidate,
    )
    fig.update_xaxes(rangeslider_visible=True)
    fig.show()
# In[ ]: | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment