Skip to content

Instantly share code, notes, and snippets.

@rjw57
Created May 12, 2015 13:53
Show Gist options
  • Save rjw57/8353c0afc4ad8baa7023 to your computer and use it in GitHub Desktop.
Save rjw57/8353c0afc4ad8baa7023 to your computer and use it in GitHub Desktop.

Scraping the LibDem party membership

A silly pile of scripts to plot Liberal Democrat party membership over time.

fetch.sh

Shell script to fetch data once every 150 seconds. Should be run from this directory. Needs data/ directory to exist.

parsedata.py

Parses JSON files downloaded by fetch.sh and saves dataset to data.npz.

plotdata.py

Plots data from data.npz to membership.png. Set FROM_POLLING = True in the source to plot growth since the close of polls.

#!/bin/bash
# Poll the membership JSON endpoint forever and archive each response under
# data/. Files are named by the SHA-1 of their content, so an unchanged
# response overwrites its previous copy instead of creating a new file.
#
# Run from the directory that should contain data/.

URL='http://mdo.libdems.org.uk/numbers.json'
INTERVAL=150  # seconds between polls

mkdir -p data

while true; do
    # -f makes curl exit non-zero on HTTP errors rather than returning the
    # error page. Skip the write on failure or an empty body so that no
    # unparseable file ends up in data/.
    if JSON=$(curl -sf "$URL") && [[ -n "$JSON" ]]; then
        HASH=$(sha1sum <<<"$JSON" | cut -f1 -d' ')
        cat >"data/membership_${HASH}.json" <<<"$JSON"
    fi
    sleep "$INTERVAL"
done
#!/usr/bin/env python3
import glob
import json
import numpy as np
import dateutil.parser
def _load_row(path):
    """Read one downloaded JSON file and return ``[timestamp, new, total]``.

    The timestamp is the file's ``runtime`` field, parsed with dateutil and
    converted to seconds since the epoch. ``new`` and ``total`` are coerced
    to ``int``.
    """
    with open(path) as f:
        d = json.load(f)
    return [
        float(dateutil.parser.parse(d['runtime']).timestamp()),
        int(d['new']), int(d['total']),
    ]


def main():
    """Collect every ``data/*.json`` sample into ``data.npz``.

    The result is saved under the key ``data`` as an ``(n, 3)`` float array
    with columns timestamp, new and total, sorted by timestamp. Files that
    cannot be parsed (invalid JSON, missing or malformed fields) are reported
    on stderr and skipped, so one bad download does not abort the run.
    """
    import sys

    rows = []
    for fn in glob.glob('data/*.json'):
        try:
            rows.append(_load_row(fn))
        except (ValueError, KeyError, TypeError) as exc:
            print('skipping {}: {!r}'.format(fn, exc), file=sys.stderr)

    # Force a 2-D shape so the column indexing below also works when no
    # usable files were found (np.array([]) would otherwise be 1-D).
    rows = np.array(rows, dtype=float).reshape(-1, 3)
    rows = rows[np.argsort(rows[:, 0]), :]
    np.savez_compressed('data.npz', data=rows)


if __name__ == '__main__':
    main()
#!/usr/bin/env python3
import datetime
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import numpy as np
# When True, the plot is anchored with a zero point at 22:00 on 7 May 2015.
FROM_POLLING = False

# Plot colour: RGB (253, 187, 48) expressed as floats in the 0-1 range.
LD_YELLOW = tuple(channel / 255.0 for channel in (253, 187, 48))
def main(from_polling=None):
    """Plot the series stored in ``data.npz`` and save it as ``membership.png``.

    Column 0 of the saved array is read as a POSIX timestamp and column 1 as
    the value to plot. The curve is drawn and filled down to the bottom of
    the axes in ``LD_YELLOW``.

    Args:
        from_polling: When true, prepend a zero point at 22:00 on 7 May 2015
            so the curve starts from there. ``None`` (the default) falls back
            to the module-level ``FROM_POLLING`` flag.
    """
    if from_polling is None:
        from_polling = FROM_POLLING

    d = np.load('data.npz')['data']
    ys = d[:, 1]
    dates = [datetime.datetime.fromtimestamp(ts) for ts in d[:, 0]]

    if from_polling:
        dates = [datetime.datetime(2015, 5, 7, 22, 0)] + dates
        ys = np.concatenate(([0], ys))

    ax = plt.gca()
    ax.xaxis.set_major_locator(mdates.AutoDateLocator())
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%d %b, %H:%M'))
    plt.xlabel('Date and time')
    plt.ylabel('New members since polling day')
    plt.title('Liberal Democrat Party Membership')
    # Pass a real bool; a string such as 'on' only works by being truthy.
    plt.grid(True)

    plt.plot(dates, ys, color=LD_YELLOW)
    # Read the lower y-limit after plotting so the fill reaches the bottom
    # of the autoscaled axes.
    _, _, miny, _ = plt.axis()
    ax.fill_between(dates, ys, miny, color=LD_YELLOW)
    plt.gcf().autofmt_xdate()
    plt.savefig('membership.png')


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment