@philshem
Last active August 29, 2015 13:56
Collect daily Wikipedia page view counts for a list of terms; in this case, 'Advisor' and 'Adviser'. It helps to check first that the Wikipedia pages exist.
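Since the note above suggests verifying that each page exists before collecting counts, here is a minimal sketch of such a check (not part of the original gist), written in the same Python 2 style as the script below. It uses the MediaWiki query API, where a nonexistent title comes back with a 'missing' flag in its page record; treat the exact response handling as an assumption and verify it against the live API.

import requests

def page_exists(title):
    # Ask the MediaWiki API about the title; a page that does not exist
    # is returned with a 'missing' flag in its page record.
    resp = requests.get('https://en.wikipedia.org/w/api.php',
                        params={'action': 'query', 'titles': title, 'format': 'json'})
    pages = resp.json()['query']['pages']
    return all('missing' not in page for page in pages.values())

for term in ['Advisor', 'Adviser']:
    print term, page_exists(term)   # both should print True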
import requests
import collections
import time

searchlist = ['Advisor', 'Adviser']
minyear = 2008
maxyear = 2014

for search in searchlist:
    views = {}
    for year in xrange(minyear, maxyear + 1):
        for month in xrange(1, 12 + 1):
            # zero-pad the month so the URL reads e.g. .../200801/Advisor
            printmonth = str(month).zfill(2)
            url = 'http://stats.grok.se/json/en/' + str(year) + printmonth + '/' + search
            data = requests.get(url).json()
            time.sleep(1)  # be nice to the API
            # merge this month's daily counts (keyed by date) into the running dict
            views.update(data.get('daily_views', {}))
    # sort by date so the output is chronological
    sorted_views = collections.OrderedDict(sorted(views.items()))
    for day in sorted_views:
        if sorted_views[day] != 0:
            print ','.join((search, str(day), str(sorted_views[day])))
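Each printed line has the form term,date,count, so the output can be redirected to a file and opened directly as CSV.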