Skip to content

Instantly share code, notes, and snippets.

@ejamesc
Created February 8, 2015 17:24
Show Gist options
  • Select an option

  • Save ejamesc/8b43f16251a0bb4e7b54 to your computer and use it in GitHub Desktop.

Select an option

Save ejamesc/8b43f16251a0bb4e7b54 to your computer and use it in GitHub Desktop.
Download Tumblr API Results
import xlwt
import requests
# Tumblr API consumer key, sent as the `api_key` query parameter on every request.
# NOTE(review): this credential is hard-coded in source; consider loading it
# from the environment or a config file instead of committing it.
API_KEY = "DFIqb6odlalG3lzJbmcSlaaCwM9EkFyaAPzgSeKD9oQTQtWMVm"
# Endpoint for fetching posts by tag. HTTPS is used so the API key in the
# query string is not transmitted in clear text.
URL = "https://api.tumblr.com/v2/tagged"
def main(search_tag, filename, pages):
    """Download Tumblr posts tagged `search_tag` and save them to an .xls file.

    The workbook gets a single sheet named after the tag, with a bold heading
    row followed by one row per text or photo post. Posts of any other type
    are skipped.

    Args:
        search_tag: Tag to search for; also used as the sheet name.
        filename: Path the workbook is saved to.
        pages: Maximum number of result pages to request from the API.

    Raises:
        requests.HTTPError: If the API answers with an error status.
    """
    book = xlwt.Workbook()
    sheet = book.add_sheet(search_tag)

    # Column headings, written in bold on the first row.
    bold_font = xlwt.Font()
    bold_font.bold = True
    heading_style = xlwt.XFStyle()
    heading_style.font = bold_font
    headings = ["Post URL", "Text Content", "Image URL", "Note Count"]
    for col, heading in enumerate(headings):
        sheet.write(0, col, heading, heading_style)

    row_num = 1
    for post in _fetch_posts(search_tag, pages):
        row = _post_to_row(post)
        if row is None:
            continue
        for col, value in enumerate(row):
            sheet.write(row_num, col, value)
        row_num += 1

    book.save(filename)


def _fetch_posts(search_tag, pages):
    """Return the posts from up to `pages` result pages for `search_tag`."""
    params = {'tag': search_tag, 'api_key': API_KEY}
    posts = []
    for page_no in range(1, pages + 1):
        print("Downloading page  %d" % page_no)
        # A timeout keeps a stalled connection from hanging the script forever.
        r = requests.get(URL, params=params, timeout=30)
        r.raise_for_status()
        page = r.json()['response']
        if not page:
            # No more results: asking again would only repeat the same query.
            break
        posts.extend(page)
        # Page backwards in time from the oldest post received so far.
        params['before'] = page[-1]['timestamp']
    return posts


def _post_to_row(post):
    """Map a post dict to (url, text, image url, note count), or None to skip it."""
    post_type = post.get('type')
    if post_type == "text":
        title = post.get('title') or ""
        body = post.get('body')
        text = title + "\n" + body if body else title
        return (post['post_url'], text, "", post.get('note_count', 0))
    # The Tumblr API reports photo posts with type "photo"; "image" is still
    # accepted so any input that matched the old check keeps working.
    if post_type in ("photo", "image"):
        return (
            post['post_url'],
            post.get('caption') or "",
            post.get('image_permalink') or "",
            post.get('note_count', 0),
        )
    return None
if __name__ == "__main__":
    # Script settings: edit the tag to search for, the output workbook path,
    # and the number of pages to download.
    search_tag, filename, pages = (
        "otherkin",
        "Tumblr_Corpus_Linguistics.xls",
        20,
    )
    main(search_tag, filename, pages)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment