ejamesc · February 8, 2015 17:24
diff --git a/tumblr_corpus.py b/tumblr_corpus.py
 import xlwt
 import requests

 # Tumblr API key sent as the `api_key` query parameter on every request.
 # NOTE(review): this credential is committed in plain text; consider rotating
 # it and loading it from configuration instead of source control.
 API_KEY = "DFIqb6odlalG3lzJbmcSlaaCwM9EkFyaAPzgSeKD9oQTQtWMVm"
 # Tagged-posts endpoint. HTTPS keeps the API key out of clear-text traffic.
 URL = "https://api.tumblr.com/v2/tagged"

 def _fetch_tagged_posts(params):
    """Request one page from the tagged endpoint and return its list of posts."""
    reply = requests.get(URL, params=params, timeout=30)
    # Surface HTTP-level failures (bad key, rate limiting) here instead of
    # failing later on an unexpected payload.
    reply.raise_for_status()
    return reply.json()['response']


 def main(search_tag, filename, pages):
    """Download posts tagged `search_tag` and save them to an .xls workbook.

    One request is made up front, followed by up to `pages` further requests,
    each asking for posts older than the last one already collected.
    Paging stops early when a request returns no posts.

    Every text or photo post becomes one row with the columns
    "Post URL", "Text Content", "Image URL" and "Note Count"; posts of any
    other type are skipped.

    Args:
        search_tag: Tag to search for; also used as the worksheet name.
        filename: Path the workbook is saved to.
        pages: Number of follow-up pages to request after the first one.

    Raises:
        requests.HTTPError: If the API answers with an HTTP error status.
    """
    book = xlwt.Workbook()
    sh = book.add_sheet(search_tag)

    # Bold style for the heading row.
    f = xlwt.Font()
    f.bold = True
    h_style = xlwt.XFStyle()
    h_style.font = f

    headings = ["Post URL", "Text Content", "Image URL", "Note Count"]
    for col, heading in enumerate(headings):
        sh.write(0, col, heading, h_style)

    params = {'tag': search_tag, 'api_key': API_KEY}
    res = list(_fetch_tagged_posts(params))
    n = 0
    # `and res` guards res[-1] when the very first request returned nothing.
    while n < pages and res:
        # Single-argument form is valid on Python 2 and 3 and prints the same
        # text as the former print statement.
        print("%s %d" % ("Downloading page ", n + 1))
        n += 1
        # Page backwards in time from the oldest post fetched so far.
        params['before'] = res[-1]['timestamp']
        page = _fetch_tagged_posts(params)
        if not page:
            # No older posts; repeating the request would return nothing new.
            break
        res.extend(page)

    row_num = 1
    for post in res:
        kind = post['type']
        if kind == "text":
            title = post.get('title') or ""
            text = post.get('body')
            content = title + "\n" + text if text else title
            image_url = ""
        elif kind in ("photo", "image"):
            # The Tumblr API reports image posts as "photo"; "image" is still
            # accepted so the previous check keeps working.
            content = post.get('caption', "")
            image_url = post.get('image_permalink', "")
        else:
            continue

        sh.write(row_num, 0, post['post_url'])
        sh.write(row_num, 1, content)
        sh.write(row_num, 2, image_url)
        sh.write(row_num, 3, post['note_count'])
        row_num += 1

    book.save(filename)

 if __name__ == "__main__":
    # Script entry point: edit the three values below to change the run.
    search_tag = "otherkin"  # tag to search for
    filename = "Tumblr_Corpus_Linguistics.xls"  # workbook to write
    pages = 20  # page count handed to main()
    main(search_tag=search_tag, filename=filename, pages=pages)
	import xlwt
	import requests

	# Tumblr API key sent as the `api_key` query parameter on every request.
	# NOTE(review): this credential is committed in plain text; consider rotating
	# it and loading it from configuration instead of source control.
	API_KEY = "DFIqb6odlalG3lzJbmcSlaaCwM9EkFyaAPzgSeKD9oQTQtWMVm"
	# Tagged-posts endpoint. HTTPS keeps the API key out of clear-text traffic.
	URL = "https://api.tumblr.com/v2/tagged"

	def _fetch_tagged_posts(params):
	    """Request one page from the tagged endpoint and return its list of posts."""
	    reply = requests.get(URL, params=params, timeout=30)
	    # Surface HTTP-level failures (bad key, rate limiting) here instead of
	    # failing later on an unexpected payload.
	    reply.raise_for_status()
	    return reply.json()['response']


	def main(search_tag, filename, pages):
	    """Download posts tagged `search_tag` and save them to an .xls workbook.

	    One request is made up front, followed by up to `pages` further
	    requests, each asking for posts older than the last one already
	    collected. Paging stops early when a request returns no posts.

	    Every text or photo post becomes one row with the columns
	    "Post URL", "Text Content", "Image URL" and "Note Count"; posts of any
	    other type are skipped.

	    Args:
	        search_tag: Tag to search for; also used as the worksheet name.
	        filename: Path the workbook is saved to.
	        pages: Number of follow-up pages to request after the first one.

	    Raises:
	        requests.HTTPError: If the API answers with an HTTP error status.
	    """
	    book = xlwt.Workbook()
	    sh = book.add_sheet(search_tag)

	    # Bold style for the heading row.
	    f = xlwt.Font()
	    f.bold = True
	    h_style = xlwt.XFStyle()
	    h_style.font = f

	    headings = ["Post URL", "Text Content", "Image URL", "Note Count"]
	    for col, heading in enumerate(headings):
	        sh.write(0, col, heading, h_style)

	    params = {'tag': search_tag, 'api_key': API_KEY}
	    res = list(_fetch_tagged_posts(params))
	    n = 0
	    # `and res` guards res[-1] when the very first request returned nothing.
	    while n < pages and res:
	        # Single-argument form is valid on Python 2 and 3 and prints the
	        # same text as the former print statement.
	        print("%s %d" % ("Downloading page ", n + 1))
	        n += 1
	        # Page backwards in time from the oldest post fetched so far.
	        params['before'] = res[-1]['timestamp']
	        page = _fetch_tagged_posts(params)
	        if not page:
	            # No older posts; repeating the request would return nothing new.
	            break
	        res.extend(page)

	    row_num = 1
	    for post in res:
	        kind = post['type']
	        if kind == "text":
	            title = post.get('title') or ""
	            text = post.get('body')
	            content = title + "\n" + text if text else title
	            image_url = ""
	        elif kind in ("photo", "image"):
	            # The Tumblr API reports image posts as "photo"; "image" is
	            # still accepted so the previous check keeps working.
	            content = post.get('caption', "")
	            image_url = post.get('image_permalink', "")
	        else:
	            continue

	        sh.write(row_num, 0, post['post_url'])
	        sh.write(row_num, 1, content)
	        sh.write(row_num, 2, image_url)
	        sh.write(row_num, 3, post['note_count'])
	        row_num += 1

	    book.save(filename)

	if __name__ == "__main__":
	    # Script entry point: edit the three values below to change the run.
	    # The body is indented so these statements run only when the file is
	    # executed directly.
	    search_tag = "otherkin"  # tag to search for
	    filename = "Tumblr_Corpus_Linguistics.xls"  # workbook to write
	    pages = 20  # page count handed to main()
	    main(search_tag, filename, pages)
No results found