Skip to content

Instantly share code, notes, and snippets.

@veev
Created February 12, 2016 18:19
Show Gist options
  • Save veev/a870ee9e53835e0112ba to your computer and use it in GitHub Desktop.
Save veev/a870ee9e53835e0112ba to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import json
import urllib2
import base64
import xml
import sys
import time
import sched
def post_getTweets(ruleQuery, fromDate, toDate, nextURL):
    """POST one full-archive search request and return the raw response body.

    Args:
        ruleQuery: Search rule text, sent in the request's "query" field.
        fromDate: Start of the search window; sent as ``str(fromDate)``.
        toDate: End of the search window; sent as ``str(toDate)``.
        nextURL: Value for the request's "next" field.  Pass '' for the
            first page, in which case the field is left out of the request.

    Returns:
        The response body as a string.  If the server answers with an HTTP
        error status, the error body is printed and returned instead.
    """
    url = 'https://gnip-api.twitter.com/search/fullarchive/accounts/Twitter-Marketing-Manifold/prod.json'
    # NOTE(review): placeholder credentials are hard-coded here; real ones
    # should not be committed alongside the script.
    UN = 'email@address'
    PWD = 'XXXXPasswordXXXX'

    payload = {
        "query": ruleQuery,
        "fromDate": str(fromDate),
        "toDate": str(toDate),
        # Sent as a string, exactly as the hand-built request did.
        "maxResults": "500",
    }
    if nextURL:
        payload["next"] = nextURL
    # json.dumps escapes quotes and backslashes inside the rule, which plain
    # string concatenation would turn into invalid JSON.
    query = json.dumps(payload)

    # b64encode never inserts line breaks, so no newline stripping is needed.
    base64string = base64.b64encode('%s:%s' % (UN, PWD))
    req = urllib2.Request(url=url, data=query)
    req.add_header('Content-type', 'application/json')
    req.add_header("Authorization", "Basic %s" % base64string)
    try:
        response = urllib2.urlopen(req)
    except urllib2.HTTPError as e:
        # The error body is a stream: read it exactly once.  A second read()
        # would return an empty string and the caller would get nothing.
        error_body = e.read()
        print(error_body)
        return error_body
    try:
        return response.read()
    finally:
        response.close()
def post_getCounts(ruleQuery, fromDate, toDate):
    """POST one full-archive counts request and return the raw response body.

    The request asks for counts with "bucket" set to "day".

    Args:
        ruleQuery: Search rule text, sent in the request's "query" field.
        fromDate: Start of the window; sent as ``str(fromDate)``.
        toDate: End of the window; sent as ``str(toDate)``.

    Returns:
        The response body as a string.  If the server answers with an HTTP
        error status, the error body is printed and returned instead.
    """
    url = 'https://gnip-api.twitter.com/search/fullarchive/accounts/Twitter-Marketing-Manifold/prod/counts.json'
    # NOTE(review): placeholder credentials are hard-coded here; real ones
    # should not be committed alongside the script.
    UN = 'email@address'
    PWD = 'XXXXPasswordXXXX'

    # json.dumps escapes quotes and backslashes inside the rule, which plain
    # string concatenation would turn into invalid JSON.
    query = json.dumps({
        "query": ruleQuery,
        "fromDate": str(fromDate),
        "toDate": str(toDate),
        "bucket": "day",
    })

    # b64encode never inserts line breaks, so no newline stripping is needed.
    base64string = base64.b64encode('%s:%s' % (UN, PWD))
    req = urllib2.Request(url=url, data=query)
    req.add_header('Content-type', 'application/json')
    req.add_header("Authorization", "Basic %s" % base64string)
    try:
        response = urllib2.urlopen(req)
    except urllib2.HTTPError as e:
        # The error body is a stream: read it exactly once.  A second read()
        # would return an empty string and the caller would get nothing.
        error_body = e.read()
        print(error_body)
        return error_body
    try:
        return response.read()
    finally:
        response.close()
def get_tweets(prefix, rule, fromDate, toDate, nextUrl, startCount=0):
    """Fetch search results page by page and save each page as a JSON file.

    Each page is requested with post_getTweets.  While a response contains a
    "next" key, its value is used to request the following page.  Page N is
    written to ``data/<prefix>_<500*N>-<500*(N+1)>.json``.

    Args:
        prefix: File-name prefix for the saved pages; also used in the
            progress message.
        rule: Search rule passed through to post_getTweets.
        fromDate: Start of the search window, passed through unchanged.
        toDate: End of the search window, passed through unchanged.
        nextUrl: "next" value to resume from; '' starts at the first page.
        startCount: Page number assigned to the first page fetched; it only
            affects the progress message and the output file names.
    """
    counter = startCount
    while True:
        print(prefix + " page " + str(counter))
        results = post_getTweets(rule, fromDate, toDate, nextUrl)
        try:
            data = json.loads(results)
            tweetStart = str(500 * counter)
            tweetEnd = str(500 * (counter + 1))
            # Make sure the output directory exists so a page that was
            # already downloaded is not lost to a failing open().
            if not os.path.isdir('data'):
                os.makedirs('data')
            with open('data/' + prefix + '_' + tweetStart + '-' + tweetEnd + '.json', 'w') as outfile:
                json.dump(data, outfile, indent=1)
            if 'next' not in data:
                break
            nextUrl = data['next']
            counter = counter + 1
        except Exception as e:
            # Best effort: any failure (unparsable body, write error, ...)
            # is reported and ends the download instead of raising.
            print(e)
            break
    print("Done!")
def get_counts(rule, fromDate, toDate):
    """Request counts for *rule* over the given window and print the raw reply.

    The arguments are handed to post_getCounts unchanged; whatever it returns
    is printed as-is.
    """
    print(post_getCounts(rule, fromDate, toDate))
# Search rule and output-file prefix for this run.
query = "superbowl OR Super Bowl OR SB50 OR Broncos OR BRONCOS OR PANTHERS OR SuperBowl"
prefix = "superbowl"

# Start from the first page (empty "next" value) with page numbering at 0.
# NOTE(review): the date arguments look like YYYYMMDDhhmm timestamps — confirm
# against the API documentation.
get_tweets(
    prefix,
    query,
    fromDate=201602020000,
    toDate=201602090000,
    nextUrl="",
    startCount=0,
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment