Skip to content

Instantly share code, notes, and snippets.

@revox
Created November 17, 2015 08:35
Show Gist options
  • Save revox/5add7219bcb6eaa3efcd to your computer and use it in GitHub Desktop.
Save revox/5add7219bcb6eaa3efcd to your computer and use it in GitHub Desktop.
Reads the output of twitter_streaming_api.py and fetches the user profiles into a CSV, pausing between requests to avoid hitting the rate limit.
import twitter
# http://mike.verdone.ca/twitter/ and Russel, MTSW, second edition, 2013
import json, csv, time, sys
# OAuth credentials for the Twitter API; fill these in before running.
consumer_key = ""
consumer_secret = ""
access_token = ""
access_token_secret = ""

# Authenticated client used for the user-profile lookups.
auth = twitter.oauth.OAuth(
    access_token,
    access_token_secret,
    consumer_key,
    consumer_secret,
)
twitter_api = twitter.Twitter(auth=auth)

# Destination CSV; mode 'w' truncates any existing file on every run.
outfile = open('mapped_tweet_data.csv', 'w')
csvwriter = csv.writer(outfile)
def clean(val):
    """Return *val* encoded as UTF-8 bytes, ready to be written to the CSV.

    Falsy values (``None``, ``""``, ...) become the empty string so that
    missing profile fields produce an empty CSV cell.  Values that are
    already byte strings are returned unchanged: calling ``.encode`` on
    them would raise (an implicit ASCII decode of non-ASCII bytes on
    Python 2, a missing ``encode`` attribute on Python 3).
    """
    if not val:
        return ""
    if isinstance(val, bytes):
        # Already encoded; encoding again would fail on non-ASCII content.
        return val
    return val.encode('utf-8')
# Number of screen names sent per users.lookup request (the original code
# batched 100 at a time; presumably the endpoint's per-request maximum).
_BATCH_SIZE = 100
# Seconds to wait after each lookup so consecutive requests stay under the
# API rate limit.
_PAUSE_SECONDS = 16


def _write_profiles(screen_names, pause=True):
    """Look up one batch of screen names and write one CSV row per profile.

    Each row holds: screen_name, created_at, profile_image_url, location,
    geo_enabled, lang, time_zone.  The output file is flushed after the
    batch so completed work survives an interruption.

    screen_names -- list of screen names to look up in a single request.
    pause        -- sleep for _PAUSE_SECONDS afterwards; pass False when no
                    further request will follow.

    A KeyboardInterrupt exits the process with status 0; any other error
    is printed and re-raised.
    """
    try:
        resp = twitter_api.users.lookup(screen_name=','.join(screen_names))
        for user in resp:
            csvwriter.writerow([
                clean(user['screen_name']),
                user['created_at'],
                user['profile_image_url'],
                clean(user['location']),
                user['geo_enabled'],
                user['lang'],
                clean(user['time_zone']),
            ])
        outfile.flush()
        if pause:
            time.sleep(_PAUSE_SECONDS)
    except KeyboardInterrupt:
        sys.exit(0)
    except Exception as e:
        print("error ::  %s" % (e,))
        raise


# read the file from disk...
# NB: in universal newline mode as tweets can have newlines - see http://stackoverflow.com/questions/6726953/open-the-file-in-universal-newline-mode-using-csv-module-django
try:
    with open('paris_data_new.csv', 'rU') as inputfile:
        processed_total = 0
        users = []
        tweetreader = csv.reader(inputfile)
        for row in tweetreader:
            if len(row) < 2:
                # Blank or truncated line: there is no screen-name column.
                continue
            print(row[1])
            processed_total += 1
            users.append(row[1])
            if len(users) >= _BATCH_SIZE:
                print("Processed total =  %d" % processed_total)
                _write_profiles(users)
                users = []
        if users:
            # Final partial batch: without this, up to 99 trailing users
            # would never be looked up.
            print("Processed total =  %d" % processed_total)
            _write_profiles(users, pause=False)
finally:
    # Close even when a lookup fails or the run is interrupted.
    outfile.close()
print("done")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment