Skip to content

Instantly share code, notes, and snippets.

@jbhardwaj
Created February 28, 2013 22:11
Show Gist options
  • Select an option

  • Save jbhardwaj/5060569 to your computer and use it in GitHub Desktop.

Select an option

Save jbhardwaj/5060569 to your computer and use it in GitHub Desktop.
Pulls data from Twitter and writes to a .csv data file for use with Witter
from __future__ import division
import re, string, os, math, time, sys, random
from operator import itemgetter
import tweetstream
def strTimeProp(start, end, format, prop):
    """Return the time lying a given proportion of the way through an interval.

    ``start`` and ``end`` are strings formatted according to ``format``
    (strftime-style) and together describe the interval [start, end].
    ``prop`` is the fraction of that interval to advance past ``start``:
    0 yields ``start`` and 1 yields ``end``.

    Args:
        start: Formatted time string marking the beginning of the interval.
        end: Formatted time string marking the end of the interval.
        format: strftime/strptime format shared by both inputs and the result.
            (The name shadows the builtin; it is kept so that keyword callers
            continue to work.)
        prop: Number giving the proportion of the interval to take.

    Returns:
        The interpolated time, rendered with ``format``.

    Raises:
        ValueError: If ``start`` or ``end`` does not match ``format``
            (raised by ``time.strptime``).
    """
    # Both endpoints are interpreted as local time and converted to seconds
    # since the epoch so that the interpolation is plain arithmetic.
    start_seconds = time.mktime(time.strptime(start, format))
    end_seconds = time.mktime(time.strptime(end, format))

    picked_seconds = start_seconds + prop * (end_seconds - start_seconds)

    return time.strftime(format, time.localtime(picked_seconds))
def randomDate(start, end, prop):
    """Return the date a proportion ``prop`` of the way from ``start`` to ``end``.

    Thin wrapper around ``strTimeProp`` using the ``%m/%d/%Y`` format. The
    function itself draws no random number: the caller supplies ``prop``
    (for example ``random.random()``) to obtain a random date.

    Args:
        start: Interval start, formatted as ``%m/%d/%Y``.
        end: Interval end, formatted as ``%m/%d/%Y``.
        prop: Proportion of the interval to advance past ``start``.

    Returns:
        The selected date as a ``%m/%d/%Y`` string.
    """
    date_format = '%m/%d/%Y'
    return strTimeProp(start, end, date_format, prop)
def randomDateTime(start, end, prop):
    """Return the date-time a proportion ``prop`` of the way from ``start`` to ``end``.

    Thin wrapper around ``strTimeProp`` using the ``%m/%d/%Y %I:%M %p``
    format (12-hour clock with AM/PM). The function itself draws no random
    number: the caller supplies ``prop`` (for example ``random.random()``).

    Args:
        start: Interval start, formatted as ``%m/%d/%Y %I:%M %p``.
        end: Interval end, formatted as ``%m/%d/%Y %I:%M %p``.
        prop: Proportion of the interval to advance past ``start``.

    Returns:
        The selected date-time as a ``%m/%d/%Y %I:%M %p`` string.
    """
    datetime_format = '%m/%d/%Y %I:%M %p'
    return strTimeProp(start, end, datetime_format, prop)
# Pattern matching any character that is not an ASCII letter, digit or space.
# It is compiled here but not referenced anywhere else in the visible script.
r = re.compile("[^a-zA-Z0-9 ]", re.UNICODE)

#Enter twitter credentials here
stream = tweetstream.SampleStream("USERNAME","PASSWORD")

# Generates a user set of 10 users with random, unique ids between 1 and 10000
# and join dates between 1/1/2008 and 1/1/2012. Each row written to names.csv
# has the form "<name>, <join date>, <uid>,".
n = 0              # number of users written so far
uids = []          # ids in the order written; reused when generating tweets
seen_uids = set()  # same ids, kept as a set for constant-time uniqueness checks

# The context manager closes the file even if the stream raises mid-way.
with open('names.csv', 'w') as f:
    for tweet in stream:
        try:
            # The encode/decode round trip is an explicit ASCII-only filter:
            # non-ASCII names raise and are skipped. Stream items without a
            # 'user'/'name' entry raise KeyError and are skipped as well.
            name = tweet['user']['name'].encode('ascii').decode('ascii')
        except (KeyError, UnicodeDecodeError, UnicodeEncodeError):
            continue

        # Commas and line breaks inside a name would corrupt the CSV row, so
        # they are removed the same way the tweet text is cleaned later on.
        name = ''.join(name.splitlines()).replace(',', ' ')

        uid = random.randint(1, 10000)
        while uid in seen_uids:
            uid = random.randint(1, 10000)

        date = randomDate("1/1/2008", "1/1/2012", random.random())

        line = name + ", " + date + ", " + str(uid) + ","
        f.write(line + "\n")
        print(line)

        uids.append(uid)
        seen_uids.add(uid)

        # Count the row that was just written and stop at exactly 10 users.
        n += 1
        if n >= 10:
            break

print("IDs: " + str(uids))
#Generates a random number (between 1 and 20) of tweets per user
# Each row written to weets.csv has the form
# "<wid>, <uid>, <message>, <timestamp>", where wid is a random, unique id
# between 1 and 1000000 and the timestamp lies between 1/1/2008 1:00 AM and
# 1/1/2012 1:00 AM.
wids = []          # tweet ids in the order written
seen_wids = set()  # same ids, kept as a set for constant-time uniqueness checks

# The context manager closes the file even if the stream raises mid-way.
with open('weets.csv', 'w') as f:
    for uid in uids:
        n = random.randint(1, 20)  # number of tweets to write for this user
        i = 0                      # tweets written so far for this user
        for tweet in stream:
            try:
                # The encode/decode round trip is an explicit ASCII-only
                # filter: non-ASCII text raises and is skipped. Stream items
                # without a 'text' entry raise KeyError and are skipped too.
                text = tweet['text'].encode('ascii').decode('ascii')
            except (KeyError, UnicodeDecodeError, UnicodeEncodeError):
                continue

            # Collapse the text onto one line and drop commas so that the
            # message cannot break the comma-separated row.
            message = ''.join(text.splitlines()).replace(',', ' ')

            wid = random.randint(1, 1000000)
            while wid in seen_wids:
                wid = random.randint(1, 1000000)

            timestamp = randomDateTime("1/1/2008 1:00 AM", "1/1/2012 1:00 AM", random.random())

            row = str(wid) + ", " + str(uid) + ", " + message + ", " + timestamp
            f.write(row + "\n")

            # Count the row before reporting progress so the output runs from
            # [1/n] to [n/n] and exactly n tweets are written per user.
            i += 1
            print("[" + str(i) + "/" + str(n) + "] " + row + ", ")

            wids.append(wid)
            seen_wids.add(wid)

            if i >= n:
                break
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment