jbhardwaj · February 28, 2013 22:11
diff --git a/realData.py b/realData.py
 from __future__ import division
 import re, string, os, math, time, sys, random
 from operator import itemgetter
 import tweetstream

 def strTimeProp(start, end, format, prop):
     """Return the time lying a given fraction of the way from start to end.

     start and end are strings formatted according to format (a
     strftime/strptime-style pattern) and delimit the interval [start, end].
     prop is the fraction of that interval to advance past start (0 gives
     start, 1 gives end).  The result is rendered with the same format.

     Both endpoints go through time.mktime and the result through
     time.localtime, so the interpolation happens on local-time epoch
     seconds.
     """

     def to_epoch(stamp):
         # Parse one formatted timestamp into seconds since the epoch.
         return time.mktime(time.strptime(stamp, format))

     begin = to_epoch(start)
     finish = to_epoch(end)

     moment = begin + prop * (finish - begin)
     return time.strftime(format, time.localtime(moment))


 def randomDate(start, end, prop):
     """Return the date at fraction prop of the interval [start, end].

     start and end are 'month/day/year' strings; the result uses the same
     layout.  The work is delegated to strTimeProp.
     """
     date_layout = '%m/%d/%Y'
     return strTimeProp(start, end, date_layout, prop)
 def randomDateTime(start, end, prop):
     """Return the date-time at fraction prop of the interval [start, end].

     start and end are 'month/day/year hour:minute AM/PM' strings (12-hour
     clock); the result uses the same layout.  The work is delegated to
     strTimeProp.
     """
     datetime_layout = '%m/%d/%Y %I:%M %p'
     return strTimeProp(start, end, datetime_layout, prop)

 # NOTE(review): compiled here but not referenced by any statement visible
 # in this script.
 r = re.compile("[^a-zA-Z0-9 ]", re.UNICODE)

 #Enter twitter credentials here
 stream = tweetstream.SampleStream("USERNAME","PASSWORD")

 # Generates a user set of at most 10 users, each with a unique random id
 # between 1 and 10000 and a random join date between 1/1/2008 and 1/1/2012.
 # One "name, date, uid," row per user is written to names.csv and echoed.
 n = 0

 uids = []

 # The with-block closes names.csv even if reading the stream raises.
 with open('names.csv', 'w') as f:
     for tweet in stream:
         try:
             # Items lacking the 'user'/'name' keys (KeyError) and names that
             # are not pure ASCII (Unicode errors) are skipped.
             name = tweet['user']['name'].decode('ascii')
         except (KeyError, UnicodeDecodeError, UnicodeEncodeError):
             continue

         # A comma inside the name would add a column to the comma-separated
         # row, so it is blanked the same way the tweet text is in weets.csv.
         name = name.replace(',', ' ')

         # Re-draw until the id has not been handed out yet.
         uid = random.randint(1,10000)
         while uid in uids:
             uid = random.randint(1,10000)

         date = randomDate("1/1/2008", "1/1/2012", random.random())
         row = name + ", " + date + ", " + str(uid) + ","
         f.write(row + "\n")
         print(row)
         uids.append(uid)

         # Count the row just written, then stop at the limit; checking before
         # incrementing used to let an 11th user through.
         n += 1
         if n >= 10:
             break

 print("IDs: " + str(uids))

 # Generates a random number (between 1 and 20) of tweets per user.  Each
 # tweet gets a unique random id between 1 and 1000000 and a random
 # timestamp between 1/1/2008 1:00 AM and 1/1/2012 1:00 AM, and is written to
 # weets.csv as "wid, uid, message, datetime".
 wids = []

 # The with-block closes weets.csv even if reading the stream raises.
 with open('weets.csv','w') as f:
     for uid in uids:
         n = random.randint(1,20)
         i = 0
         for tweet in stream:
             try:
                 # Items lacking 'text' (KeyError) and non-ASCII text (Unicode
                 # errors) are skipped.  Line breaks are removed and commas
                 # blanked so the message stays in one CSV field.
                 message = ''.join(tweet['text'].decode('ascii').splitlines()).replace(',', ' ')
             except (KeyError, UnicodeDecodeError, UnicodeEncodeError):
                 continue

             # Re-draw until the id has not been handed out yet.
             wid = random.randint(1,1000000)
             while wid in wids:
                 wid = random.randint(1,1000000)

             datetime = randomDateTime("1/1/2008 1:00 AM", "1/1/2012 1:00 AM", random.random())
             row = str(wid) + ", " + str(uid) + ", " + message + ", " + datetime
             f.write(row + "\n")
             wids.append(wid)

             # Count the row just written so exactly n tweets are stored per
             # user; checking before incrementing used to store n + 1.
             i += 1
             print("[" + str(i) + "/" + str(n) + "] " + row + ", ")
             if i >= n:
                 break
	from __future__ import division
	import re, string, os, math, time, sys, random
	from operator import itemgetter
	import tweetstream

	def strTimeProp(start, end, format, prop):
	    """Get a time at a proportion of a range of two formatted times.

	    start and end should be strings specifying times formatted in the
	    given format (strftime-style), giving an interval [start, end].
	    prop specifies the proportion of the interval to be taken after
	    start (0 gives start, 1 gives end).  The returned time will be in
	    the specified format.
	    """

	    # Convert both endpoints to local-time epoch seconds.
	    stime = time.mktime(time.strptime(start, format))
	    etime = time.mktime(time.strptime(end, format))

	    # Linear interpolation between the two endpoints.
	    ptime = stime + prop * (etime - stime)

	    return time.strftime(format, time.localtime(ptime))


	def randomDate(start, end, prop):
	    """Return the date at fraction prop of the interval [start, end].

	    start and end are 'month/day/year' strings; the result uses the same
	    layout.  The work is delegated to strTimeProp.
	    """
	    return strTimeProp(start, end, '%m/%d/%Y', prop)
	def randomDateTime(start, end, prop):
	    """Return the date-time at fraction prop of the interval [start, end].

	    start and end are 'month/day/year hour:minute AM/PM' strings (12-hour
	    clock); the result uses the same layout.  The work is delegated to
	    strTimeProp.
	    """
	    return strTimeProp(start, end, '%m/%d/%Y %I:%M %p', prop)

	# NOTE(review): compiled here but not referenced by any statement visible
	# in this script.
	r = re.compile("[^a-zA-Z0-9 ]", re.UNICODE)

	#Enter twitter credentials here
	stream = tweetstream.SampleStream("USERNAME","PASSWORD")

	# Generates a user set of at most 10 users, each with a unique random id
	# between 1 and 10000 and a random join date between 1/1/2008 and 1/1/2012.
	# One "name, date, uid," row per user is written to names.csv and echoed.
	n = 0

	uids = []

	# The with-block closes names.csv even if reading the stream raises.
	with open('names.csv', 'w') as f:
	    for tweet in stream:
	        try:
	            # Items lacking the 'user'/'name' keys (KeyError) and names that
	            # are not pure ASCII (Unicode errors) are skipped.
	            name = tweet['user']['name'].decode('ascii')
	        except (KeyError, UnicodeDecodeError, UnicodeEncodeError):
	            continue

	        # A comma inside the name would add a column to the comma-separated
	        # row, so it is blanked the same way the tweet text is in weets.csv.
	        name = name.replace(',', ' ')

	        # Re-draw until the id has not been handed out yet.
	        uid = random.randint(1,10000)
	        while uid in uids:
	            uid = random.randint(1,10000)

	        date = randomDate("1/1/2008", "1/1/2012", random.random())
	        row = name + ", " + date + ", " + str(uid) + ","
	        f.write(row + "\n")
	        print(row)
	        uids.append(uid)

	        # Count the row just written, then stop at the limit; checking before
	        # incrementing would let an 11th user through.
	        n += 1
	        if n >= 10:
	            break

	print("IDs: " + str(uids))

	# Generates a random number (between 1 and 20) of tweets per user.  Each
	# tweet gets a unique random id between 1 and 1000000 and a random
	# timestamp between 1/1/2008 1:00 AM and 1/1/2012 1:00 AM, and is written to
	# weets.csv as "wid, uid, message, datetime".
	wids = []

	# The with-block closes weets.csv even if reading the stream raises.
	with open('weets.csv','w') as f:
	    for uid in uids:
	        n = random.randint(1,20)
	        i = 0
	        for tweet in stream:
	            try:
	                # Items lacking 'text' (KeyError) and non-ASCII text (Unicode
	                # errors) are skipped.  Line breaks are removed and commas
	                # blanked so the message stays in one CSV field.
	                message = ''.join(tweet['text'].decode('ascii').splitlines()).replace(',', ' ')
	            except (KeyError, UnicodeDecodeError, UnicodeEncodeError):
	                continue

	            # Re-draw until the id has not been handed out yet.
	            wid = random.randint(1,1000000)
	            while wid in wids:
	                wid = random.randint(1,1000000)

	            datetime = randomDateTime("1/1/2008 1:00 AM", "1/1/2012 1:00 AM", random.random())
	            row = str(wid) + ", " + str(uid) + ", " + message + ", " + datetime
	            f.write(row + "\n")
	            wids.append(wid)

	            # Count the row just written so exactly n tweets are stored per
	            # user; checking before incrementing would store n + 1.
	            i += 1
	            print("[" + str(i) + "/" + str(n) + "] " + row + ", ")
	            if i >= n:
	                break
No results found