@ferayebend
Created November 25, 2014 12:53
twitter API tools
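Two scripts follow: a MongoDB-backed tweet analysis script (timeline and keyword histograms, user and word frequency statistics) and a Twitter REST API v1.1 collection script (OAuth setup plus helpers for timelines, followers, friends, and search).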
#!/usr/bin/python
# -*- encoding: utf-8 -*-
from __future__ import unicode_literals
from pylab import *
import json
import codecs
import pymongo
import time
import sys
import os
import re
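# Analysis script: expects a local MongoDB instance (accessed through pymongo) and
# matplotlib/numpy, pulled in wholesale via `from pylab import *`.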
def getOldTweets(filename):
    input_file = file(filename, "r")
    tweets = []
    for lines in input_file:
        tweets.append(json.loads(lines))
    return tweets

def get_users(filename):
    users = []
    f = open(filename)
    for lines in f:
        users.append(lines.strip())
    return users

def stdoutStatus(jsonarray):
    for tweet in jsonarray:
        print tweet['text']
def inputDb(collection_name, filename):
    '''
    Load tweets from a JSON-lines file into a MongoDB collection.
    '''
    input_file = file(filename, "r")
    for lines in input_file:  # memory efficient loop
        data = {}
        tweet = json.loads(lines)
        data['created_at'] = time.mktime(time.strptime(tweet['created_at'], "%a %b %d %H:%M:%S +0000 %Y"))
        data['user_name'] = tweet['user']['screen_name']
        data['user_id'] = tweet['user']['id_str']
        data['_id'] = tweet['id']
        data['lang'] = tweet['lang']
        data['text'] = tweet['text']
        #print tweet['user']['screen_name']
        #print tweet['entities']['hashtags']
        data['hashtags'] = tweet['entities']['hashtags']
        if collection_name.find_one({'_id': data['_id']}):
            continue
        else:
            collection_name.insert(data)  # takes care of duplicates
    #input_file.close()
    #posts.remove({u'lang':{'$nin':[u'tr']}})  # remove non-Turkish posts
def inputUser(collection_name, filename):
    '''
    Load user records from a JSON-lines file into a MongoDB collection.
    '''
    input_file = file(filename, "r")
    for lines in input_file:  # memory efficient loop
        data = {}
        user = json.loads(lines)
        try:
            user['id']
        except KeyError:
            print user
            continue  # assuming there is an acceptable error
        data['created_at'] = time.mktime(time.strptime(user['created_at'], "%a %b %d %H:%M:%S +0000 %Y"))
        data['user_name'] = user['screen_name']
        #data['user_id'] = tweet['user']['id_str']
        data['_id'] = user['id']
        data['listed_count'] = user['listed_count']
        data['description'] = user['description']
        data['followers_count'] = user['followers_count']
        data['friends_count'] = user['friends_count']
        data['statuses_count'] = user['statuses_count']
        if collection_name.find_one({'_id': data['_id']}):
            continue
        else:
            collection_name.insert(data)  # takes care of duplicates
    input_file.close()
    #posts.remove({u'lang':{'$nin':[u'tr']}})  # remove non-Turkish posts
def KeywordFilter(InCollection, regex, sortindex, searchindex):
    histogram_array = []
    for entry in InCollection.find({searchindex: {'$in': [re.compile('%s' % (regex))]}}):
        histogram_array.append(entry[sortindex])  # in seconds
    return histogram_array

def KeywordFilterSimple(InCollection, regex, sortindex, searchindex):
    histogram_array = []
    for entry in InCollection.find({searchindex: {'$in': [regex]}}):
        histogram_array.append(entry[sortindex])  # in seconds
    return histogram_array
def KeywordComparison(posts):
    posts.create_index([("created_at", pymongo.ASCENDING)])
    OCdates = []
    RTdates = []
    for post in posts.find().sort([("created_at", pymongo.ASCENDING)]):
        RTdates.append(post['created_at'])  # in seconds
    for post in posts.find({'text': {'$nin': [re.compile('RT @')]}}).sort([("created_at", pymongo.ASCENDING)]):
        OCdates.append(post['created_at'])  # in seconds
    mindate = min(min(RTdates), min(OCdates))  # in UTC seconds
    maxdate = max(max(RTdates), max(OCdates))  # in UTC seconds
    nRTdates = (array(RTdates) - array(mindate)) / array(3600.)  # convert to hours
    nOCdates = (array(OCdates) - array(mindate)) / array(3600.)  # convert to hours
    nmindate = min(min(nRTdates), min(nOCdates))
    nmaxdate = max(max(nRTdates), max(nOCdates))
    #my_bin = linspace(mindate,maxdate,200)
    my_bin = linspace(nmindate, nmaxdate, 200)
    fig = plt.figure()
    ax = fig.add_subplot(111)
    nRT, bins, patch = ax.hist(nRTdates, bins=my_bin)
    #nOC, bins, patch = ax.hist(nOCdates, bins=my_bin)
    #ax.xaxis.set_minor_locator(MultipleLocator(3600))
    #ax.xaxis.set_major_locator(MultipleLocator(12*3600))
    '''
    comparison keyword
    '''
    polis = 'polis*'
    saldiri = 'sald?r*'
    pdates = KeywordFilter(posts, polis, 'created_at', 'text')
    npdates = (array(pdates) - array(mindate)) / array(3600.)
    np, bins, patch = ax.hist(npdates, bins=my_bin)
    print len(list(np))
    start_time_formatted = time.strftime("%d %b %Y %H:%M:%S +0000", time.gmtime(mindate + 2 * 3600))  # Turkey local time
    print max(list(nRT))
    ax.text(nmindate, max(list(nRT)), start_time_formatted)
    ylabel(r"birim zaman basina twit sayisi")  # "tweets per unit time"
    xlabel(r'ilk twitten itibaren gecen zaman')  # "time since the first tweet"
    show()
def KeywordComparison(posts, start_time, end_time, binsize, keywords):  # overrides the definition above
    posts.create_index([("created_at", pymongo.ASCENDING)])
    RTdates = []
    for post in posts.find({"created_at": {"$gte": start_time, "$lte": end_time}}).sort([("created_at", pymongo.ASCENDING)]):
        RTdates.append(post['created_at'])  # in seconds
    mindate = min(RTdates)  # in UTC seconds
    maxdate = max(RTdates)  # in UTC seconds
    print 'mindate =', mindate
    nRTdates = (array(RTdates) - array(mindate)) / array(60.)  # convert to minutes
    nmindate = min(nRTdates)
    nmaxdate = max(nRTdates)
    #my_bin = linspace(mindate,maxdate,200)
    #my_bin = linspace(nmindate,nmaxdate,20)
    #binsize = 2
    my_bin = arange(nmindate, nmaxdate + binsize, binsize)
    fig = plt.figure()
    ax = fig.add_subplot(111)
    nRT, bins, patch = ax.hist(nRTdates, bins=my_bin)
    clf()
    #nOC, bins, patch = ax.hist(nOCdates, bins=my_bin)
    #ax.xaxis.set_minor_locator(MultipleLocator(3600))
    #ax.xaxis.set_major_locator(MultipleLocator(12*3600))
    print len(my_bin[1:]), len(nRT)
    '''
    comparison keyword
    '''
    nps = []
    #keywords = ['gcalvetbarot','Camacho','herrerajoan','Albert_Rivera','HiginiaRoig']
    for key in keywords:
        keydates = KeywordFilterSimple(posts, key, 'created_at', 'text')
        nkeydates = (array(keydates) - array(mindate)) / array(60.)
        np, bins, patch = hist(nkeydates, bins=my_bin)
        nps.append(np)
    clf()
    plot(my_bin[1:] - array(binsize / 2.), nRT, 'ro-', linewidth=2)
    for h in nps:
        plot(my_bin[1:] - array(binsize / 2.), h, 'o-', linewidth=2)
    #legend(handles=keywords)
    start_time_formatted = time.strftime("%d %b %Y %H:%M:%S +0000", time.gmtime(mindate + 2 * 3600))  # Turkey local time
    print max(list(nRT))
    ax.text(nmindate, max(list(nRT)), start_time_formatted)
    ylabel(r"birim zaman basina twit sayisi")  # "tweets per unit time"
    xlabel(r'ilk twitten itibaren gecen zaman')  # "time since the first tweet"
def Timeline(posts, start_time, end_time, binsize):
    posts.create_index([("created_at", pymongo.ASCENDING)])
    RTdates = []
    for post in posts.find({"created_at": {"$gte": start_time, "$lte": end_time}}).sort([("created_at", pymongo.ASCENDING)]):
        RTdates.append(post['created_at'])  # in seconds
    mindate = min(RTdates)  # in UTC seconds
    maxdate = max(RTdates)  # in UTC seconds
    print 'mindate =', mindate
    nRTdates = (array(RTdates) - array(mindate)) / array(60.)  # convert to minutes
    nmindate = min(nRTdates)
    nmaxdate = max(nRTdates)
    #my_bin = linspace(mindate,maxdate,200)
    #my_bin = linspace(nmindate,nmaxdate,20)
    #binsize = 2
    my_bin = arange(nmindate, nmaxdate + binsize, binsize)
    fig = plt.figure()
    ax = fig.add_subplot(111)
    nRT, bins, patch = ax.hist(nRTdates, bins=my_bin)
    clf()
    #nOC, bins, patch = ax.hist(nOCdates, bins=my_bin)
    #ax.xaxis.set_minor_locator(MultipleLocator(3600))
    #ax.xaxis.set_major_locator(MultipleLocator(12*3600))
    print len(my_bin[1:]), len(nRT)
    plot(my_bin[1:] - array(binsize / 2.), nRT, 'ro-', linewidth=2)
    start_time_formatted = time.strftime("%d %b %Y %H:%M:%S", time.gmtime(mindate + 4 * 3600.))  # Spain local time
    print max(list(nRT))
    #text(nmindate,1.15*max(list(nRT)),start_time_formatted)
    figtext(0.13, 0.91, start_time_formatted, horizontalalignment='left')
    v = axis()
    #ylabel(r"birim zaman basina twit sayisi")
    #xlabel(r'ilk twitten itibaren gecen zaman')
    ylabel(r"Quantitat dels tuits per %2.1f minuts" % binsize)  # "Number of tweets per %2.1f minutes"
    xlabel(r'Temps a partir del primer tuit (mins)')  # "Time since the first tweet (mins)"
    #savefig('frequencia231114.png')
    #savefig('frequencia300914.png')
    return max(nRT), min(nRT), mindate + 4 * 3600., maxdate
def VerticalLine(times, mintime, maxtweet):
    #plot([(time-mintime)/60.,(time-mintime)/60.],[0.6*maxtweet,0.9*maxtweet],'k-',linewidth = 2)
    #print (time-mintime)/60.
    for t in times:
        time_formatted = time.strftime("%H:%M", time.gmtime(mintime + t * 60.))  # Spain local time
        print t, time_formatted
        plot([t, t], [0.2 * maxtweet, 0.55 * maxtweet], 'k-', linewidth=2)
        text(t, 3, time_formatted, horizontalalignment='center', verticalalignment='bottom', rotation='vertical')
def WOReTweets(posts, since, reftime, binsize, filename):
    posts.create_index([("created_at", pymongo.ASCENDING)])
    RTdates = []
    for post in posts.find({"created_at": {"$gte": since}}).sort([("created_at", pymongo.ASCENDING)]):
        RTdates.append(post['created_at'])  # in seconds
    mindate = min(RTdates)
    filtered = posts.find({"created_at": {"$gte": mindate + (reftime - binsize / 2.) * 60., "$lte": mindate + (reftime + binsize / 2.) * 60.}})
    untw = []
    counts = []
    for post in filtered:
        if post['text'] in untw:
            counts[untw.index(post['text'])] += 1
        else:
            untw.append(post['text'])
            counts.append(1)
    unique_tweets = [(untw[i], counts[i]) for i in range(len(counts))]
    sorted_rts = sorted(unique_tweets, key=lambda a: a[1], reverse=True)
    outname = '%s_rts.dat' % filename
    out = codecs.open(outname, encoding='utf-8', mode='a+')
    out.write('#at %3.1f: %i\n' % (reftime, filtered.count()))
    for i in range(3):  # assuming there are more than 3 tweets in the chunk
        out.write('%i, %s\n' % (sorted_rts[i][1], sorted_rts[i][0]))
    out.close()
    #for i in range(len(untw)):
    #    print counts[i], untw[i]
def WOTweets(posts, since, reftime, binsize, filename):
    posts.create_index([("created_at", pymongo.ASCENDING)])
    RTdates = []
    for post in posts.find({"created_at": {"$gte": since}}).sort([("created_at", pymongo.ASCENDING)]):
        RTdates.append(post['created_at'])  # in seconds
    mindate = min(RTdates)
    filtered = posts.find({"created_at": {"$gte": mindate + (reftime - binsize / 2.) * 60., "$lte": mindate + (reftime + binsize / 2.) * 60.}})
    outname = '%s_tw.dat' % filename
    out = codecs.open(outname, encoding='utf-8', mode='a+')
    out.write('#at %3.1f: %i\n' % (reftime, filtered.count()))
    for post in filtered:
        out.write('%s\n' % (post['text']))
    out.close()
def UserContributions(posts, filename):
    unUs = []
    counts = []
    for post in posts.find():
        if post['user_name'] in unUs:
            counts[unUs.index(post['user_name'])] += 1
        else:
            unUs.append(post['user_name'])
            counts.append(1)
    unique_users = [(unUs[i], counts[i]) for i in range(len(counts))]
    sorted_users = sorted(unique_users, key=lambda a: a[1], reverse=True)
    outname = '%s_user_statistics.dat' % filename
    if os.path.isfile(outname):
        os.popen('rm %s' % outname)
    out = codecs.open(outname, encoding='utf-8', mode='a+')
    #out.write('#at %3.1f: %i\n'%(reftime,filtered.count()))
    for i in range(len(sorted_users)):  # write every user, most active first
        out.write('%i, %s\n' % (sorted_users[i][1], sorted_users[i][0]))
    out.close()
def clean(word):
    newword = word.replace(',', '').replace(':', '').replace('…', '').replace('"', '').replace('.', '').replace(')', '').replace('(', '').replace('!', '').replace('?', '')
    return newword

def WordFrequency(posts, filterWords, filename):
    unWords = []
    counts = []
    for post in posts.find():
        postWords = post['text'].split()
        for postWord in postWords:
            cleanPostWord = clean(postWord)
            if ('http' in cleanPostWord) or (cleanPostWord in filterWords):
                continue
            elif cleanPostWord in unWords:
                counts[unWords.index(cleanPostWord)] += 1
            else:
                unWords.append(cleanPostWord)
                counts.append(1)
    unique_words = [(unWords[i], counts[i]) for i in range(len(counts))]
    sorted_words = sorted(unique_words, key=lambda a: a[1], reverse=True)
    outname = '%s_word_frequency.dat' % filename
    if os.path.isfile(outname):
        os.popen('rm %s' % outname)
    out = codecs.open(outname, encoding='utf-8', mode='a+')
    #out.write('#at %3.1f: %i\n'%(reftime,filtered.count()))
    for i in range(len(sorted_words)):  # write every word, most frequent first
        out.write('%s: %i\n' % (sorted_words[i][0], sorted_words[i][1]))
        #out.write('%s\n'%sorted_words[i][0])
    out.close()
def FollowerOps():
    '''
    #screen_name = user_info['screen_name']
    #followers = user_info['followers_count']
    #influence = float(followers)/float(user_info['friends_count'])
    #statuses = user_info['statuses_count']
    #profile_description = user_info['description'].replace('\n',' ')
    #out_string = '%s\t%s\t%s\t%f\t%s\t\'%s\''%(user_id,screen_name,followers, influence, statuses, profile_description)
    #print out_string
    #out.write('%s\n'%out_string)
    #out2.write('%s\n'%user_info)
    '''
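# Driver for the analysis script: opens the 'twitter-test' database (the inputDb/inputUser
# loading calls are left commented out), plots a tweet-frequency timeline for the chosen
# date range, and overlays histograms for the configured keywords.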
if __name__ == '__main__':
    client = pymongo.MongoClient()
    db = client['twitter-test']
    posts = db.posts  # this is a collection
    #inputDb(posts,'smt.json')
    '''
    write a followers collection and a friends collection
    '''
    user_name = 'guanyem'
    users = db.users
    #inputUser(users,'guanyem_follower_data_full.dat')
    since = time.mktime(time.strptime('14 Nov 2014 07:00:00 +0000', "%d %b %Y %H:%M:%S +0000"))
    until = time.mktime(time.strptime('20 Dec 2014 15:20:00 +0000', "%d %b %Y %H:%M:%S +0000"))
    '''
    followback algorithm starts here
    #get follower list
    followerIds = []
    for user in users.find():
        followerIds.append(str(user['_id']))
    #get friends list
    friendIds = get_users('%s_firen.txt'%user_name)
    #pull unique elements from the database
    uniqueIds = {}
    posts.find().sort([("created_at", pymongo.ASCENDING)])
    for post in posts.find({"created_at": {"$gte": since,"$lte":until}}):
        if post['user_id'] in uniqueIds:
            continue
        elif post['user_name'] == user_name:
            continue
        else:
            #uniqueIds.append({post['user_id']: post['user_name']})
            uniqueIds[post['user_id']] = post['user_name']
    print 'unique ids in this set of posts:',len(uniqueIds)
    #get target group
    targetUsers = []
    for userId in uniqueIds.keys():
        if userId in followerIds:
            continue
        elif userId in friendIds:
            continue
        else:
            targetUsers.append(userId)
    print 'target ids in this set of posts:',len(targetUsers)
    out = codecs.open('target.dat', encoding='utf-8', mode='w')
    for ids in targetUsers:
        out.write('%s\t%s\n'%(ids,uniqueIds[ids]))
    out.close()
    followback algorithm ends here
    '''
    #since = time.mktime(time.strptime('10 Oct 2014 07:00:00 +0000',"%d %b %Y %H:%M:%S +0000"))
    #until = time.mktime(time.strptime('11 Oct 2014 15:20:00 +0000',"%d %b %Y %H:%M:%S +0000"))
    since = time.mktime(time.strptime('14 Nov 2014 07:00:00 +0000', "%d %b %Y %H:%M:%S +0000"))
    until = time.mktime(time.strptime('20 Nov 2014 15:20:00 +0000', "%d %b %Y %H:%M:%S +0000"))
    #print since, until
    binsize = 4
    maxtweets, mintweets, mintime, maxtime = Timeline(posts, since, until, binsize)
    #text(0.9*(maxtime-mintime)/60,maxtweets,'#pujol324',ha='right')
    #figtext(0.88,0.85,'#pujol324',ha='right',size =15)
    #times = [117.0,127, 137.0,145.0, 159.0, 165.0,181.0, 191.,197.]  # picked by hand
    #VerticalLine(times, mintime, maxtweets)
    #keywords = ['gcalvetbarot','Camacho*','herrerajoan','Albert_Rivera','HiginiaRoig','jorditurull']
    keywords = ['guanyemDialogant']
    #keywords = ['mac?']
    KeywordComparison(posts, since, until, binsize, keywords)
    #savefig('frequencia_noucodietic02.png')
    show()
    #UserContributions(posts,'noucodietic')
    #FilterWords = [lines.replace('\n','') for lines in codecs.open('filterWords.dat', encoding='utf-8', mode='r')]
    #WordFrequency(posts,FilterWords,'noucodietic02')
    '''
    outcore = 'AdaColauMAC_051014'
    outrt = '%s_rts.dat'%outcore
    if os.path.isfile(outrt):
        os.popen('rm %s'%outrt)
    outtw = '%s_tw.dat'%outcore
    if os.path.isfile(outtw):
        os.popen('rm %s'%outtw)
    for t in times:
        #print t
        WOTweets(posts,since,t,binsize,outcore)
        WOReTweets(posts,since,t,binsize,outcore)
    '''
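# ---------------------------------------------------------------------------
# Second script: Twitter REST API (v1.1) collection helpers built on requests and
# requests_oauthlib: OAuth setup plus downloads of timelines, followers, friends,
# user profiles, and search results.
# ---------------------------------------------------------------------------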
#!/usr/bin/python
# -*- encoding: utf-8 -*-
from __future__ import unicode_literals
import requests
from requests_oauthlib import OAuth1
from urlparse import parse_qs
import json
import codecs
from math import ceil
import os.path
import time
import random
REQUEST_TOKEN_URL = "https://api.twitter.com/oauth/request_token"
AUTHORIZE_URL = "https://api.twitter.com/oauth/authorize?oauth_token="
ACCESS_TOKEN_URL = "https://api.twitter.com/oauth/access_token"
# Go to http://dev.twitter.com and create an app.
# The consumer key and secret will be generated for you after you create the app.
CONSUMER_KEY=""
CONSUMER_SECRET=""
# After the step above, you will be redirected to your app's page.
# Create an access token under the "Your access token" section
OAUTH_TOKEN=""
OAUTH_TOKEN_SECRET=""
def setup_oauth():
    """Authorize your app via identifier."""
    # Request token
    oauth = OAuth1(CONSUMER_KEY, client_secret=CONSUMER_SECRET)
    r = requests.post(url=REQUEST_TOKEN_URL, auth=oauth)
    credentials = parse_qs(r.content)
    resource_owner_key = credentials.get('oauth_token')[0]
    resource_owner_secret = credentials.get('oauth_token_secret')[0]

    # Authorize
    authorize_url = AUTHORIZE_URL + resource_owner_key
    print 'Please go here and authorize: ' + authorize_url
    verifier = raw_input('Please input the verifier: ')
    oauth = OAuth1(CONSUMER_KEY,
                   client_secret=CONSUMER_SECRET,
                   resource_owner_key=resource_owner_key,
                   resource_owner_secret=resource_owner_secret,
                   verifier=verifier)

    # Finally, obtain the access token
    r = requests.post(url=ACCESS_TOKEN_URL, auth=oauth)
    credentials = parse_qs(r.content)
    token = credentials.get('oauth_token')[0]
    secret = credentials.get('oauth_token_secret')[0]
    return token, secret

def get_oauth():
    oauth = OAuth1(CONSUMER_KEY,
                   client_secret=CONSUMER_SECRET,
                   resource_owner_key=OAUTH_TOKEN,
                   resource_owner_secret=OAUTH_TOKEN_SECRET)
    return oauth
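# Usage note: fill in CONSUMER_KEY/CONSUMER_SECRET above; while OAUTH_TOKEN is empty the
# helpers below fall back to setup_oauth(), which prints an access token and secret that
# can then be pasted into OAUTH_TOKEN/OAUTH_TOKEN_SECRET so get_oauth() can sign requests.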
def get_users(filename):
    users = []
    f = open(filename)
    for lines in f:
        users.append(lines.strip())
    return users

def loadData(inputFile):
    data = []
    for line in inputFile:
        if line.startswith("#"):
            continue
        data.append([v for v in line.strip().split()])
    return data
def loadDataS(inputFile, separator):
    data = []
    for line in inputFile:
        if line.startswith("#"):
            continue
        data.append([v for v in line.strip().split(separator)])
    return data

def transpose(data):
    return [[data[j][i] for j in range(len(data))] for i in range(len(data[0]))]
def getFollowers(user):
    falovirs = []
    f = open(user + '_falovir.txt')
    for lines in f:
        falovirs.append(lines.strip())
    return falovirs

def get_unique(data):
    result = []
    result.append(data[0])
    for i in range(len(data)):
        if data[i] in result:
            continue
        else:
            result.append(data[i])
    return result
def getCommonFollowerCount(source_user, target_user):
    '''
    target is the account we are interested in
    '''
    source_followers = getFollowers(source_user)
    target_followers = getFollowers(target_user)
    source_count = len(source_followers)
    target_count = len(target_followers)
    count = 0
    for f in source_followers:
        if f in target_followers:
            count += 1
    return count, source_count, float(count) / float(source_count), float(count) / float(target_count)
def getCommonFollowerArrayCount(source_followers, target_followers):
    '''
    target is the account we are interested in
    '''
    source_count = len(source_followers)
    target_count = len(target_followers)
    count = 0
    target_copy = target_followers
    for f in list(source_followers):  # iterate over a copy; the original list is modified below
        if f in target_copy:
            count += 1
            source_followers.remove(f)
            #target_copy.remove(f)
    '''
    for efficiency the matching elements are taken out
    if userlists are badly made, the repeating elements will not be noticed
    '''
    return count, source_count, float(count) / float(source_count), float(count) / float(target_count)
def getOldTweets(filename):
    input_file = file(filename, "r")
    tweets = []
    for lines in input_file:
        tweets.append(json.loads(lines))
    return tweets

def getOldTweetsID(filename):
    input_file = file(filename, "r")
    tweetIDs = []
    for lines in input_file:
        tweet = json.loads(lines)
        tweetIDs.append(tweet["id"])
    return tweetIDs
def GrabSearch(hashtag, since):
    #since = -1  # default?
    PROFILE = "https://api.twitter.com/1.1/search/tweets.json?q=%s&max_id=%i&result_type=recent&count=100" % (hashtag, since)
    if not OAUTH_TOKEN:
        token, secret = setup_oauth()
        print "OAUTH_TOKEN: " + token
        print "OAUTH_TOKEN_SECRET: " + secret
        print
    else:
        oauth = get_oauth()
        r = requests.get(url=PROFILE, auth=oauth)
        search = r.json()
        #print taym['statuses']
        return search

def GrabTweets(name):
    PROFILE = "https://api.twitter.com/1.1/statuses/user_timeline.json?include_entities=true&include_rts=true&screen_name=%s" % name
    if not OAUTH_TOKEN:
        token, secret = setup_oauth()
        print "OAUTH_TOKEN: " + token
        print "OAUTH_TOKEN_SECRET: " + secret
        print
    else:
        oauth = get_oauth()
        r = requests.get(url=PROFILE, auth=oauth)
        taym = r.json()
        #print taym['statuses']
        return taym
def GrabFollowers(name, no_followers):
    if no_followers < 5001:
        PROFILE = "https://api.twitter.com/1.1/followers/ids.json?cursor=-1&screen_name=%s&count=5000" % name
        if not OAUTH_TOKEN:
            token, secret = setup_oauth()
            print "OAUTH_TOKEN: " + token
            print "OAUTH_TOKEN_SECRET: " + secret
            print
        else:
            oauth = get_oauth()
            r = requests.get(url=PROFILE, auth=oauth)
            print r
            followers = r.json()
            time.sleep(61)
            return followers['ids']
    elif no_followers > 5000:
        no_cursor = int(ceil(no_followers / 5000.))
        followers_rest = no_followers % 5000
        followers = []
        next_cursor = -1
        for i in range(no_cursor):
            if i == no_cursor - 1:
                f_query = followers_rest
            else:
                f_query = 5000
            PROFILE = "https://api.twitter.com/1.1/followers/ids.json?cursor=%i&screen_name=%s&count=%i" % (next_cursor, name, f_query)
            print next_cursor
            if not OAUTH_TOKEN:
                token, secret = setup_oauth()
                print "OAUTH_TOKEN: " + token
                print "OAUTH_TOKEN_SECRET: " + secret
                print
            else:
                oauth = get_oauth()
                r = requests.get(url=PROFILE, auth=oauth)
                print r
                next_cursor = r.json()['next_cursor']
                #print next_cursor
                followers += r.json()['ids']
                time.sleep(61)
        return followers

def GrabFollowerCount(name):
    PROFILE = "https://api.twitter.com/1.1/statuses/user_timeline.json?include_entities=true&include_rts=false&count=1&screen_name=%s" % name
    if not OAUTH_TOKEN:
        token, secret = setup_oauth()
        print "OAUTH_TOKEN: " + token
        print "OAUTH_TOKEN_SECRET: " + secret
        print
    else:
        oauth = get_oauth()
        r = requests.get(url=PROFILE, auth=oauth)
        taym = r.json()
        no_followers = int(taym[0]['user']['followers_count'])
        print no_followers
        return no_followers
def GrabFriends(name, no_followers):
    if no_followers < 5001:
        PROFILE = "https://api.twitter.com/1.1/friends/ids.json?cursor=-1&screen_name=%s&count=5000" % name
        if not OAUTH_TOKEN:
            token, secret = setup_oauth()
            print "OAUTH_TOKEN: " + token
            print "OAUTH_TOKEN_SECRET: " + secret
            print
        else:
            oauth = get_oauth()
            r = requests.get(url=PROFILE, auth=oauth)
            print r
            followers = r.json()
            time.sleep(61)
            return followers['ids']
    elif no_followers > 5000:
        no_cursor = int(ceil(no_followers / 5000.))
        followers_rest = no_followers % 5000
        followers = []
        next_cursor = -1
        for i in range(no_cursor):
            if i == no_cursor - 1:
                f_query = followers_rest
            else:
                f_query = 5000
            PROFILE = "https://api.twitter.com/1.1/friends/ids.json?cursor=%i&screen_name=%s&count=%i" % (next_cursor, name, f_query)  # was followers/ids.json, apparently a copy-paste slip
            print next_cursor
            if not OAUTH_TOKEN:
                token, secret = setup_oauth()
                print "OAUTH_TOKEN: " + token
                print "OAUTH_TOKEN_SECRET: " + secret
                print
            else:
                oauth = get_oauth()
                r = requests.get(url=PROFILE, auth=oauth)
                print r
                next_cursor = r.json()['next_cursor']
                #print next_cursor
                followers += r.json()['ids']
                time.sleep(61)
        return followers
def GrabFriendCount(name):
    PROFILE = "https://api.twitter.com/1.1/statuses/user_timeline.json?include_entities=true&include_rts=false&count=1&screen_name=%s" % name
    if not OAUTH_TOKEN:
        token, secret = setup_oauth()
        print "OAUTH_TOKEN: " + token
        print "OAUTH_TOKEN_SECRET: " + secret
        print
    else:
        oauth = get_oauth()
        r = requests.get(url=PROFILE, auth=oauth)
        taym = r.json()
        no_friends = int(taym[0]['user']['friends_count'])
        print no_friends
        return no_friends

def GrabTweetsExp(name, max_id):
    PROFILE = "https://api.twitter.com/1.1/statuses/user_timeline.json?include_entities=true&include_rts=true&count=200&max_id=%s&screen_name=%s" % (max_id, name)
    if not OAUTH_TOKEN:
        token, secret = setup_oauth()
        print "OAUTH_TOKEN: " + token
        print "OAUTH_TOKEN_SECRET: " + secret
        print
    else:
        oauth = get_oauth()
        r = requests.get(url=PROFILE, auth=oauth)
        taym = r.json()
        #print taym['statuses']
        return taym
def GrabHistory(user):
    '''
    very ugly workaround for grabbing the whole timeline (3200 tweets max)
    '''
    switch = 0
    for i in range(100):  # ugly fix
        if switch < 2:
            last = os.popen('tail -1 %s_taymlayn.txt' % user).read()
            last_tweet = json.loads(last)
            last_id = str(last_tweet['id'])
            print "anything new in %s's timeline?" % user
            taymlayn = GrabTweetsExp(user, last_id)
            filename = '%s_taymlayn.txt' % user
            if os.path.isfile(filename):
                OldTweets = getOldTweetsID(filename)
                #print OldTweets[0]['text']
                out = open(filename, 'a+')
                for tweet in taymlayn:
                    if tweet['id'] in OldTweets:
                        print tweet['id'], ' tweet already in the file'
                        switch = switch + 1
                        continue
                    else:
                        #print 'good news! new tweet: '#,tweet['text']
                        switch = 0
                        json.dump(tweet, out)
                        out.write('\n')
                out.close()
            else:
                writeTweets(taymlayn, filename)
def GrabUserInfo(user_id):
    PROFILE = "https://api.twitter.com/1.1/users/show.json?user_id=%s&include_entities=false" % user_id
    if not OAUTH_TOKEN:
        token, secret = setup_oauth()
        print "OAUTH_TOKEN: " + token
        print "OAUTH_TOKEN_SECRET: " + secret
        print
    else:
        oauth = get_oauth()
        r = requests.get(url=PROFILE, auth=oauth)
        user = r.json()
        print r
        #print r.json()
        return user
def GetNewTimelines(users):
    for user in users:
        print "anything new in %s's timeline?" % user
        taymlayn = GrabTweets(user)
        filename = '%s_taymlayn.txt' % user
        if os.path.isfile(filename):
            OldTweets = getOldTweetsID(filename)
            #print OldTweets[0]['text']
            out = open(filename, 'a+')
            for tweet in taymlayn:
                if tweet['id'] in OldTweets:
                    print tweet['id'], ' tweet already in the file'
                    continue
                else:
                    print 'good news! new tweet: '  #, tweet['text']
                    json.dump(tweet, out)
                    out.write('\n')
            out.close()
        else:
            writeTweets(taymlayn, filename)
def writeTweets(tweets, filename):
    out = codecs.open(filename, encoding='utf-8', mode='w')
    for tweet in tweets:
        json.dump(tweet, out)
        out.write('\n')
    out.close()

def GetFollowerCorrelation(target_name, users):
    target_followers = getFollowers(target_name)
    out = codecs.open(target_name + '_comparison.dat', encoding='utf-8', mode='w')
    for user in users:
        source_followers = getFollowers(user)
        common_count, source_count, source_frac, target_frac = getCommonFollowerArrayCount(source_followers, target_followers)
        out_string = '%s\t%i\t%i\t%f\t%f\n' % (user, common_count, source_count, source_frac, target_frac)
        print out_string
        out.write('%s' % out_string)
    out.close()
def GetSearchResults(hashtag):
    #hashtag = 'GuanyemSantAntoni'
    #since = -1  # default
    outname = '%s_search.json' % hashtag
    #if os.path.isfile(outname):
    #    os.popen('rm %s'%outname)
    if os.path.isfile(outname):
        out = codecs.open(outname, encoding='utf-8', mode='a+')
        finalt = json.loads(os.popen('tail -1 %s' % outname).read())
        since = finalt['id']
        print 'starting from %s' % since
    else:
        out = codecs.open(outname, encoding='utf-8', mode='a+')
        since = -1
        print 'new file'
    search = GrabSearch(hashtag, since)
    try:
        print search['search_metadata']
    except KeyError:
        print search['errors'][0]['message']
        if search['errors'][0]['code'] == 88:
            print 'waiting...'
            time.sleep(15 * 60 + 1)
            search = GrabSearch(hashtag, since)
    since = search['search_metadata']['max_id']
    for tweet in search['statuses']:
        current_max_date = tweet['created_at']
        current_max_id = tweet['id']
        print current_max_date, tweet['text']
        json.dump(tweet, out)
        out.write('\n')
    out.close()
    print 'compare with', current_max_date, current_max_id
def ReadUserIds(input_file):
    #input_file = file(filename, "r")
    Ids = []
    for lines in input_file:  # memory efficient loop
        user = json.loads(lines)
        try:
            user['id']
        except KeyError:
            print user
            continue  # assuming there is an acceptable error
        Ids.append(str(user['id']))  # conversion to string for getFollowers comparison
    return Ids
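# Driver for the collection script: for each account in `users`, downloads the friend and
# follower id lists and writes them to '<user>_firen.txt' and '<user>_falovir.txt'.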
if __name__ == "__main__":
    #users = get_users('userlistGuanyem3.txt')
    #followers = get_users('guanyem_falovir.txt')
    #OldTweets = getOldTweets('')
    #account_name = 'guanyem'
    #users += [account_name]
    users = ['guanyem']
    #GetFollowerCorrelation(account_name,users)
    '''
    account_name = 'guanyem'
    followers = get_users(account_name + '_falovir.txt')
    if os.path.isfile(account_name+'_follower_data_full.dat'):
        was_open = True
    else:
        was_open = False
    out2 = codecs.open(account_name+'_follower_data_full.dat', encoding='utf-8', mode='a+')
    if was_open:
        recorded_ids = ReadUserIds(out2)
        print len(recorded_ids)
    else:
        recorded_ids = []
    count = 0
    #print 'waiting...'
    #time.sleep(15*60+1)
    for user_id in followers:
        if user_id in recorded_ids:
            continue
        else:
            user_info = GrabUserInfo(user_id)
            try:
                print user_info['screen_name']
            except KeyError:
                print user_info['errors'][0]['message']
                if user_info['errors'][0]['code'] == 34 or user_info['errors'][0]['code'] == 63:
                    continue
                elif user_info['errors'][0]['code'] == 88:
                    print 'waiting...'
                    time.sleep(15*60+1)
                    user_info = GrabUserInfo(user_id)  # not good since it can still 404
            json.dump(user_info,out2)
            out2.write('\n')
    out2.close()
    '''
    for user in users:
        print user
        friends = GrabFriends(user, GrabFriendCount(user))
        filename = '%s_firen.txt' % user
        out = codecs.open(filename, encoding='utf-8', mode='w')
        for friend in friends:
            out.write('%s\n' % friend)
        out.close()
    for user in users:
        print user
        followers = GrabFollowers(user, GrabFollowerCount(user))
        filename = '%s_falovir.txt' % user
        out = codecs.open(filename, encoding='utf-8', mode='w')
        for follower in followers:
            out.write('%s\n' % follower)
        out.close()
    '''
    try:
        print tweet['id']
    except KeyError:
        print tweet['errors'][0]['message']
        if user_info['errors'][0]['code'] == 88:
            print 'waiting...'
            time.sleep(15*60+1)
    print tweet['user']['screen_name']
    print tweet['status']['text']
    json.dump(tweet,out)
    out.write('\n')
    out.close()
    '''