Skip to content

Instantly share code, notes, and snippets.

@ferayebend
Last active August 29, 2015 13:57
Show Gist options
  • Save ferayebend/9497021 to your computer and use it in GitHub Desktop.
Save ferayebend/9497021 to your computer and use it in GitHub Desktop.
ascii json olarak yazılmış tweetleri okumak/işlemek için bir grup edevat
#!/usr/bin/python
# -*- encoding: utf-8 -*-
from __future__ import unicode_literals
import json
import codecs
import sys
import os
def getOldTweets(filename):
    """Load tweets from a file that stores one JSON document per line.

    Args:
        filename: path of the line-delimited JSON file to read.

    Returns:
        A list holding the decoded object of every non-blank line, in
        file order.

    Raises:
        IOError / OSError: if the file cannot be opened.
        ValueError: if a non-blank line is not valid JSON.
    """
    tweets = []
    # open() replaces the Python-2-only file() builtin, and the with-block
    # closes the handle even when a line fails to parse.
    with open(filename, "r") as input_file:
        for line in input_file:
            # A stray blank line (for instance a doubled newline between
            # records) is not a tweet; feeding it to json.loads would raise.
            if line.strip():
                tweets.append(json.loads(line))
    return tweets
def writeTweets(tweets,filename):
    """Write tweets to a file as one JSON document per line.

    The file is created or truncated and encoded as UTF-8.

    Args:
        tweets: iterable of JSON-serialisable objects.
        filename: path of the file to write.

    Raises:
        IOError / OSError: if the file cannot be opened for writing.
        TypeError: if an element cannot be serialised by json.dump.
    """
    # The with-block guarantees the handle is flushed and closed even when
    # json.dump raises half way through the list.
    with codecs.open(filename, encoding='utf-8', mode='w') as out:
        for tweet in tweets:
            json.dump(tweet, out)
            out.write('\n')
def stdoutStatus(jsonarray):
    """Print the 'text' field of every tweet, one per line, to stdout.

    Args:
        jsonarray: iterable of tweet dicts, each with a 'text' key.

    Raises:
        KeyError: if a tweet has no 'text' key.
    """
    for tweet in jsonarray:
        # Single-argument call form: behaves the same as the Python 2 print
        # statement and is also valid Python 3.
        print(tweet['text'])
def getImages(jsonarray):
    """Collect the ':large' image URL of the first media item of each tweet.

    Tweets whose 'entities' dict has no 'media' key are skipped. Each
    distinct image is returned once, in first-seen order.

    Args:
        jsonarray: iterable of tweet dicts with an 'entities' dict.

    Returns:
        A list of strings of the form '<media_url_https>:large'.

    Raises:
        KeyError: if a tweet has no 'entities' key, or a media item has no
            'media_url_https' key.
    """
    mediaHTTPS = []
    # Track the raw URLs separately: the result list stores them with a
    # ':large' suffix, so testing the raw URL against that list (as the
    # previous version did) could never detect a duplicate.
    seen = set()
    for tweet in jsonarray:
        entities = tweet['entities']
        if 'media' not in entities:
            continue
        https = entities['media'][0]['media_url_https']
        if https in seen:
            continue
        seen.add(https)
        mediaHTTPS.append(https + ':large')
    return mediaHTTPS
def countMentions(mentions):
    """Count how many times each mention occurs.

    Args:
        mentions: iterable of hashable values (screen names in this file).

    Returns:
        A tuple (unique, sayi) of two parallel lists: unique holds each
        distinct mention in first-seen order and sayi[i] is the number of
        occurrences of unique[i].
    """
    unique = []
    sayi = []
    # Maps a mention to its index in the two parallel lists, so each item
    # is handled in constant time instead of scanning `unique` repeatedly.
    position = {}
    for mention in mentions:
        idx = position.get(mention)
        if idx is None:
            position[mention] = len(unique)
            unique.append(mention)
            sayi.append(1)
        else:
            sayi[idx] += 1
    return unique, sayi
def getMentions(jsonarray):
    """Return the screen name of the first user mentioned in each tweet.

    Tweets whose 'user_mentions' list is empty contribute nothing; only
    the first mention of a tweet is taken.

    Args:
        jsonarray: iterable of tweet dicts with
            tweet['entities']['user_mentions'] present.

    Returns:
        A list of screen-name strings, in tweet order.
    """
    return [
        tweet['entities']['user_mentions'][0]['screen_name']
        for tweet in jsonarray
        if tweet['entities']['user_mentions'] != []
    ]
def WOMentionStats(filename):
    """Write per-user mention counts of a tweet file to '<filename>.csv'.

    Reads the tweets in `filename`, counts the first-mentioned screen name
    of each tweet and writes one 'screen_name,count' line per distinct
    name.

    Args:
        filename: path of the line-delimited JSON tweet file.
    """
    # Compute the statistics before touching the output file, so a read or
    # parse failure does not leave an empty CSV behind.
    unique, sayi = countMentions(getMentions(getOldTweets(filename)))
    # The with-block closes the CSV even if a write fails.
    with open(filename + '.csv', 'w') as outf:
        for screen_name, count in zip(unique, sayi):
            outf.write('%s,%i\n' % (screen_name, count))
def dowloadAllImages(filename):
    """Download every image referenced by the tweets in `filename`.

    Each image URL is fetched with the external `wget` program into the
    current working directory, one download at a time.

    Args:
        filename: path of the line-delimited JSON tweet file.

    Raises:
        OSError: if the `wget` executable cannot be started.
    """
    # Function-scope import keeps this block self-contained.
    import subprocess

    tweets = getOldTweets(filename)
    for media in getImages(tweets):
        # The URL comes from tweet data, so pass it as a separate argv
        # entry instead of splicing it into a shell command line; no shell
        # is involved and metacharacters in the URL are inert.
        subprocess.call(['wget', media])
def getAllUserMentions(directory):
    """Write mention statistics for every '*taymlayn.txt' file in a directory.

    Args:
        directory: directory to search; files whose names end in
            'taymlayn.txt' are each processed with WOMentionStats.
    """
    # Function-scope import keeps this block self-contained.
    import glob

    # glob replaces shelling out to `ls` and splitting its output on
    # whitespace, which broke on paths containing spaces and passed the
    # directory name through a shell. sorted() keeps a stable order
    # comparable to the listing `ls` produced.
    for user in sorted(glob.glob('%s/*taymlayn.txt' % directory)):
        WOMentionStats(user)
def mergeTweets(basetw,addedtw):
    """Append the tweets of addedtw to basetw, leaving out duplicates.

    A tweet is a duplicate when its 'id' is already present in basetw,
    including ids appended earlier during this same call. basetw is
    modified in place.

    Args:
        basetw: list of tweet dicts to extend.
        addedtw: iterable of tweet dicts to merge in.

    Returns:
        None.

    Raises:
        KeyError: if a tweet has no 'id' key.
    """
    # A set gives constant-time membership tests.
    ids = set(tweet['id'] for tweet in basetw)
    for tweet in addedtw:
        if tweet['id'] in ids:
            continue
        basetw.append(tweet)
        # Remember the new id so a repeat later in addedtw is also
        # filtered, matching what mergeAndWriteLargeTweetFiles does.
        ids.add(tweet['id'])
def WOmergeTweets():
    """Command-line entry: merge the tweets of a second file into the first.

    Reads two file names from sys.argv[1] and sys.argv[2] and appends to
    the first file every tweet of the second one whose id it does not
    already contain (via mergeAndWriteLargeTweetFiles).

    Exits with status 1 after printing a message when fewer than two file
    names are given or when the first file does not exist.
    """
    if len(sys.argv) < 3:
        # Usage text (Turkish): file names are missing; the second file is
        # de-duplicated and appended to the first.
        print('''
tibit dosyalarini birlestirmek istiyorsun, dosya isimlerini vermiyorsun ;_;
kullanim sekli:
> berkin_toolkit.py input1.json input2.json
input2.json dosyasini ayiklayarak input1.json a append eder.
''')
        # Non-zero status so calling scripts can detect the failure.
        sys.exit(1)

    file1 = sys.argv[1]
    file2 = sys.argv[2]
    # Announces which files are merged and where the result is written.
    print('''
\"%s\" \"%s\" dosyalarini alip, tekrarlari ayiklayip \"%s\"a yazdirmaya calisiyorsun.
eminsin insaAllah u_u'''%(file1,file2,file1))

    if not os.path.isfile(file1):
        # Message (Turkish): the base file does not exist.
        print('''
%s dosyasi yox, ne is?'''%file1)
        sys.exit(1)

    mergeAndWriteLargeTweetFiles(file1,file2)
def mergeAndWriteLargeTweetFiles(basefile,addedfile):
    """Append to basefile the tweets of addedfile that it does not contain.

    Both files hold one JSON document per line. Only the ids of the base
    file are kept in memory; the tweets of addedfile are streamed, and each
    one whose 'id' has not been seen yet is appended to basefile.

    Args:
        basefile: path of the existing file to extend in place.
        addedfile: path of the file whose tweets are merged in.

    Raises:
        IOError / OSError: if either file cannot be opened.
        ValueError: if a non-blank line is not valid JSON.
        KeyError: if a tweet has no 'id' key.
    """
    ids = set()  # constant-time membership instead of scanning a list
    needs_newline = False
    with open(basefile, "r") as base:
        for line in base:
            # After the loop this reflects the last line of the file.
            needs_newline = not line.endswith('\n')
            if line.strip():
                ids.add(json.loads(line)['id'])
    print("ilk faslin idlerini okudum kaydettim. ayiklanacak dosyayi aciyorum.")

    # Open the input first so a missing addedfile leaves basefile untouched.
    with open(addedfile, "r") as added:
        with codecs.open(basefile, encoding='utf-8', mode='a') as baseout:
            if needs_newline:
                # Without this the first appended tweet would be glued onto
                # the unterminated last line of the base file.
                baseout.write('\n')
            for line in added:
                if not line.strip():
                    continue
                tweet = json.loads(line)
                if tweet['id'] in ids:
                    continue
                json.dump(tweet, baseout)
                baseout.write('\n')
                ids.add(tweet['id'])
    print("halloldu insallaa, masallaa.")
def FilemergeTweets():
    """Command-line entry: merge several tweet files into a new output file.

    sys.argv[1] names a text file containing whitespace-separated tweet
    file paths; sys.argv[2] names the output file. The tweets of the first
    listed file are loaded, the tweets of every following file are merged
    in without duplicate ids, and the result is written to the output file.

    Exits with status 1 after printing a message when fewer than two
    arguments are given or when the output file already exists (it is never
    overwritten).

    Raises:
        IndexError: if the list file names no files at all.
    """
    if len(sys.argv) < 3:
        # Usage text (Turkish): list file and output file are missing.
        print('''
tibit dosyalarini birlestirmek istiyorsun, dosya isimlerini nereden alacagimi soylemiyorsun
kullanim sekli:
> berkin_toolkit.py inputliste.txt output.json
''')
        # Non-zero status so calling scripts can detect the failure.
        sys.exit(1)

    liste = sys.argv[1]
    outfile = sys.argv[2]
    print('''
dosyalarin listesi surada -->\"%s\", tekrarlari ayiklayip \"%s\"a yazdirmaya calisiyorsun.
eminsin insaAllah u_u'''%(liste,outfile))

    if os.path.isfile(outfile):
        # Message (Turkish): the output file already exists; keep it.
        print('''
%s dosyasi hali hazirda var. silinmesin yazik.'''%outfile)
        sys.exit(1)

    # Close the list file as soon as its content has been read.
    with open(liste) as listing:
        files = listing.read().split()
    print(files)
    print("dosyalarini birlestirecegiz")

    base = getOldTweets(files[0])
    for f in files[1:]:
        print("%s dosyasi okunuyor"%f)
        eklenen = getOldTweets(f)
        print("gorulmustur. simdi de ayiklayalim")
        mergeTweets(base,eklenen)
        # Drop the reference before loading the next file to limit peak
        # memory use.
        del eklenen
    writeTweets(base,outfile)
# Script entry point: when run directly (not imported), merge the two tweet
# files named on the command line.
if __name__ == "__main__":
    WOmergeTweets()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment