ferayebend · August 29, 2015 13:57
diff --git a/berkin_toolkit.py b/berkin_toolkit.py
 #!/usr/bin/python
 # -*- encoding: utf-8 -*-
 from __future__ import unicode_literals
 import json
 import codecs
 import sys
 import os

 def getOldTweets(filename):
     '''
     Read a file holding one JSON document per line and return them as a list.

     filename -- path of the file to read.

     Returns the decoded objects in file order. Lines that contain only
     whitespace are skipped instead of being handed to json.loads().
     '''
     tweets = []
     # open() replaces the Python 2-only file() builtin; the with-block
     # closes the handle, which was previously left open.
     with open(filename, "r") as input_file:
         for line in input_file:
             if line.strip():
                 tweets.append(json.loads(line))
     return tweets

 def writeTweets(tweets,filename):
     '''
     Write tweets to filename as UTF-8, one JSON document per line.

     tweets   -- iterable of JSON-serialisable objects.
     filename -- output path; opened in mode 'w', so existing content is
                 replaced.
     '''
     # The with-block closes the file even when json.dump() raises.
     with codecs.open(filename, encoding='utf-8', mode='w') as out:
         for tweet in tweets:
             json.dump(tweet, out)
             out.write('\n')

 def stdoutStatus(jsonarray):
     '''
     Print the 'text' field of every tweet in jsonarray, one per line.
     '''
     for tweet in jsonarray:
         # Single-argument print(...) behaves exactly like the Python 2
         # print statement and is also valid Python 3.
         print(tweet['text'])

 def getImages(jsonarray):
     '''
     Collect the ':large' HTTPS URL of the first media item of each tweet.

     jsonarray -- iterable of tweet dicts. Tweets without 'entities' or
                  without any media are skipped.

     Returns a list of unique URLs, each suffixed with ':large', in
     first-seen order.
     '''
     mediaHTTPS = []
     seen = set()
     for tweet in jsonarray:
         media = tweet.get('entities', {}).get('media')
         if not media:
             continue
         https = media[0]['media_url_https']
         # Duplicates are tracked by the bare URL. The earlier check looked
         # the bare URL up in the list of ':large'-suffixed URLs, so it
         # could never match and repeats were kept.
         if https in seen:
             continue
         seen.add(https)
         mediaHTTPS.append(https+':large')
     return mediaHTTPS

 def countMentions(mentions):
     '''
     Count how often each value occurs in mentions.

     mentions -- iterable of hashable values (the callers in this file pass
                 screen-name strings).

     Returns two parallel lists (unique, sayi): unique holds each distinct
     value in first-seen order and sayi[i] is the number of occurrences of
     unique[i].
     '''
     unique = []
     sayi = []
     # Maps a value to its index in unique/sayi so each lookup is O(1)
     # instead of a linear list scan.
     position = {}
     for mention in mentions:
         if mention in position:
             sayi[position[mention]] += 1
         else:
             position[mention] = len(unique)
             unique.append(mention)
             sayi.append(1)
     return unique, sayi


 def getMentions(jsonarray):
     '''
     Return the screen name of the first user mention of every tweet.

     jsonarray -- iterable of tweet dicts. Tweets without 'entities' or with
                  no user mentions contribute nothing.

     Only the first entry of 'user_mentions' is used; further mentions in
     the same tweet are ignored.
     '''
     mansor = []
     for tweet in jsonarray:
         user_mentions = tweet.get('entities', {}).get('user_mentions')
         if user_mentions:
             mansor.append(user_mentions[0]['screen_name'])
     return mansor

 def WOMentionStats(filename):
     '''
     Write the mention counts of the tweets in filename to filename + '.csv'.

     Each output row is 'screen_name,count'. The rows follow the order in
     which countMentions() returns the names.
     '''
     # Compute first so that a failure while reading does not leave an empty
     # CSV behind.
     unique, sayi = countMentions(getMentions(getOldTweets(filename)))
     with open(filename+'.csv','w') as outf:
         for mention, count in zip(unique, sayi):
             outf.write('%s,%i\n'%(mention, count))

 def dowloadAllImages(filename):
     '''
     Download every image referenced by the tweets stored in filename.

     The URLs come from getImages() and each one is fetched with the
     external 'wget' program into the current working directory.
     '''
     import subprocess

     tweets = getOldTweets(filename)
     for media in getImages(tweets):
         # The URL originates from tweet data, so it is passed as a separate
         # argv entry rather than being interpolated into a shell command.
         subprocess.call(['wget', media])

 def getAllUserMentions(directory):
     '''
     Run WOMentionStats() on every '*taymlayn.txt' file inside directory.
     '''
     import glob

     # glob replaces "ls | split": no shell is involved and paths that
     # contain whitespace stay intact. sorted() keeps a stable order.
     for user in sorted(glob.glob(os.path.join(directory, '*taymlayn.txt'))):
         WOMentionStats(user)

 def mergeTweets(basetw,addedtw):
     '''
     Append the tweets of addedtw to basetw, leaving out duplicates.

     basetw  -- list of tweet dicts; modified in place.
     addedtw -- iterable of tweet dicts to merge in.

     Two tweets are duplicates when their 'id' values are equal. Returns
     None.
     '''
     ids = set(fs['id'] for fs in basetw)

     for ts in addedtw:
         if ts['id'] in ids:
             continue
         basetw.append(ts)
         # Remember the id so that a tweet repeated inside addedtw is only
         # appended once.
         ids.add(ts['id'])

 def WOmergeTweets():
     '''
     Command-line entry point: merge the tweets of sys.argv[2] into
     sys.argv[1].

     Prints a usage text and exits when fewer than two file names are given,
     and exits when the first file does not exist. Both exits use sys.exit()
     without an argument. The merge itself is delegated to
     mergeAndWriteLargeTweetFiles().
     '''
     # print(...) with a single argument is valid on Python 2 and Python 3.
     if len(sys.argv) < 3:
         print('''
    tibit dosyalarini birlestirmek istiyorsun, dosya isimlerini vermiyorsun ;_;
   
    kullanim sekli:
    > berkin_toolkit.py input1.json input2.json
    input2.json dosyasini ayiklayarak input1.json a append eder.
              ''')
         sys.exit()
     file1 = sys.argv[1]
     file2 = sys.argv[2]
     print('''
    \"%s\" \"%s\" dosyalarini alip, tekrarlari ayiklayip \"%s\"a yazdirmaya calisiyorsun.
    eminsin insaAllah u_u'''%(file1,file2,file1))
     if not os.path.isfile(file1):
         print('''
     %s dosyasi yox, ne is?'''%file1)
         sys.exit()

     mergeAndWriteLargeTweetFiles(file1,file2)


 def mergeAndWriteLargeTweetFiles(basefile,addedfile):
     '''
     Append to basefile every tweet of addedfile whose 'id' is not in it yet.

     basefile  -- path of the file that is extended (one JSON tweet per line).
     addedfile -- path of the file whose new tweets are appended.

     Only the ids are kept in memory, never the tweets themselves. Lines
     containing only whitespace are ignored in both files.
     '''
     ids = set()  # set membership keeps the duplicate test O(1) per tweet
     last_line = ''
     with open(basefile, "r") as base:
         for line in base:
             last_line = line
             if line.strip():
                 ids.add(json.loads(line)['id'])
     print("ilk faslin idlerini okudum kaydettim. ayiklanacak dosyayi aciyorum.")

     with open(addedfile, "r") as added:
         with codecs.open(basefile, encoding='utf-8', mode='a') as baseout:
             # If the base file does not end with a newline, the first
             # appended tweet would be glued onto its last line.
             # str('\n') is a native string on Python 2 and 3, so the
             # comparison never triggers an implicit decode.
             if last_line and not last_line.endswith(str('\n')):
                 baseout.write('\n')
             for line in added:
                 if not line.strip():
                     continue
                 tweet = json.loads(line)
                 if tweet['id'] in ids:
                     continue
                 json.dump(tweet,baseout)
                 baseout.write('\n')
                 ids.add(tweet['id'])
     print("halloldu insallaa, masallaa.")

 def FilemergeTweets():
     '''
     Command-line entry point: merge all tweet files named in a list file.

     sys.argv[1] is a text file with whitespace-separated tweet file names,
     sys.argv[2] is the output path. Prints a usage text and exits when
     fewer than two arguments are given, and exits without writing when the
     output file already exists. Both exits use sys.exit() without an
     argument. The files are loaded with getOldTweets(), de-duplicated with
     mergeTweets() and written with writeTweets().
     '''
     # print(...) with a single argument is valid on Python 2 and Python 3.
     if len(sys.argv) < 3:
         print('''
    tibit dosyalarini birlestirmek istiyorsun, dosya isimlerini nereden alacagimi soylemiyorsun
    
    kullanim sekli:
    > berkin_toolkit.py inputliste.txt output.json
 	      ''')
         sys.exit()
     liste = sys.argv[1]
     outfile = sys.argv[2]
     print('''
    dosyalarin listesi surada -->\"%s\", tekrarlari ayiklayip \"%s\"a yazdirmaya calisiyorsun. 
    eminsin insaAllah u_u'''%(liste,outfile))
     if os.path.isfile(outfile):
         print('''
     %s dosyasi hali hazirda var. silinmesin yazik.'''%outfile)
         sys.exit()

     # The with-block closes the list file once its content is read.
     with open(liste) as listing:
         files = listing.read().split()
     print(files)
     print("dosyalarini birlestirecegiz")

     base = getOldTweets(files[0])
     for f in files[1:]:
         print("%s dosyasi okunuyor"%f)
         eklenen = getOldTweets(f)
         print("gorulmustur. simdi de ayiklayalim")
         mergeTweets(base,eklenen)
         # Drop the loaded file before the next one is read.
         del eklenen
     writeTweets(base,outfile)

 # Script entry point: when run directly, perform the two-file merge
 # driven by sys.argv (see WOmergeTweets).
 if __name__ == "__main__":
    WOmergeTweets()
	#!/usr/bin/python
	# -- encoding: utf-8 --
	from __future__ import unicode_literals
	import json
	import codecs
	import sys
	import os

	def getOldTweets(filename):
	    '''
	    Read a file holding one JSON document per line and return them as a list.

	    filename -- path of the file to read.

	    Returns the decoded objects in file order. Lines that contain only
	    whitespace are skipped instead of being handed to json.loads().
	    '''
	    tweets = []
	    # open() replaces the Python 2-only file() builtin; the with-block
	    # closes the handle, which was previously left open.
	    with open(filename, "r") as input_file:
	        for line in input_file:
	            if line.strip():
	                tweets.append(json.loads(line))
	    return tweets

	def writeTweets(tweets,filename):
	    '''
	    Write tweets to filename as UTF-8, one JSON document per line.

	    tweets   -- iterable of JSON-serialisable objects.
	    filename -- output path; opened in mode 'w', so existing content is
	                replaced.
	    '''
	    # The with-block closes the file even when json.dump() raises.
	    with codecs.open(filename, encoding='utf-8', mode='w') as out:
	        for tweet in tweets:
	            json.dump(tweet, out)
	            out.write('\n')

	def stdoutStatus(jsonarray):
	    '''
	    Print the 'text' field of every tweet in jsonarray, one per line.
	    '''
	    for tweet in jsonarray:
	        # Single-argument print(...) behaves exactly like the Python 2
	        # print statement and is also valid Python 3.
	        print(tweet['text'])

	def getImages(jsonarray):
	    '''
	    Collect the ':large' HTTPS URL of the first media item of each tweet.

	    jsonarray -- iterable of tweet dicts. Tweets without 'entities' or
	                 without any media are skipped.

	    Returns a list of unique URLs, each suffixed with ':large', in
	    first-seen order.
	    '''
	    mediaHTTPS = []
	    seen = set()
	    for tweet in jsonarray:
	        media = tweet.get('entities', {}).get('media')
	        if not media:
	            continue
	        https = media[0]['media_url_https']
	        # Duplicates are tracked by the bare URL. The earlier check looked
	        # the bare URL up in the list of ':large'-suffixed URLs, so it
	        # could never match and repeats were kept.
	        if https in seen:
	            continue
	        seen.add(https)
	        mediaHTTPS.append(https+':large')
	    return mediaHTTPS

	def countMentions(mentions):
	    '''
	    Count how often each value occurs in mentions.

	    mentions -- iterable of hashable values (the callers in this file pass
	                screen-name strings).

	    Returns two parallel lists (unique, sayi): unique holds each distinct
	    value in first-seen order and sayi[i] is the number of occurrences of
	    unique[i].
	    '''
	    unique = []
	    sayi = []
	    # Maps a value to its index in unique/sayi so each lookup is O(1)
	    # instead of a linear list scan.
	    position = {}
	    for mention in mentions:
	        if mention in position:
	            sayi[position[mention]] += 1
	        else:
	            position[mention] = len(unique)
	            unique.append(mention)
	            sayi.append(1)
	    return unique, sayi


	def getMentions(jsonarray):
	    '''
	    Return the screen name of the first user mention of every tweet.

	    jsonarray -- iterable of tweet dicts. Tweets without 'entities' or with
	                 no user mentions contribute nothing.

	    Only the first entry of 'user_mentions' is used; further mentions in
	    the same tweet are ignored.
	    '''
	    mansor = []
	    for tweet in jsonarray:
	        user_mentions = tweet.get('entities', {}).get('user_mentions')
	        if user_mentions:
	            mansor.append(user_mentions[0]['screen_name'])
	    return mansor

	def WOMentionStats(filename):
	    '''
	    Write the mention counts of the tweets in filename to filename + '.csv'.

	    Each output row is 'screen_name,count'. The rows follow the order in
	    which countMentions() returns the names.
	    '''
	    # Compute first so that a failure while reading does not leave an empty
	    # CSV behind.
	    unique, sayi = countMentions(getMentions(getOldTweets(filename)))
	    with open(filename+'.csv','w') as outf:
	        for mention, count in zip(unique, sayi):
	            outf.write('%s,%i\n'%(mention, count))

	def dowloadAllImages(filename):
	    '''
	    Download every image referenced by the tweets stored in filename.

	    The URLs come from getImages() and each one is fetched with the
	    external 'wget' program into the current working directory.
	    '''
	    import subprocess

	    tweets = getOldTweets(filename)
	    for media in getImages(tweets):
	        # The URL originates from tweet data, so it is passed as a separate
	        # argv entry rather than being interpolated into a shell command.
	        subprocess.call(['wget', media])

	def getAllUserMentions(directory):
	    '''
	    Run WOMentionStats() on every '*taymlayn.txt' file inside directory.
	    '''
	    import glob

	    # glob replaces "ls | split": no shell is involved and paths that
	    # contain whitespace stay intact. sorted() keeps a stable order.
	    for user in sorted(glob.glob(os.path.join(directory, '*taymlayn.txt'))):
	        WOMentionStats(user)

	def mergeTweets(basetw,addedtw):
	    '''
	    Append the tweets of addedtw to basetw, leaving out duplicates.

	    basetw  -- list of tweet dicts; modified in place.
	    addedtw -- iterable of tweet dicts to merge in.

	    Two tweets are duplicates when their 'id' values are equal. Returns
	    None.
	    '''
	    ids = set(fs['id'] for fs in basetw)

	    for ts in addedtw:
	        if ts['id'] in ids:
	            continue
	        basetw.append(ts)
	        # Remember the id so that a tweet repeated inside addedtw is only
	        # appended once.
	        ids.add(ts['id'])

	def WOmergeTweets():
	    '''
	    Command-line entry point: merge the tweets of sys.argv[2] into
	    sys.argv[1].

	    Prints a usage text and exits when fewer than two file names are given,
	    and exits when the first file does not exist. Both exits use sys.exit()
	    without an argument. The merge itself is delegated to
	    mergeAndWriteLargeTweetFiles().
	    '''
	    # print(...) with a single argument is valid on Python 2 and Python 3.
	    if len(sys.argv) < 3:
	        print('''
	tibit dosyalarini birlestirmek istiyorsun, dosya isimlerini vermiyorsun ;_;

	kullanim sekli:
	> berkin_toolkit.py input1.json input2.json
	input2.json dosyasini ayiklayarak input1.json a append eder.
	''')
	        sys.exit()
	    file1 = sys.argv[1]
	    file2 = sys.argv[2]
	    print('''
	\"%s\" \"%s\" dosyalarini alip, tekrarlari ayiklayip \"%s\"a yazdirmaya calisiyorsun.
	eminsin insaAllah u_u'''%(file1,file2,file1))
	    if not os.path.isfile(file1):
	        print('''
	%s dosyasi yox, ne is?'''%file1)
	        sys.exit()

	    mergeAndWriteLargeTweetFiles(file1,file2)


	def mergeAndWriteLargeTweetFiles(basefile,addedfile):
	    '''
	    Append to basefile every tweet of addedfile whose 'id' is not in it yet.

	    basefile  -- path of the file that is extended (one JSON tweet per line).
	    addedfile -- path of the file whose new tweets are appended.

	    Only the ids are kept in memory, never the tweets themselves. Lines
	    containing only whitespace are ignored in both files.
	    '''
	    ids = set()  # set membership keeps the duplicate test O(1) per tweet
	    last_line = ''
	    with open(basefile, "r") as base:
	        for line in base:
	            last_line = line
	            if line.strip():
	                ids.add(json.loads(line)['id'])
	    print("ilk faslin idlerini okudum kaydettim. ayiklanacak dosyayi aciyorum.")

	    with open(addedfile, "r") as added:
	        with codecs.open(basefile, encoding='utf-8', mode='a') as baseout:
	            # If the base file does not end with a newline, the first
	            # appended tweet would be glued onto its last line.
	            # str('\n') is a native string on Python 2 and 3, so the
	            # comparison never triggers an implicit decode.
	            if last_line and not last_line.endswith(str('\n')):
	                baseout.write('\n')
	            for line in added:
	                if not line.strip():
	                    continue
	                tweet = json.loads(line)
	                if tweet['id'] in ids:
	                    continue
	                json.dump(tweet,baseout)
	                baseout.write('\n')
	                ids.add(tweet['id'])
	    print("halloldu insallaa, masallaa.")

	def FilemergeTweets():
	    '''
	    Command-line entry point: merge all tweet files named in a list file.

	    sys.argv[1] is a text file with whitespace-separated tweet file names,
	    sys.argv[2] is the output path. Prints a usage text and exits when
	    fewer than two arguments are given, and exits without writing when the
	    output file already exists. Both exits use sys.exit() without an
	    argument. The files are loaded with getOldTweets(), de-duplicated with
	    mergeTweets() and written with writeTweets().
	    '''
	    # print(...) with a single argument is valid on Python 2 and Python 3.
	    if len(sys.argv) < 3:
	        print('''
	tibit dosyalarini birlestirmek istiyorsun, dosya isimlerini nereden alacagimi soylemiyorsun

	kullanim sekli:
	> berkin_toolkit.py inputliste.txt output.json
	''')
	        sys.exit()
	    liste = sys.argv[1]
	    outfile = sys.argv[2]
	    print('''
	dosyalarin listesi surada -->\"%s\", tekrarlari ayiklayip \"%s\"a yazdirmaya calisiyorsun.
	eminsin insaAllah u_u'''%(liste,outfile))
	    if os.path.isfile(outfile):
	        print('''
	%s dosyasi hali hazirda var. silinmesin yazik.'''%outfile)
	        sys.exit()

	    # The with-block closes the list file once its content is read.
	    with open(liste) as listing:
	        files = listing.read().split()
	    print(files)
	    print("dosyalarini birlestirecegiz")

	    base = getOldTweets(files[0])
	    for f in files[1:]:
	        print("%s dosyasi okunuyor"%f)
	        eklenen = getOldTweets(f)
	        print("gorulmustur. simdi de ayiklayalim")
	        mergeTweets(base,eklenen)
	        # Drop the loaded file before the next one is read.
	        del eklenen
	    writeTweets(base,outfile)

	# Script entry point: when run directly, perform the two-file merge
	# driven by sys.argv (see WOmergeTweets).
	if __name__ == "__main__":
	WOmergeTweets()
No results found