Last active
February 24, 2021 20:48
-
-
Save rishi-raj-jain/8e3e807489e70a1e1b15d617f4f7acb5 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import csv
import json
import os
import pprint
import sys
import threading

import community as community_louvain
import matplotlib.cm as cm
import matplotlib.pyplot as plt
import networkx as nx
from community import generate_dendrogram, best_partition, partition_at_level
from networkx.readwrite import json_graph
from pymongo import MongoClient
# Connecting to the db
# NOTE(review): this copy of the script never defined `db`, so the lookup
# below raised NameError. The connection is now built from the MONGODB_URI
# environment variable, whose path component must name the database
# (e.g. mongodb://host:27017/<database>) -- confirm against the real setup.
client = MongoClient(os.environ['MONGODB_URI'])
db = client.get_default_database()
collection = db['tweets']

# Undirected weighted graphs; an edge's 'weight' counts repeated interactions
G = nx.Graph()   # User Mentions
G2 = nx.Graph()  # Retweets Mentions

# Keep track of tweets counted (tweet id -> 1 once its mentions were added)
mapOriginal = {}
def addToGraph(src, destinations):
    """Add edges from `src` to every mentioned user in the mentions graph G.

    `destinations` is an iterable of mention dicts; only their
    'screen_name' key is read. A new edge starts with weight 1, and an
    existing edge has its weight incremented by 1.
    """
    for mention in destinations:
        target = mention['screen_name']
        if not G.has_edge(src, target):
            G.add_edge(src, target, weight=1)
            continue
        # Edge already known: count one more mention
        G[src][target]['weight'] += 1
def processTweet(item):
    """Process a tweet to get its user mentions and add them to the graph.

    `item` is a tweet dict. Its author is read from
    item['user']['screen_name'] and every entry of the tweet's
    'user_mentions' list is passed to addToGraph with that author as the
    source.

    When the tweet is flagged 'truncated', the mentions are taken from
    item['extended_tweet']['entities']. If that payload is absent, the
    top-level item['entities'] is used instead of raising KeyError.
    """
    owner = item['user']['screen_name']
    # Only look at 'extended_tweet' for truncated tweets, and tolerate
    # truncated tweets that were stored without it.
    extended = item.get('extended_tweet') if item.get('truncated') else None
    source = extended if extended else item
    addToGraph(owner, source['entities']['user_mentions'])
def addToRetweet(src, dest):
    """Add one retweet between `src` and `dest` to the G2 graph.

    A new edge is created with weight 1; if the edge already exists its
    weight is incremented by 1.
    """
    if not G2.has_edge(src, dest):
        G2.add_edge(src, dest, weight=1)
        return
    # Edge already known: count one more retweet
    G2[src][dest]['weight'] += 1
def UserMentionsCSV():
    """Write user mentions to CSV.

    (Re)creates 'usermentions.csv' with a 'Source,Target,Weight' header
    followed by one row per edge of the mentions graph G.
    """
    # Mode 'w' already empties the file, so no explicit truncate is needed;
    # newline='' leaves line-ending handling to the csv module.
    with open('usermentions.csv', 'w', newline='') as fileSave:
        writer = csv.writer(fileSave)
        writer.writerow(['Source', 'Target', 'Weight'])
        # edges(data='weight') yields (source, target, weight) triples
        writer.writerows(G.edges(data='weight'))
def RetweetsCSV():
    """Write retweets to CSV.

    (Re)creates 'retweets.csv' with a 'Source,Target,Weight' header
    followed by one row per edge of the retweets graph G2.
    """
    # Mode 'w' already empties the file, so no explicit truncate is needed;
    # newline='' leaves line-ending handling to the csv module.
    with open('retweets.csv', 'w', newline='') as fileSave:
        writer = csv.writer(fileSave)
        writer.writerow(['Source', 'Target', 'Weight'])
        # edges(data='weight') yields (source, target, weight) triples
        writer.writerows(G2.edges(data='weight'))
# Upper bound on how many stored tweets are read. The previous manual
# counter stopped one record short of this value (after 9,999 tweets).
MAX_TWEETS = 10000


def _processOnce(tweet):
    """Run processTweet on `tweet` unless its id was already recorded."""
    if tweet['id'] not in mapOriginal:
        mapOriginal[tweet['id']] = 1
        processTweet(tweet)


# Iterating in each record
for item in collection.find().limit(MAX_TWEETS):
    retweeted = item.get('retweeted_status')
    quoted = item.get('quoted_status')

    if retweeted is not None:
        # A retweet. This branch is checked first so that a retweet of a
        # quote tweet (which has both fields set) is linked to the author
        # it retweeted instead of being counted as an original tweet and
        # linked to the quoted author.
        _processOnce(retweeted)
        addToRetweet(item['user']['screen_name'],
                     retweeted['user']['screen_name'])
    elif quoted is not None:
        # A quote tweet: count the mentions of the new post and of the
        # quoted post, then link the quoting user to the quoted author.
        _processOnce(item)
        _processOnce(quoted)
        addToRetweet(item['user']['screen_name'],
                     quoted['user']['screen_name'])
    else:
        # The tweet is original itself (neither a retweet nor a quote)
        _processOnce(item)

print("Done with iterations...")
print("Starting export to CSV...")

# Create CSV's (Multithreading): the two exports write to different files
T1 = threading.Thread(target=UserMentionsCSV)
T2 = threading.Thread(target=RetweetsCSV)
T1.start()
T2.start()
T1.join()
T2.join()
print("Completed!")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment