Last active
March 21, 2020 14:55
-
-
Save n-eq/1e968934278b0f6691971aa7f6d6cccc to your computer and use it in GitHub Desktop.
Draw a graph of all the quote tweets (including children quote tweets) of a particular tweet (Depth First Search)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""
@author: marrakchino

## Notes
* I used both tweepy and twitter because neither of the two libraries is extensive enough to suit my needs:
    1. Search for a specific tweet using its `id`: tweepy does this perfectly
    2. Use a raw query (as in the web app), in particular to find quote tweets of a tweet: twitter does this perfectly
* To my great disappointment, I learned that `The Search API is not complete index of all Tweets, but instead an index of recent Tweets.
  The index includes between 6-9 days of Tweets.` This means this script only works when applied to a 'recent' tweet.
* TODO:
    - label edges
    - (uni!)directional edges
    - clickable nodes
    - better node placing/highlighting

Heavily inspired by https://github.com/ugis22/analysing_twitter
"""
import time | |
import networkx as nx | |
import twitter | |
import tweepy | |
import matplotlib.pyplot as plt | |
# root tweet
# Id (as a string) of the tweet whose quote tweets are explored; it is empty
# here and is the default argument of loop().
BASE_ID = ""
# python-twitter client; quotes_of() uses it to run raw search queries.
# All four credentials are empty strings in this file.
api = twitter.Api(consumer_key="",
                  consumer_secret="",
                  access_token_key="",
                  access_token_secret="")
# tweepy client; used to look up a single tweet by its id.
# The OAuth key/secret pairs are likewise empty strings in this file.
auth = tweepy.OAuthHandler("", "")
auth.set_access_token("", "")
tweep = tweepy.API(auth)
# Undirected graph filled by quotes_of(): one edge per (quoting tweet, quoted tweet) pair.
graph = nx.Graph()
l = [] # all processed tweets
def quotes_of(id):
    """Find the quote tweets of one tweet and record them in ``graph``.

    Runs a raw search query for tweets whose URL references ``id``, looks
    each hit up again through tweepy, and adds an edge from the quoting
    tweet to the quoted tweet in the module-level ``graph``. The quoting
    tweet's node carries ``user`` (screen name) and ``text`` attributes.

    Args:
        id: string representation of the quoted tweet's id (the ``id_str``
            field in the Tweet model). The name shadows the builtin but is
            kept so existing callers are unaffected.

    Returns:
        A list with one tweepy status per quote tweet found (possibly empty).
    """
    query = "q=-from%3Aquotedreplies%20url%3A" + id + "&f=live"
    results = api.GetSearch(raw_query=query)
    print("quotes of {} : {}".format(id, [res.id for res in results]))

    new_tweets = []
    for res in results:
        if res.id_str == id:
            # The search sometimes returns the quoted tweet itself; a tweet
            # is not its own quote, so skip it.
            continue
        tweet = tweep.get_status(res.id_str)
        new_tweets.append(tweet)
        # add_node(n, **attrs) creates or updates the node and is available
        # in every networkx release, unlike the `Graph.node` mapping that
        # was removed in networkx 2.4.
        graph.add_node(res.id_str, user=tweet.user.screen_name, text=tweet.text)
        graph.add_edge(res.id_str, id)
        print("Added node from {} to {}".format(res.id_str, id))

    print("{} new tweet(s) quoted from {}".format(len(new_tweets), id))
    return new_tweets
def loop(base_id = BASE_ID):
    """Walk the quote-tweet tree rooted at ``base_id`` depth first.

    Starting from the root tweet, repeatedly pops a tweet, records it in
    the module-level list ``l``, and pushes its not-yet-seen quote tweets
    (as returned by ``quotes_of``). Prints the number of recorded tweets
    once nothing is left to visit.

    Args:
        base_id: string id of the root tweet; defaults to ``BASE_ID``.
    """
    # Ids that are either already recorded in `l` or waiting on the stack.
    # A set of id strings gives O(1) de-duplication and guarantees each
    # tweet is searched at most once.
    seen_ids = {tweet.id_str for tweet in l}

    root = tweep.get_status(base_id)
    stack = []
    if root.id_str not in seen_ids:
        seen_ids.add(root.id_str)
        stack.append(root)

    while stack:
        current = stack.pop()  # LIFO pop => depth-first order
        l.append(current)
        for quote in quotes_of(current.id_str):
            # `current` itself is in seen_ids, so a result equal to the
            # tweet being expanded is simply ignored instead of aborting
            # the whole traversal.
            if quote.id_str not in seen_ids:
                seen_ids.add(quote.id_str)
                stack.append(quote)
        time.sleep(0.5)  # avoid being blocked by the api

    print("Found {} tweets".format(len(l)))
def draw_graph():
    """Draw the largest connected component of the module-level ``graph``.

    Lays the component out with a spring layout on a 20x20 inch figure and
    shows it. Does nothing but print a message when the graph has no nodes.
    """
    if graph.number_of_nodes() == 0:
        # max() over zero components would raise ValueError.
        print("Nothing to draw: the graph is empty")
        return

    # connected_component_subgraphs() was removed in networkx 2.4; picking
    # the biggest node set and taking its subgraph is the portable form.
    largest_nodes = max(nx.connected_components(graph), key=len)
    largest_subgraph = graph.subgraph(largest_nodes)

    pos = nx.spring_layout(largest_subgraph, k=0.05)
    plt.figure(figsize=(20, 20))
    nx.draw(
        largest_subgraph,
        pos=pos,
        # One colour value per *drawn* node: sizing this from the full graph
        # fails as soon as the graph has more than one component.
        node_color=list(range(len(largest_subgraph))),
        cmap=plt.cm.PiYG,
        edge_color="black",
        linewidths=0.8,
        node_size=60,
        with_labels=False,
    )
    plt.show()
def save(filename: str = 'graph.png') -> None:
    """Save the current matplotlib figure to ``filename`` via ``plt.savefig``.

    NOTE(review): nothing in this file calls save(); presumably it has to run
    before the figure window opened by draw_graph() is closed -- confirm.
    """
    plt.savefig(filename)
if __name__ == '__main__':
    # Build the quote-tweet graph from BASE_ID, then display it. The
    # traversal function defined in this file is loop(); the previous call
    # to loopover() referenced a name that does not exist.
    loop()
    draw_graph()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment