Created
October 15, 2020 07:37
-
-
Save vessaldaneshvar/816568bd5bb7ba649dae4c532670ca9a to your computer and use it in GitHub Desktop.
Get data consisting of tweets, retweets, likes, mentions, and follows for every node
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from neo4j import GraphDatabase | |
# Bolt endpoint of the Neo4j instance that receives the USER nodes.
uri = "bolt://localhost:7687"


def parse_ids(text):
    """Return the user ids found in *text*, one per line.

    Surrounding whitespace is stripped (which also removes the ``\\r`` left
    by CRLF line endings), blank lines are skipped, and repeated ids are
    dropped while the order of first appearance is kept.  Splitting on
    ``"\\n"`` alone would yield an empty id for a trailing newline and so
    create a USER node whose ``id_str`` is the empty string.
    """
    stripped = (line.strip() for line in text.splitlines())
    return list(dict.fromkeys(line for line in stripped if line))


def main():
    """Create one ``USER`` node per id listed in ``ids_mini_data.txt``."""
    driver = GraphDatabase.driver(uri, auth=("neo4j", "password"))
    try:
        # utf-8-sig transparently drops a leading byte-order mark, if any.
        with open("ids_mini_data.txt", encoding="utf-8-sig") as fo:
            list_ids = parse_ids(fo.read())
        with driver.session() as session:
            for id_str in list_ids:
                session.run("CREATE (n:USER{id_str:$id_str})", id_str=id_str)
    finally:
        # Release the driver's connections even when the import fails.
        driver.close()


if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from neo4j import GraphDatabase | |
import tweepy | |
import json | |
import sys | |
# Bolt endpoint of the Neo4j instance holding the USER nodes.
uri = "bolt://localhost:7687"

# Profile fields copied from the Twitter user object onto the USER node.
PROFILE_FIELDS = (
    "name",
    "screen_name",
    "description",
    "url",
    "followers_count",
    "friends_count",
    "listed_count",
    "created_at",
    "favourites_count",
    "statuses_count",
)

# Picks one USER node that has neither profile data nor a recorded error.
PENDING_USER_QUERY = (
    "MATCH (n:USER) where NOT EXISTS(n.screen_name) "
    "AND NOT EXISTS(n.error_get_data) "
    "return n.id_str as id_str limit 1"
)

# Records why the lookup failed so the node is not selected again.
MARK_ERROR_QUERY = (
    "MATCH (n:USER{id_str:$id_str}) where NOT EXISTS(n.screen_name) "
    "set n.error_get_data = $error"
)

# Writes every field of PROFILE_FIELDS onto the still-unfilled node.
STORE_PROFILE_QUERY = (
    "MATCH (n:USER{id_str:$id_str}) where NOT EXISTS(n.screen_name)\n"
    "SET " + ",\n".join("n.%s = $%s" % (f, f) for f in PROFILE_FIELDS)
)


def profile_params(id_str, user_json):
    """Build the parameter map for ``STORE_PROFILE_QUERY``.

    Args:
        id_str: ``id_str`` of the USER node being filled in.  The node's own
            id is used for the MATCH so the node that was selected is the
            node that gets updated.
        user_json: Mapping holding the user object returned by the API.

    Returns:
        A dict with ``id_str`` plus every key in ``PROFILE_FIELDS``.

    Raises:
        KeyError: If *user_json* lacks one of ``PROFILE_FIELDS``.
    """
    params = {field: user_json[field] for field in PROFILE_FIELDS}
    params["id_str"] = id_str
    return params


def main():
    """Fill in profile data for every USER node that does not have it yet.

    The single command-line argument is the index of the credential set to
    use inside ``token_list.json`` (under the ``listtoken`` key).
    """
    if len(sys.argv) < 2:
        sys.exit("usage: <script> TOKEN_INDEX")

    # open json token list
    with open("token_list.json") as fp:
        token_list = json.load(fp)
    credentials = token_list["listtoken"][int(sys.argv[1])]

    auth = tweepy.OAuthHandler(
        credentials["consumer_key"], credentials["consumer_secret"]
    )
    auth.set_access_token(
        credentials["access_token"], credentials["access_token_secret"]
    )
    api = tweepy.API(auth, wait_on_rate_limit_notify=True, wait_on_rate_limit=True)

    driver = GraphDatabase.driver(uri, auth=("neo4j", "password"))
    try:
        while True:
            with driver.session() as session:
                # single() yields None when the query produced no record;
                # testing the result object itself for truthiness is not a
                # reliable "no rows" check and could keep the loop alive.
                record = session.run(PENDING_USER_QUERY).single()
                if record is None:
                    break
                id_str = record["id_str"]
                try:
                    params = profile_params(id_str, api.get_user(id_str)._json)
                except Exception as error:
                    # Best effort: note the failure on the node and move on,
                    # never falling through to the profile write.
                    session.run(MARK_ERROR_QUERY, id_str=id_str, error=str(error))
                    continue
                session.run(STORE_PROFILE_QUERY, params)
    finally:
        driver.close()


if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json
import sys
import time

import tweepy
from neo4j import GraphDatabase
# Bolt endpoint of the Neo4j instance holding the USER nodes.
uri = "bolt://localhost:7687"

# Seconds to wait after finishing one user before claiming the next.
PAUSE_SECONDS = 3

# Claims one unprocessed USER node.  The limit is applied with WITH *before*
# the SET: a LIMIT placed after RETURN only trims the returned rows and does
# not restrict the write, which would flag every matching node as claimed on
# the first call.
CLAIM_USER_QUERY = (
    "MATCH (n:USER) where NOT EXISTS(n.get_tweets) "
    "AND NOT EXISTS(n.error_get_data) "
    "WITH n limit 1 "
    "set n.get_tweets = True return n.id_str as id_str"
)

# Records why the timeline crawl failed for a user.
MARK_ERROR_QUERY = (
    "MATCH (n:USER{id_str:$id_str}) "
    "set n.error_get_tweets = $error , n.get_tweets = False"
)

# Relationship types written to the graph.  The spelling "QOUTE" is kept
# as-is because it is the type name stored in the database.
EDGE_TYPES = ("MENTIONS", "REPLY", "RETWEET", "QOUTE", "RETWEET_QOUTE")

# Cypher cannot take a relationship type as a parameter, so one statement is
# prepared per type from the fixed names above.
EDGE_QUERIES = {
    edge_type: (
        "MATCH (a:USER{id_str:$a_ids}),(b:USER{id_str:$b_ids})\n"
        "CREATE (a)-[r:" + edge_type + "{tweet_id:$ids_tweet}]->(b)"
    )
    for edge_type in EDGE_TYPES
}


def parse_ids(text):
    """Return the set of user ids found in *text*, one id per line.

    Whitespace around each line is stripped (this also removes the ``\\r`` of
    CRLF files) and blank lines are ignored.  A set is returned because the
    ids are only used for membership tests, once or more per tweet.
    """
    stripped = (line.strip() for line in text.splitlines())
    return {line for line in stripped if line}


def extract_edges(tweet, author_id, known_ids):
    """List the relationships implied by one tweet.

    Args:
        tweet: Mapping holding the tweet's JSON.
        author_id: ``id_str`` of the user whose timeline is being read.
        known_ids: Container of ids belonging to the data set; targets
            outside it are ignored, as are self-references.

    Returns:
        A list of ``(edge_type, target_id)`` tuples in the order mentions,
        reply, retweet, quote.  ``edge_type`` is one of ``EDGE_TYPES``.
    """
    edges = []

    def add(edge_type, target_id):
        # Only keep edges that point at another user of the data set.
        if target_id in known_ids and target_id != author_id:
            edges.append((edge_type, target_id))

    for mention in (tweet.get("entities") or {}).get("user_mentions") or []:
        add("MENTIONS", mention["id_str"])

    reply_to = tweet.get("in_reply_to_user_id_str")
    if reply_to:
        add("REPLY", reply_to)

    retweeted = tweet.get("retweeted_status") or {}
    if retweeted:
        add("RETWEET", retweeted["user"]["id_str"])

    if tweet.get("is_quote_status"):
        quoted = tweet.get("quoted_status")
        # NOTE(review): the retweet-of-a-quote case is treated as the
        # fallback of the direct-quote case, both under is_quote_status;
        # confirm this nesting against the intended behavior.
        nested_quote = retweeted.get("quoted_status")
        if quoted:
            add("QOUTE", quoted["user"]["id_str"])
        elif nested_quote:
            add("RETWEET_QOUTE", nested_quote["user"]["id_str"])

    return edges


def main():
    """Read each unclaimed user's timeline and store interaction edges.

    The single command-line argument is the index of the credential set to
    use inside ``token_list.json`` (under the ``listtoken`` key).
    """
    if len(sys.argv) < 2:
        sys.exit("usage: <script> TOKEN_INDEX")

    # List of ids_dataset
    with open("ids_mini_data.txt", encoding="utf-8-sig") as fo:
        known_ids = parse_ids(fo.read())

    # open json token list
    with open("token_list.json") as fp:
        token_list = json.load(fp)
    credentials = token_list["listtoken"][int(sys.argv[1])]

    auth = tweepy.OAuthHandler(
        credentials["consumer_key"], credentials["consumer_secret"]
    )
    auth.set_access_token(
        credentials["access_token"], credentials["access_token_secret"]
    )
    api = tweepy.API(auth, wait_on_rate_limit_notify=True, wait_on_rate_limit=True)

    driver = GraphDatabase.driver(uri, auth=("neo4j", "password"))
    try:
        while True:
            with driver.session() as session:
                # single() yields None when no record came back; the result
                # object's own truthiness is not a reliable "no rows" check.
                record = session.run(CLAIM_USER_QUERY).single()
                if record is None:
                    break
                id_str = record["id_str"]
                try:
                    timeline = tweepy.Cursor(
                        api.user_timeline,
                        user_id=id_str,
                        include_rts=True,
                        exclude_replies=False,
                        trim_user=True,
                        count=200,
                    ).items()
                    for status in timeline:
                        tweet = status._json
                        for edge_type, target_id in extract_edges(
                            tweet, id_str, known_ids
                        ):
                            session.run(
                                EDGE_QUERIES[edge_type],
                                a_ids=id_str,
                                b_ids=target_id,
                                ids_tweet=tweet["id_str"],
                            )
                except Exception as error:
                    # Best effort: record the failure on the node and carry
                    # on with the next user.  Edges written before the
                    # failure are left in place.
                    session.run(MARK_ERROR_QUERY, id_str=id_str, error=str(error))
            time.sleep(PAUSE_SECONDS)
    finally:
        driver.close()


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment