Skip to content

Instantly share code, notes, and snippets.

@vessaldaneshvar
Created October 15, 2020 07:37
Show Gist options
  • Save vessaldaneshvar/816568bd5bb7ba649dae4c532670ca9a to your computer and use it in GitHub Desktop.
Save vessaldaneshvar/816568bd5bb7ba649dae4c532670ca9a to your computer and use it in GitHub Desktop.
Get data consisting of tweets, retweets, likes, mentions, and follows for every node
# Seed the graph: create one USER node for every id listed in
# ids_mini_data.txt (one id per line).
from neo4j import GraphDatabase

uri = "bolt://localhost:7687"
driver = GraphDatabase.driver(uri, auth=("neo4j", "password"))

# "utf-8-sig" drops a leading byte-order mark so it cannot end up glued to
# the first id.
with open("ids_mini_data.txt", encoding="utf-8-sig") as fo:
    data = fo.read()

# Skip blank lines (including the one produced by a trailing newline) and
# strip stray whitespace; otherwise a USER node with an empty or padded
# id_str would be created.
list_ids = [line.strip() for line in data.splitlines() if line.strip()]

try:
    with driver.session() as session:
        for ids in list_ids:
            session.run("CREATE (n:USER{id_str:$id_str})", id_str=ids)
finally:
    # Release the connection pool even if a query fails.
    driver.close()
# Fetch the Twitter profile of every USER node that has no screen_name yet
# and copy the profile fields onto the node. Nodes whose lookup fails get an
# error_get_data property instead and are not retried.
#
# Usage: python <script> <token_index>
#   token_index selects one credential set from token_list.json["listtoken"].
from neo4j import GraphDatabase
import tweepy
import json
import sys

# Fields copied from the Twitter user object onto the USER node.
PROFILE_FIELDS = (
    "name",
    "screen_name",
    "description",
    "url",
    "followers_count",
    "friends_count",
    "listed_count",
    "created_at",
    "favourites_count",
    "statuses_count",
)

NEXT_USER_QUERY = "MATCH (n:USER) where NOT EXISTS(n.screen_name) AND NOT EXISTS(n.error_get_data) return n.id_str as id_str limit 1"

MARK_ERROR_QUERY = """MATCH (n:USER{id_str:$id_str}) where NOT EXISTS(n.screen_name) set n.error_get_data = $error"""

STORE_PROFILE_QUERY = """MATCH (n:USER{id_str:$id_str}) where NOT EXISTS(n.screen_name)
SET n.name = $name ,
n.screen_name = $screen_name ,
n.description = $description ,
n.url = $url ,
n.followers_count = $followers_count,
n.friends_count = $friends_count,
n.listed_count = $listed_count,
n.created_at = $created_at,
n.favourites_count = $favourites_count,
n.statuses_count = $statuses_count"""

if len(sys.argv) < 2:
    sys.exit("usage: %s <token_index>" % sys.argv[0])

uri = "bolt://localhost:7687"
driver = GraphDatabase.driver(uri, auth=("neo4j", "password"))

# open json token list
with open("token_list.json") as fp:
    token_list = json.load(fp)
token_index = int(sys.argv[1])
credentials = token_list['listtoken'][token_index]
consumer_key = credentials["consumer_key"]
consumer_secret = credentials["consumer_secret"]
access_token = credentials["access_token"]
access_token_secret = credentials["access_token_secret"]

auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
# NOTE(review): wait_on_rate_limit_notify is a Tweepy 3.x option; drop it
# when moving to Tweepy 4.
api = tweepy.API(auth, wait_on_rate_limit_notify=True, wait_on_rate_limit=True)

try:
    while True:
        with driver.session() as session:
            # A driver Result object is always truthy, so testing it
            # directly never detects "no rows"; single() returns None
            # when nothing is left to process.
            record = session.run(NEXT_USER_QUERY).single()
            if record is None:
                break
            id_str = record["id_str"]
            try:
                # user_id= is unambiguous (never treated as a screen name)
                # and is accepted by both Tweepy 3.x and 4.x.
                api_data = api.get_user(user_id=id_str)
            except Exception as error:
                # Record the failure on the node so it is not picked again,
                # then move on; there is no profile to store for this user.
                session.run(MARK_ERROR_QUERY, id_str=id_str, error=str(error))
                continue
            profile = api_data._json
            # Send only the fields the query uses, and match on the id
            # stored in the graph rather than the one echoed by the API.
            params = {field: profile.get(field) for field in PROFILE_FIELDS}
            params["id_str"] = id_str
            session.run(STORE_PROFILE_QUERY, params)
finally:
    # Release the connection pool even if the run is interrupted.
    driver.close()
# Walk the timeline of every USER node and record interactions between
# tracked users as relationships: MENTIONS, REPLY, RETWEET, QOUTE and
# RETWEET_QOUTE, each carrying the id of the tweet that produced it.
#
# Usage: python <script> <token_index>
#   token_index selects one credential set from token_list.json["listtoken"].
from neo4j import GraphDatabase
import tweepy
import json
import sys
import time

# Seconds to wait between two users.
PAUSE_BETWEEN_USERS = 3

# LIMIT does not stop side effects: with "SET ... RETURN ... LIMIT 1" the SET
# is applied to every matched node. Narrowing to one node with WITH ... LIMIT
# before the SET claims exactly one user per call.
CLAIM_USER_QUERY = "MATCH (n:USER) where NOT EXISTS(n.get_tweets) AND NOT EXISTS(n.error_get_data) WITH n limit 1 set n.get_tweets = True return n.id_str as id_str"

MARK_ERROR_QUERY = """MATCH (n:USER{id_str:$id_str}) set n.error_get_tweets = $error , n.get_tweets = False"""

# Relationship types cannot be passed as Cypher parameters, so the type is
# substituted into the query text. Only the fixed names used in this script
# are ever substituted.
EDGE_QUERY_TEMPLATE = """MATCH (a:USER{id_str:$a_ids}),(b:USER{id_str:$b_ids})
CREATE (a)-[r:%s{tweet_id:$ids_tweet}]->(b)"""


def _link(session, relation, source_id, target_id, tweet_id):
    """Create a `relation` edge source -> target for a tracked target.

    Nothing is written when the target is not in the tracked id set or when
    the target is the source itself.
    """
    if target_id not in known_ids or target_id == source_id:
        return
    session.run(
        EDGE_QUERY_TEMPLATE % relation,
        a_ids=source_id,
        b_ids=target_id,
        ids_tweet=tweet_id,
    )


def _store_interactions(session, author_id, tweet):
    """Write every relationship implied by one tweet dict of `author_id`."""
    tweet_id = tweet["id_str"]

    for mention in tweet["entities"]["user_mentions"]:
        _link(session, "MENTIONS", author_id, mention["id_str"], tweet_id)

    reply_to = tweet["in_reply_to_user_id_str"]
    if reply_to:
        _link(session, "REPLY", author_id, reply_to, tweet_id)

    retweeted = tweet.get("retweeted_status", None)
    if retweeted:
        _link(session, "RETWEET", author_id, retweeted["user"]["id_str"], tweet_id)

    if tweet["is_quote_status"]:
        quoted = tweet.get("quoted_status", None)
        if quoted:
            _link(session, "QOUTE", author_id, quoted["user"]["id_str"], tweet_id)
        else:
            # No quoted tweet on the tweet itself: look for one on the
            # retweeted tweet instead.
            inner_quoted = (retweeted or {}).get("quoted_status", None)
            if inner_quoted:
                _link(
                    session,
                    "RETWEET_QOUTE",
                    author_id,
                    inner_quoted["user"]["id_str"],
                    tweet_id,
                )


# List of ids_dataset
with open("ids_mini_data.txt", encoding="utf-8-sig") as fo:
    data = fo.read()
# Skip blank lines (including the one produced by a trailing newline).
list_ids = [line.strip() for line in data.splitlines() if line.strip()]
# Membership is tested for every mention/reply/retweet, so use a set.
known_ids = frozenset(list_ids)

if len(sys.argv) < 2:
    sys.exit("usage: %s <token_index>" % sys.argv[0])

uri = "bolt://localhost:7687"
driver = GraphDatabase.driver(uri, auth=("neo4j", "password"))

# open json token list
with open("token_list.json") as fp:
    token_list = json.load(fp)
token_index = int(sys.argv[1])
credentials = token_list['listtoken'][token_index]
consumer_key = credentials["consumer_key"]
consumer_secret = credentials["consumer_secret"]
access_token = credentials["access_token"]
access_token_secret = credentials["access_token_secret"]

auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
# NOTE(review): wait_on_rate_limit_notify is a Tweepy 3.x option; drop it
# when moving to Tweepy 4.
api = tweepy.API(auth, wait_on_rate_limit_notify=True, wait_on_rate_limit=True)

try:
    while True:
        with driver.session() as session:
            # A driver Result object is always truthy, so testing it
            # directly never detects "no rows"; single() returns None
            # when no unclaimed user is left.
            record = session.run(CLAIM_USER_QUERY).single()
            if record is None:
                break
            id_str = record["id_str"]
            try:
                timeline = tweepy.Cursor(
                    api.user_timeline,
                    user_id=id_str,
                    include_rts=True,
                    exclude_replies=False,
                    trim_user=True,
                    count=200,
                )
                for tweet_data in timeline.items():
                    _store_interactions(session, id_str, tweet_data._json)
            except Exception as error:
                # Keep going with the next user; the failure is recorded on
                # the node (get_tweets = False keeps it from being claimed
                # again).
                session.run(MARK_ERROR_QUERY, id_str=id_str, error=str(error))
        time.sleep(PAUSE_BETWEEN_USERS)
finally:
    # Release the connection pool even if the run is interrupted.
    driver.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment