vessaldaneshvar · October 15, 2020 07:37
diff --git a/create_node_from_file.py b/create_node_from_file.py
 from neo4j import GraphDatabase

 # Seed the graph: create one USER node per id listed in ids_mini_data.txt.
 uri = "bolt://localhost:7687"
 driver = GraphDatabase.driver(uri, auth=("neo4j", "password"))

 # utf-8-sig strips a leading BOM so it cannot end up inside the first id.
 with open("ids_mini_data.txt",encoding="utf-8-sig") as fo:
    data = fo.read()
 # Drop blank entries (for example the one produced by a trailing newline) so
 # that no USER node with an empty id_str is created.
 list_ids = [line.strip() for line in data.splitlines() if line.strip()]

 try:
    with driver.session() as session:
        for ids in list_ids:
            session.run("CREATE (n:USER{id_str:$id_str})",id_str=ids)
 finally:
    # Release the driver's connections even when a CREATE fails.
    driver.close()

diff --git a/get_full_data.py b/get_full_data.py
 from neo4j import GraphDatabase
 import tweepy
 import json
 import sys


 # Fetch the Twitter profile of every USER node that has neither a screen_name
 # nor a recorded error_get_data, and store the profile fields on the node.
 # Usage: python get_full_data.py <token_index>
 uri = "bolt://localhost:7687"
 driver = GraphDatabase.driver(uri, auth=("neo4j", "password"))

 # open json token list
 with open("token_list.json") as fp:
    token_list = json.load(fp)

 # The first command-line argument selects the credential set to use.
 token_index = int(sys.argv[1])
 consumer_key = token_list['listtoken'][token_index]["consumer_key"]
 consumer_secret = token_list['listtoken'][token_index]["consumer_secret"]
 access_token = token_list['listtoken'][token_index]["access_token"]
 access_token_secret = token_list['listtoken'][token_index]["access_token_secret"]

 auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
 auth.set_access_token(access_token,access_token_secret)
 api = tweepy.API(auth,wait_on_rate_limit_notify=True,wait_on_rate_limit=True)

 while True:
    with driver.session() as session:
        result = session.run("MATCH (n:USER) where NOT EXISTS(n.screen_name) AND NOT EXISTS(n.error_get_data)  return n.id_str as id_str limit 1")
        # single() gives the one returned record, or None when nothing is left
        # to process; the result object itself is not an emptiness test.
        record = result.single()
        if record is None:
            break
        id_str = record["id_str"]
        try:
            api_data = api.get_user(id_str)
        except Exception as e:
            # Record the failure on the node so it is not selected again.
            session.run("""MATCH (n:USER{id_str:$id_str}) where NOT EXISTS(n.screen_name) set n.error_get_data = $error""",id_str=id_str,error=str(e))
            # Skip the profile update: api_data is either unset or still holds
            # the previous user's profile.
            continue
        # The user's JSON payload is passed as the query parameter map.
        session.run("""MATCH (n:USER{id_str:$id_str}) where NOT EXISTS(n.screen_name)
    SET n.name = $name ,
    n.screen_name = $screen_name ,
    n.description = $description , 
    n.url = $url , 
    n.followers_count = $followers_count, 
    n.friends_count = $friends_count, 
    n.listed_count = $listed_count, 
    n.created_at = $created_at, 
    n.favourites_count = $favourites_count, 
    n.statuses_count = $statuses_count""",api_data._json)

 # All pending users are processed; release the driver's connections.
 driver.close()
diff --git a/get_tweets.py b/get_tweets.py
 from neo4j import GraphDatabase
 import tweepy
 import json
 import sys
 import time

 # Walk the timeline of each unprocessed USER node and create MENTIONS, REPLY,
 # RETWEET, QOUTE and RETWEET_QOUTE relationships to other users of the dataset.
 # Usage: python get_tweets.py <token_index>

 # List of ids_dataset
 with open("ids_mini_data.txt",encoding="utf-8-sig") as fo:
    data = fo.read()
 list_ids = data.split("\n")
 # Set copy of the ids: membership is tested several times per tweet.
 known_ids = set(list_ids)

 uri = "bolt://localhost:7687"
 driver = GraphDatabase.driver(uri, auth=("neo4j", "password"))

 # open json token list
 with open("token_list.json") as fp:
    token_list = json.load(fp)

 # The first command-line argument selects the credential set to use.
 token_index = int(sys.argv[1])
 consumer_key = token_list['listtoken'][token_index]["consumer_key"]
 consumer_secret = token_list['listtoken'][token_index]["consumer_secret"]
 access_token = token_list['listtoken'][token_index]["access_token"]
 access_token_secret = token_list['listtoken'][token_index]["access_token_secret"]

 auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
 auth.set_access_token(access_token,access_token_secret)
 api = tweepy.API(auth,wait_on_rate_limit_notify=True,wait_on_rate_limit=True)

 while True:
    with driver.session() as session:
        # Claim exactly one unprocessed user. The LIMIT sits in a WITH before
        # the SET because a LIMIT on the final RETURN does not restrict the
        # update: every matching node would be flagged by the first query.
        result = session.run("MATCH (n:USER) where NOT EXISTS(n.get_tweets) AND NOT EXISTS(n.error_get_data) with n limit 1 set n.get_tweets = True return n.id_str as id_str")
        # single() gives the claimed record, or None when no user is left;
        # the result object itself is not an emptiness test.
        record = result.single()
        if record is None:
            break
        id_str = record["id_str"]
        try:
            for tweet_data in tweepy.Cursor(api.user_timeline,user_id=id_str,include_rts=True,exclude_replies=False,trim_user=True,count=200).items():
                dict_tweet_data = tweet_data._json
                # Mentions of other dataset users (self-mentions are ignored)
                if dict_tweet_data["entities"]["user_mentions"]:
                    for mention in dict_tweet_data["entities"]["user_mentions"]:
                        ids_user_mention = mention["id_str"]
                        if (ids_user_mention in known_ids) and (ids_user_mention != id_str):
                            session.run("""MATCH (a:USER{id_str:$a_ids}),(b:USER{id_str:$b_ids})
                            CREATE (a)-[r:MENTIONS{tweet_id:$ids_tweet}]->(b)""",a_ids=id_str,b_ids=ids_user_mention,ids_tweet=dict_tweet_data["id_str"])

                # Reply to another dataset user
                if dict_tweet_data["in_reply_to_user_id_str"] :
                    userids_reply = dict_tweet_data["in_reply_to_user_id_str"]
                    if (userids_reply in known_ids) and (userids_reply != id_str):
                        session.run("""MATCH (a:USER{id_str:$a_ids}),(b:USER{id_str:$b_ids})
                            CREATE (a)-[r:REPLY{tweet_id:$ids_tweet}]->(b)""",a_ids=id_str,b_ids=userids_reply,ids_tweet=dict_tweet_data["id_str"])

                # Retweet of another dataset user
                if dict_tweet_data.get("retweeted_status",None):
                    retweet_user_ids = dict_tweet_data["retweeted_status"]["user"]["id_str"]
                    if (retweet_user_ids in known_ids) and (retweet_user_ids != id_str):
                        session.run("""MATCH (a:USER{id_str:$a_ids}),(b:USER{id_str:$b_ids})
                            CREATE (a)-[r:RETWEET{tweet_id:$ids_tweet}]->(b)""",a_ids=id_str,b_ids=retweet_user_ids,ids_tweet=dict_tweet_data["id_str"])

                # Quote: either a direct quote or a retweet of a quoting tweet
                if dict_tweet_data["is_quote_status"]:
                    if dict_tweet_data.get("quoted_status",None):
                        qoute_user_ids = dict_tweet_data["quoted_status"]["user"]["id_str"]
                        if (qoute_user_ids in known_ids) and (qoute_user_ids != id_str):
                            session.run("""MATCH (a:USER{id_str:$a_ids}),(b:USER{id_str:$b_ids})
                                CREATE (a)-[r:QOUTE{tweet_id:$ids_tweet}]->(b)""",a_ids=id_str,b_ids=qoute_user_ids,ids_tweet=dict_tweet_data["id_str"])
                    elif dict_tweet_data.get("retweeted_status",{}).get("quoted_status",None):
                        retweet_qoute_user_ids = dict_tweet_data["retweeted_status"]["quoted_status"]["user"]["id_str"]
                        if (retweet_qoute_user_ids in known_ids) and (retweet_qoute_user_ids != id_str):
                            session.run("""MATCH (a:USER{id_str:$a_ids}),(b:USER{id_str:$b_ids})
                                CREATE (a)-[r:RETWEET_QOUTE{tweet_id:$ids_tweet}]->(b)""",a_ids=id_str,b_ids=retweet_qoute_user_ids,ids_tweet=dict_tweet_data["id_str"])
        except Exception as e:
            # Keep the error text on the node; get_tweets stays set (to False),
            # so the node is not selected again by the claim query.
            session.run("""MATCH (n:USER{id_str:$id_str}) set n.error_get_tweets = $error , n.get_tweets = False""",id_str=id_str,error=str(e))
            continue
        # Pause between users; needs the `time` import added at the top.
        time.sleep(3)

 # All users are processed; release the driver's connections.
 driver.close()
	from neo4j import GraphDatabase

	# Seed the graph: create one USER node per id listed in ids_mini_data.txt.
	uri = "bolt://localhost:7687"
	driver = GraphDatabase.driver(uri, auth=("neo4j", "password"))

	# utf-8-sig strips a leading BOM so it cannot end up inside the first id.
	with open("ids_mini_data.txt",encoding="utf-8-sig") as fo:
	    data = fo.read()
	# Drop blank entries (for example the one produced by a trailing newline) so
	# that no USER node with an empty id_str is created.
	list_ids = [line.strip() for line in data.splitlines() if line.strip()]

	try:
	    with driver.session() as session:
	        for ids in list_ids:
	            session.run("CREATE (n:USER{id_str:$id_str})",id_str=ids)
	finally:
	    # Release the driver's connections even when a CREATE fails.
	    driver.close()
	from neo4j import GraphDatabase
	import tweepy
	import json
	import sys


	# Fetch the Twitter profile of every USER node that has neither a screen_name
	# nor a recorded error_get_data, and store the profile fields on the node.
	# Usage: python get_full_data.py <token_index>
	uri = "bolt://localhost:7687"
	driver = GraphDatabase.driver(uri, auth=("neo4j", "password"))

	# open json token list
	with open("token_list.json") as fp:
	    token_list = json.load(fp)

	# The first command-line argument selects the credential set to use.
	token_index = int(sys.argv[1])
	consumer_key = token_list['listtoken'][token_index]["consumer_key"]
	consumer_secret = token_list['listtoken'][token_index]["consumer_secret"]
	access_token = token_list['listtoken'][token_index]["access_token"]
	access_token_secret = token_list['listtoken'][token_index]["access_token_secret"]

	auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
	auth.set_access_token(access_token,access_token_secret)
	api = tweepy.API(auth,wait_on_rate_limit_notify=True,wait_on_rate_limit=True)

	while True:
	    with driver.session() as session:
	        result = session.run("MATCH (n:USER) where NOT EXISTS(n.screen_name) AND NOT EXISTS(n.error_get_data) return n.id_str as id_str limit 1")
	        # single() gives the one returned record, or None when nothing is
	        # left to process; the result object itself is not an emptiness test.
	        record = result.single()
	        if record is None:
	            break
	        id_str = record["id_str"]
	        try:
	            api_data = api.get_user(id_str)
	        except Exception as e:
	            # Record the failure on the node so it is not selected again.
	            session.run("""MATCH (n:USER{id_str:$id_str}) where NOT EXISTS(n.screen_name) set n.error_get_data = $error""",id_str=id_str,error=str(e))
	            # Skip the profile update: api_data is either unset or still
	            # holds the previous user's profile.
	            continue
	        # The user's JSON payload is passed as the query parameter map.
	        session.run("""MATCH (n:USER{id_str:$id_str}) where NOT EXISTS(n.screen_name)
	SET n.name = $name ,
	n.screen_name = $screen_name ,
	n.description = $description ,
	n.url = $url ,
	n.followers_count = $followers_count,
	n.friends_count = $friends_count,
	n.listed_count = $listed_count,
	n.created_at = $created_at,
	n.favourites_count = $favourites_count,
	n.statuses_count = $statuses_count""",api_data._json)

	# All pending users are processed; release the driver's connections.
	driver.close()