This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Build a DataFrame holding the cleaned text of every stored tweet.
# Each row returned by the query is (User_Id, Tweet_Id, Tweet).
data_tweet = DbConnect("SELECT User_Id, Tweet_Id, Tweet FROM TwitterTweet;")
# Collect all rows first and construct the frame once: growing a DataFrame
# one `.loc` cell at a time re-allocates on every insert and gets slow as the
# table grows. An empty result still yields an empty frame with these columns.
df_tweet = pd.DataFrame(
    [
        (user_id, tweet_id, preprocess(tweet))
        for user_id, tweet_id, tweet in data_tweet
    ],
    columns=['User_Id', 'Tweet_Id', 'Clean_Tweet'],
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Run the SQL text in `query` against the PostgreSQL database "TwitterDB" on
# localhost:5432 and fetch every result row.
# NOTE(review): this excerpt stops right after fetchall(); no `return` and no
# cursor/connection close are visible here. A caller elsewhere in this file
# iterates over the value of DbConnect(...), so presumably `rows` is returned
# -- confirm against the full source.
# NOTE(review): the trailing `| |` on each line and the `<user>` / `<password>`
# tokens are not valid Python; they look like copy/paste residue and redacted
# credentials -- confirm before running.
def DbConnect(query): | |
# Open a new connection for this call
conn = psycopg2.connect(host="localhost",database="TwitterDB",port=5432,user=<user>,password=<password>) | |
# Cursor used to run the query
curr = conn.cursor() | |
curr.execute(query) | |
# Pull the complete result set
rows = curr.fetchall() | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Clean a raw tweet string through a series of regex substitutions.
# NOTE(review): this excerpt ends at the "remove multiple spaces" comment; the
# statement for that step and any `return` are not visible here.
# NOTE(review): the trailing `| |` on each line is not valid Python; it looks
# like residue from the page this was copied from.
def preprocess(tweet): | |
# Remove links: "http" followed by any run of non-whitespace characters
tweet = re.sub(r'http\S+', '', tweet) | |
# Remove @mentions: "@" followed by one or more word characters
# NOTE(review): the pattern is not a raw string, so "\w" relies on Python
# passing the unrecognised escape through unchanged.
tweet = re.sub("@\w+","",tweet) | |
# Replace every character that is not an ASCII letter or "#" with a space
# (digits are replaced as well, so the result is letters and hashtags only)
tweet = re.sub("[^a-zA-Z#]"," ",tweet) | |
# Remove multiple spaces (the code for this step is cut off in this excerpt)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Streaming tweets
# The listener receives the stream callbacks; it is created with its default
# constructor arguments.
myStreamListener = MyStreamListener()
# Authenticate the stream with the credentials already attached to `api`.
myStream = tweepy.Stream(
    auth=api.auth,
    listener=myStreamListener,
    tweet_mode="extended",
)
# Start streaming, keeping only tweets that match one of the tracked terms.
myStream.filter(track=['Covid','covid-19'])
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Insert tweet data into the database.
# The visible part opens a connection to "TwitterDB" on localhost:5432 and
# prepares an INSERT for the TwitterUser table that leaves an existing row
# untouched when the User_Id is already present (ON CONFLICT ... DO NOTHING).
# NOTE(review): this excerpt ends after the first SQL string; where `command`
# is executed and how tweet_id, tweet, retweet_count and hashtags are used is
# not visible here.
# NOTE(review): this name differs from `DbConnect` only by the case of its
# first letter -- easy to mix up at call sites.
# NOTE(review): the `| |` / `|` residue is not valid Python, and on the first
# line of the SQL literal it sits inside the string itself; `<user>` and
# `<password>` look like redacted credentials -- confirm before running.
def dbConnect(user_id, user_name, tweet_id, tweet, retweet_count, hashtags): | |
conn = psycopg2.connect(host="localhost",database="TwitterDB",port=5432,user=<user>,password=<password>) | |
cur = conn.cursor() | |
# Insert user information; the two %s placeholders are for user_id and
# user_name (presumably bound when the statement is executed -- not shown)
command = '''INSERT INTO TwitterUser (user_id, user_name) VALUES (%s,%s) ON CONFLICT | |
(User_Id) DO NOTHING;''' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Create cursor to execute SQL commands
cur = conn.cursor()
# Execute the SQL commands one by one, in order, to create the tables
for command in commands:
    cur.execute(command)
# Commit the transaction so the executed statements are persisted.
# Committing does not close the cursor or the connection.
conn.commit()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Table creation: SQL DDL statements collected in `commands`, one string per
# table.
# NOTE(review): this excerpt ends inside the second statement -- the
# FOREIGN KEY clause is cut off and neither the string nor the parenthesis is
# closed here.
# NOTE(review): the trailing `| |` is not valid Python, and from the second
# statement's first line onward it falls inside the string literal, so it
# would become part of the SQL text -- looks like copy/paste residue.
commands = (# Table 1 | |
'''Create Table TwitterUser(User_Id BIGINT PRIMARY KEY, User_Name TEXT);''', | |
# Table 2: tweets keyed by Tweet_Id, with User_Id under the fk_user constraint
'''Create Table TwitterTweet(Tweet_Id BIGINT PRIMARY KEY, | |
User_Id BIGINT, | |
Tweet TEXT, | |
Retweet_Count INT, | |
CONSTRAINT fk_user | |
FOREIGN KEY(User_Id) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Stream listener that records when it was created and a time limit, and
# announces a successful connection.
# NOTE(review): nothing in this excerpt reads `start_time` or `limit`; the
# class presumably continues with the callback that enforces the limit --
# confirm against the full source.
# NOTE(review): the trailing `| |` on each line is not valid Python; it looks
# like residue from the page this was copied from.
class MyStreamListener(tweepy.StreamListener): | |
# time_limit defaults to 300; presumably seconds, since start_time comes
# from time.time() -- confirm
def __init__(self, time_limit=300): | |
# Creation timestamp of the listener
self.start_time = time.time() | |
self.limit = time_limit | |
# Run the base-class initialisation
super(MyStreamListener, self).__init__() | |
# Prints a confirmation message; presumably invoked by tweepy once the
# stream connection is established
def on_connect(self): | |
print("Connected to Twitter API.") | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Retrieve tweets: up to 10 English-language results for the given terms
result = api.search(['covid','Covid-19','COVID-19'], lang='en', count=10)
# Show the JSON keys of the first result. The lookup is skipped when the
# search returns nothing, so result[0] cannot raise IndexError.
if result:
    pprint(result[0]._json.keys())