Skip to content

Instantly share code, notes, and snippets.

View aniruddha27's full-sized avatar

Aniruddha Bhandari aniruddha27

View GitHub Profile
# Build the cleaned-tweet DataFrame: one row per tweet stored in TwitterTweet
data_tweet = DbConnect("SELECT User_Id, Tweet_Id, Tweet FROM TwitterTweet;")
# Collect all rows first and construct the frame in one go; growing a
# DataFrame one .loc cell at a time gets slower with every added row.
df_tweet = pd.DataFrame(
    [(user_id, tweet_id, preprocess(text)) for user_id, tweet_id, text in data_tweet],
    columns=['User_Id', 'Tweet_Id', 'Clean_Tweet'],
    # the previous version started from an empty, object-dtype frame; keep that dtype
    dtype=object,
)
# Connecting to the Database
def DbConnect(query):
    """Run *query* against the local TwitterDB database and return every row.

    Args:
        query: SQL text handed unchanged to ``cursor.execute``.

    Returns:
        Whatever ``cursor.fetchall()`` yields for the query.

    Raises:
        Any error raised by psycopg2 while connecting or executing; the
        connection is closed before the error propagates.
    """
    # "<user>" / "<password>" are placeholders: fill in real credentials before running.
    conn = psycopg2.connect(host="localhost", database="TwitterDB", port=5432,
                            user="<user>", password="<password>")
    try:
        curr = conn.cursor()
        curr.execute(query)
        rows = curr.fetchall()
    finally:
        # release the server connection even when the query fails
        conn.close()
    # callers iterate over the result, so the fetched rows must be handed back
    return rows
# Cleaning the tweets
def preprocess(tweet):
    """Return *tweet* with links, mentions and non-letter characters removed.

    The steps run in this order: drop ``http...`` links, drop ``@mention``
    tokens, replace every character that is not an ASCII letter or ``#``
    with a space, then collapse runs of spaces into a single space.

    Args:
        tweet: raw tweet text (str).

    Returns:
        The cleaned text. Leading/trailing single spaces are not stripped.
    """
    # remove links
    tweet = re.sub(r'http\S+', '', tweet)
    # remove mentions (raw string: "\w" inside a plain literal is an invalid escape)
    tweet = re.sub(r'@\w+', '', tweet)
    # keep only letters and hashtags; anything else becomes a space
    tweet = re.sub(r'[^a-zA-Z#]', ' ', tweet)
    # remove multiple spaces
    # NOTE(review): the reviewed excerpt ended at the comment above; the
    # substitution and the return implement what that comment announces and
    # what the caller (which stores the result) needs - confirm no further
    # cleaning steps were intended.
    tweet = re.sub(r' +', ' ', tweet)
    return tweet
# Streaming tweets
# Create the listener, attach it to a stream that reuses the API object's
# credentials, then start filtering on the tracked keywords.
myStreamListener = MyStreamListener()
myStream = tweepy.Stream(
    auth=api.auth,
    listener=myStreamListener,
    tweet_mode="extended",
)
myStream.filter(track=['Covid', 'covid-19'])
# Insert Tweet data into database
def dbConnect(user_id, user_name, tweet_id, tweet, retweet_count, hashtags):
    """Store the author of a tweet in the TwitterUser table.

    The row is inserted with ``ON CONFLICT (User_Id) DO NOTHING``, so a user
    that is already present is left untouched.

    Args:
        user_id: value for the ``user_id`` column.
        user_name: value for the ``user_name`` column.
        tweet_id, tweet, retweet_count, hashtags: accepted for interface
            compatibility but not written by the code below.

    NOTE(review): only the user INSERT was visible in the reviewed excerpt;
    the statements that store the tweet and its hashtags are not shown and
    still need to be restored - TODO confirm against the full version.
    """
    # "<user>" / "<password>" are placeholders: fill in real credentials before running.
    conn = psycopg2.connect(host="localhost", database="TwitterDB", port=5432,
                            user="<user>", password="<password>")
    try:
        cur = conn.cursor()
        # insert user information
        command = '''INSERT INTO TwitterUser (user_id, user_name) VALUES (%s,%s) ON CONFLICT
                     (User_Id) DO NOTHING;'''
        # values are passed separately so the driver does the quoting
        cur.execute(command, (user_id, user_name))
        conn.commit()
    finally:
        # release the server connection even when the insert fails
        conn.close()
# Create cursor to execute SQL commands
cur = conn.cursor()
# Execute SQL commands: every entry of `commands` is run as one statement
# (these are the table-creation statements)
for command in commands:
    cur.execute(command)
# Commit so the executed statements are persisted
conn.commit()
# NOTE(review): the comment that used to sit above the commit read "Close
# communication with server", but no close call is visible in the reviewed
# excerpt. The cursor and connection are left open here because this snippet
# does not show who owns `conn` - confirm whether they should be closed.
# Table creation
# DDL statements in dependency order: TwitterUser comes first because
# TwitterTweet carries a foreign key on its User_Id column.
commands = (
    # Table 1
    '''Create Table TwitterUser(User_Id BIGINT PRIMARY KEY, User_Name TEXT);''',
    # Table 2
    '''Create Table TwitterTweet(Tweet_Id BIGINT PRIMARY KEY,
                                 User_Id BIGINT,
                                 Tweet TEXT,
                                 Retweet_Count INT,
                                 CONSTRAINT fk_user
                                     FOREIGN KEY(User_Id)
                                         REFERENCES TwitterUser(User_Id));''',
    # NOTE(review): the reviewed excerpt was cut off after "FOREIGN KEY(User_Id)".
    # The REFERENCES clause above targets the only User_Id primary key in view;
    # any further tables from the full version are not reproduced here - confirm.
)
# Extract hashtags
def read_hashtags(tag_list):
    """Return the ``'text'`` value of every entry in *tag_list*.

    Args:
        tag_list: iterable of mappings that each have a ``'text'`` key.
            ``None`` or an empty sequence yields an empty list.

    Returns:
        A new list with one ``'text'`` value per entry, in input order.

    Raises:
        KeyError: if an entry has no ``'text'`` key.
    """
    if not tag_list:
        return []
    return [tag['text'] for tag in tag_list]
class MyStreamListener(tweepy.StreamListener):
    """Stream listener that remembers its creation time and a time limit.

    NOTE(review): ``tweepy.StreamListener`` is only available in Tweepy
    releases before 4.0 - confirm the pinned Tweepy version.
    NOTE(review): nothing in the reviewed excerpt reads ``start_time`` or
    ``limit``; presumably a data callback outside this excerpt uses them to
    stop the stream - confirm.
    """

    def __init__(self, time_limit=300):
        """Record the start time and the limit, then initialise the base class.

        Args:
            time_limit: stored as ``self.limit``; presumably seconds, since
                ``start_time`` comes from ``time.time()`` - TODO confirm.
        """
        self.start_time = time.time()
        self.limit = time_limit
        super().__init__()

    def on_connect(self):
        """Print a notice that the connection to the API is up."""
        print("Connected to Twitter API.")
# Retrieve tweets
# Ask the search endpoint for up to 10 English results for the Covid terms.
result = api.search(
    ['covid', 'Covid-19', 'COVID-19'],
    lang='en',
    count=10,
)
# JSON keys
# Pretty-print the keys of the raw JSON payload of the first returned status.
pprint(result[0]._json.keys())