Skip to content

Instantly share code, notes, and snippets.

@im-noob
Created August 7, 2021 06:38
Show Gist options
  • Save im-noob/507c1eab5ec2d08c8ddc0bf0ccb714ec to your computer and use it in GitHub Desktop.
Save im-noob/507c1eab5ec2d08c8ddc0bf0ccb714ec to your computer and use it in GitHub Desktop.
Record the tweet count of the top trending hashtag every minute
import json
import os
from datetime import datetime
from time import sleep
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
print('Starting...')

# Location of the chromedriver binary, relative to the working directory.
CHROMEDRIVER_PATH = './chromedriver'
# Not applied to the browser below; kept because it is a module-level name.
WINDOW_SIZE = "1920,1080"

chrome_options = Options()
# Pass the flag explicitly instead of assigning ``chrome_options.headless``:
# that setter was deprecated in Selenium 4 and dropped in later releases, and
# assigning to a plain attribute would then leave the browser non-headless.
chrome_options.add_argument("--headless")
print('initiated...')

# Chrome profile preferences: downloads go to the current working directory.
preferences = {
    "download.default_directory": os.getcwd() + os.path.sep,
}
chrome_options.add_experimental_option('prefs', preferences)

try:
    driver = webdriver.Chrome(executable_path=CHROMEDRIVER_PATH, options=chrome_options)
except TypeError:
    # Newer Selenium 4 releases no longer accept ``executable_path``; the
    # driver location has to be wrapped in a Service object instead.
    from selenium.webdriver.chrome.service import Service

    driver = webdriver.Chrome(service=Service(CHROMEDRIVER_PATH), options=chrome_options)
driver.maximize_window()

# Page that lists the current trends; its first entry is what gets recorded.
url = 'https://trendlistz.com/india'
def _parse_tweet_count(raw_count):
    """Convert an abbreviated count such as ``'12.5K'`` into an integer.

    Only values ending in ``K`` (thousands) or ``M`` (millions) are
    recognised. Returns ``None`` when the suffix is missing or the numeric
    part cannot be parsed.
    """
    multipliers = {'K': 1000, 'M': 1000000}
    text = raw_count.strip()
    multiplier = multipliers.get(text[-1:])
    if multiplier is None:
        return None
    try:
        # float() accepts decimal values like '12.5'; round() removes float
        # noise (1.1 * 1000 == 1100.0000000000002).
        return round(float(text[:-1].replace(',', '')) * multiplier)
    except (ValueError, OverflowError):
        return None


def fetch_and_save_data_to_csv():
    """Append the first trend card on the page as one row of ``tweet_trend.csv``.

    Loads ``url`` with the module-level ``driver`` and reads the text of the
    first ``li.trend-item`` element. The text is expected to have four lines:
    an ignored first line, the hashtag, a ``Trending since ...`` line and a
    ``... TWEETS`` line. The row written is
    ``<timestamp>|<hashtag>|<trending since>|<tweet count>``.

    Returns ``None`` in every case. The sample is skipped, with a message
    printed, when the page has no trend items, the card layout is unexpected
    or the tweet count cannot be parsed. Any other error is printed and
    swallowed so the caller's polling loop keeps running.
    """
    try:
        driver.get(url)
        # find_elements(By.XPATH, ...) works on Selenium 3 and 4, whereas
        # find_elements_by_xpath was removed in Selenium 4.
        card_elements = driver.find_elements(By.XPATH, '//li[@class="trend-item"]')
        if not card_elements:
            print('skipping: no trend items found on the page')
            return

        card_lines = card_elements[0].text.split('\n')
        print(card_lines)
        if len(card_lines) != 4:
            print('skipping: unexpected trend card layout')
            return

        _, tag, trending_since, tweet_count = card_lines
        # Keep what follows the prefix, minus the final character.
        trending_since_formated = trending_since.split('Trending since ')[1][:-1]
        count = _parse_tweet_count(tweet_count.split(' TWEETS')[0])
        if count is None:
            print('skipping due to insufficient data')
            return

        row = '|'.join([str(datetime.now()), tag, trending_since_formated, str(count)])
        # Explicit UTF-8 so non-ASCII hashtags do not depend on the platform
        # default encoding.
        with open('tweet_trend.csv', 'a', encoding='utf-8') as file:
            file.write(row + '\n')
    except Exception as e:
        # Best-effort scraper: report the problem and retry on the next poll.
        print(e)
def draw_graph():
    """Plot the recorded tweet counts over time and save ``tweet_trend.png``.

    Reads ``tweet_trend.csv`` (pipe-separated, no header row) with the columns
    Time, Hashtag, Duration and Tweet Count, then draws Tweet Count against
    Time as a line plot on a 16x9 inch figure at 100 dpi.

    Returns ``None``. If the CSV does not exist yet or holds no rows, a
    message is printed and no image is written, instead of raising.
    """
    try:
        df = pd.read_csv(
            'tweet_trend.csv',
            header=None,
            names=['Time', 'Hashtag', 'Duration', 'Tweet Count'],
            sep='|',
        )
    except (FileNotFoundError, pd.errors.EmptyDataError):
        # No sample has been recorded yet (e.g. the first fetch was skipped).
        print('No recorded data to plot yet')
        return
    if df.empty:
        print('No recorded data to plot yet')
        return

    df['Time'] = pd.to_datetime(df['Time'])
    plt.figure(figsize=(16, 9), dpi=100)
    try:
        sns.lineplot(x='Time', y='Tweet Count', data=df)
        plt.savefig('tweet_trend.png', dpi=100)
    finally:
        # Always release the figure; this function is called once per poll.
        plt.close()
# Poll once a minute: record the top trend, then redraw the chart.
try:
    while True:
        fetch_and_save_data_to_csv()
        draw_graph()
        print('Data Updated...')
        sleep(60)
except KeyboardInterrupt:
    # Ctrl+C is the expected way to stop this script.
    print('Stopping...')
finally:
    # Shut the browser down so no Chrome/chromedriver process is left behind.
    driver.quit()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment