Created
July 2, 2018 05:39
-
-
Save rams3sh/c347f7644ca15f98c8af7e495050af4b to your computer and use it in GitHub Desktop.
Tweet Parse using Selenium without Twitter API. Solves the problem of not being able to retrieve the entire set of tweets for an account due to scrolling issue.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Depends on the selenium package for Python, the Selenium ChromeDriver, and the Google Chrome / Chromium browser.
import os
import sys

import selenium.webdriver.support.ui as ui
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
# Download ChromeDriver for Selenium and place it on your PATH before executing this program (Link: https://www.seleniumhq.org/download/)
def main():
    """Print the text of every tweet found on the page given as argv[1].

    Opens the URL in Chrome using an existing browser profile, keeps
    pressing END on the page body until the logo icon element becomes
    visible, then prints the text of every element with the class
    ``tweet-text``.

    Returns:
        0 on success, 2 when the URL argument is missing.
    """
    # Validate the command line before launching a browser.
    if len(sys.argv) < 2:
        sys.stderr.write(
            'Usage: python tweet_parser.py "http://twitter.com/<remaining_url>"\n'
        )
        return 2
    # Pass the url as first argument
    base_url = sys.argv[1]

    options = webdriver.ChromeOptions()
    # Change the home directory to your user directory. This is used so as to
    # bypass login restrictions.
    # This requires the user to log in to twitter once so that the same login
    # session can be used for carrying out the task.
    options.add_argument("--user-data-dir=/home/root/.config/chromium")
    browser = webdriver.Chrome(options=options)
    try:
        browser.get(base_url)
        # find_element(By...) works on Selenium 3 and 4; the
        # find_element_by_* helpers were removed in Selenium 4.
        body = browser.find_element(By.TAG_NAME, 'body')
        wait = ui.WebDriverWait(browser, 0.5)
        # NOTE(review): presumably this icon only shows once the timeline has
        # been scrolled to its end -- confirm against the current page markup.
        end_marker = (
            By.XPATH,
            "//span[contains(@class, 'Icon Icon--large Icon--logo')]",
        )
        # There is no upper bound on iterations: the loop runs until the
        # marker is visible.
        while True:
            try:
                wait.until(EC.visibility_of_element_located(end_marker))
                break
            except TimeoutException:
                # Marker not visible within 0.5s: scroll further down.
                # Only the timeout is handled, so Ctrl-C and real WebDriver
                # failures propagate instead of being swallowed.
                body.send_keys(Keys.END)
        for tweet in browser.find_elements(By.CLASS_NAME, 'tweet-text'):
            print(tweet.text)
    finally:
        # Always shut down the browser and driver process, even on error.
        browser.quit()
    return 0


if __name__ == "__main__":
    sys.exit(main())
# Usage: python tweet_parser.py "http://twitter.com/<remaining_url>"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment