Last active
June 12, 2023 00:00
-
-
Save artjomb/07209e859f9bf0206f76 to your computer and use it in GitHub Desktop.
Infinite scroll of the StackStatus Twitter timeline with Python (Selenium) in PhantomJS
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Scroll a Twitter timeline to the bottom in PhantomJS, saving a screenshot
# after every scroll that makes the page grow (height-polling variant).
import selenium
import time
from selenium import webdriver

browser = webdriver.PhantomJS("phantomjs")
try:
    browser.get("https://twitter.com/StackStatus")
    print(browser.title)

    # Seconds to wait after each scroll so newly requested content can load.
    pause = 3

    lastHeight = browser.execute_script("return document.body.scrollHeight")
    print(lastHeight)

    i = 0
    # NOTE(review): Selenium appears to write PNG data regardless of the file
    # extension -- confirm, and consider renaming these files to ".png".
    browser.get_screenshot_as_file("test03_1_"+str(i)+".jpg")
    while True:
        # Jump to the current bottom of the page to trigger the next load.
        browser.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(pause)
        newHeight = browser.execute_script("return document.body.scrollHeight")
        print(newHeight)
        # An unchanged height after the pause is taken as "nothing more to load".
        if newHeight == lastHeight:
            break
        lastHeight = newHeight
        i += 1
        browser.get_screenshot_as_file("test03_1_"+str(i)+".jpg")
finally:
    # Always shut the PhantomJS process down, even if a step above raised.
    browser.quit()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Scroll a Twitter timeline to the bottom in PhantomJS, saving a screenshot
# after every scroll that adds at least one more stream item (explicit-wait
# variant: waits for the (count + 1)-th item instead of sleeping).
import selenium
import time
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.support.ui import WebDriverWait
import selenium.webdriver.support.expected_conditions as EC
from selenium.webdriver.common.by import By

browser = webdriver.PhantomJS("phantomjs")
try:
    #browser.get("https://twitter.com/StackStatus")
    browser.get("https://twitter.com/StackOverheards") # shorter, so that ending can be tested
    print(browser.title)

    i = 0
    browser.get_screenshot_as_file("test03_2_"+str(i)+".jpg")
    while True:
        print("i %d" % i)
        # Number of stream items currently in the DOM, counted before scrolling.
        elemsCount = browser.execute_script("return document.querySelectorAll('.stream-items > li.stream-item').length")
        #print "c", elemsCount

        browser.execute_script("window.scrollTo(0, document.body.scrollHeight);")

        #element = WebDriverWait(browser, 20).until(
        #    EC.presence_of_element_located((By.XPATH,
        #        "//*[contains(@class,'GridTimeline-items')]/li[contains(@class,'stream-item')]["+str(elemsCount+1)+"]")))

        try:
            # Wait up to 20 s for one more item than before the scroll to exist.
            WebDriverWait(browser, 20).until(
                lambda x: x.find_element_by_xpath(
                    "//*[contains(@class,'stream-items')]/li[contains(@class,'stream-item')]["+str(elemsCount+1)+"]"))
        except TimeoutException:
            # No new item within the timeout: treat it as the end of the
            # timeline. Other errors (driver crash, Ctrl-C) now propagate
            # instead of being mistaken for a normal end.
            break

        i += 1
        browser.get_screenshot_as_file("test03_2_"+str(i)+".jpg")
finally:
    # Always shut the PhantomJS process down, even if a step above raised.
    browser.quit()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
why not user-agent?