-
-
Save PhanDuc/b4184a9db57cb296d9ce33ac93bd332e to your computer and use it in GitHub Desktop.
Infinite scroll of the StackStatus Twitter timeline with Python and PhantomJS
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import selenium | |
import time | |
from selenium import webdriver | |
# Scroll a Twitter timeline to the bottom in headless PhantomJS, saving a
# screenshot before the first scroll and after every scroll that made the
# page taller. The loop ends when a scroll no longer changes the page height.
browser = webdriver.PhantomJS("phantomjs")
try:
    browser.get("https://twitter.com/StackStatus")
    print(browser.title)

    # Seconds to wait after each scroll so lazily loaded content can arrive.
    pause = 3
    lastHeight = browser.execute_script("return document.body.scrollHeight")
    print(lastHeight)

    # NOTE(review): Selenium's screenshot API appears to write PNG data
    # regardless of the suffix; ".jpg" is kept so output filenames stay
    # unchanged -- confirm whether it should be ".png".
    i = 0
    browser.get_screenshot_as_file("test03_1_" + str(i) + ".jpg")
    while True:
        browser.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(pause)
        newHeight = browser.execute_script("return document.body.scrollHeight")
        print(newHeight)
        if newHeight == lastHeight:
            # Height did not grow within the pause: treat as end of timeline.
            break
        lastHeight = newHeight
        i += 1
        browser.get_screenshot_as_file("test03_1_" + str(i) + ".jpg")
finally:
    # Always shut the driver down, even if a WebDriver call above raised,
    # so no PhantomJS process is left running.
    browser.quit()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import selenium | |
import time | |
from selenium import webdriver | |
from selenium.webdriver.support.ui import WebDriverWait | |
import selenium.webdriver.support.expected_conditions as EC | |
from selenium.webdriver.common.by import By | |
# Scroll a Twitter timeline in headless PhantomJS, waiting after each scroll
# for one more stream item to appear instead of sleeping a fixed time.
# A screenshot is saved before the first scroll and after every successful
# load; the loop ends when no new item shows up within the wait timeout.

# Imported here so this script section is self-contained; WebDriverWait.until
# raises this exception when its timeout expires.
from selenium.common.exceptions import TimeoutException

browser = webdriver.PhantomJS("phantomjs")
try:
    # "https://twitter.com/StackStatus" is the longer timeline; the one below
    # is shorter, so that ending can be tested.
    browser.get("https://twitter.com/StackOverheards")
    print(browser.title)

    i = 0
    browser.get_screenshot_as_file("test03_2_" + str(i) + ".jpg")
    while True:
        print("i %d" % i)
        # Number of stream items currently in the DOM, counted before scrolling.
        elemsCount = browser.execute_script(
            "return document.querySelectorAll('.stream-items > li.stream-item').length")
        browser.execute_script("window.scrollTo(0, document.body.scrollHeight);")

        # XPath positions are 1-based, so [elemsCount + 1] matches only once
        # at least one additional item has been appended to the list.
        nextItemXpath = (
            "//*[contains(@class,'stream-items')]"
            "/li[contains(@class,'stream-item')][" + str(elemsCount + 1) + "]")
        try:
            WebDriverWait(browser, 20).until(
                lambda x: x.find_element_by_xpath(nextItemXpath))
        except TimeoutException:
            # Nothing new loaded within 20 seconds: treat as end of timeline.
            # Other errors (driver failure, Ctrl-C) propagate instead of
            # being mistaken for the end of the page.
            break
        i += 1
        browser.get_screenshot_as_file("test03_2_" + str(i) + ".jpg")
finally:
    # Always shut the driver down, even if a WebDriver call above raised,
    # so no PhantomJS process is left running.
    browser.quit()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment