Skip to content

Instantly share code, notes, and snippets.

@riteshkawadkar
Last active April 28, 2021 15:03
Show Gist options
  • Save riteshkawadkar/1f11a75175f15626e064988e0dc8679e to your computer and use it in GitHub Desktop.
Save riteshkawadkar/1f11a75175f15626e064988e0dc8679e to your computer and use it in GitHub Desktop.
import time
from selenium import webdriver
from bs4 import BeautifulSoup
from urllib.parse import urljoin
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver import Chrome
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
# Build the Chrome launch configuration. The flags are applied in the order
# listed; the last one points Chrome at an existing user-data directory
# (presumably so the session reuses an already logged-in profile -- the path
# is machine-specific and must be adjusted per user).
options = Options()
for _chrome_flag in (
    "--no-sandbox",
    "--disable-dev-shm-usage",
    "--ignore-certificate-errors",
    "--user-data-dir=C:\\Users\\rites\\AppData\\Local\\Google\\Chrome\\User Data",
):
    options.add_argument(_chrome_flag)
##### Web scraper for an infinite-scrolling page #####
# NOTE(review): passing the driver path positionally is the older Selenium
# calling convention; newer Selenium releases expect a Service object instead.
# Confirm against the installed Selenium version before upgrading.
driver = webdriver.Chrome(ChromeDriverManager().install(), options=options)
try:
    driver.get("https://www.linkedin.com/in/manish-kawadkar-37629435/detail/recent-activity/")
    time.sleep(2)  # Allow 2 seconds for the web page to open

    scroll_pause_time = 1  # Seconds to wait after each scroll so new content can load
    # Lazy-loaded content can take longer than one pause to arrive, so only
    # give up after several consecutive scrolls that did not grow the page.
    max_stalled_scrolls = 3
    stalled_scrolls = 0
    last_height = driver.execute_script("return document.body.scrollHeight;")

    while stalled_scrolls < max_stalled_scrolls:
        # Jump to the current bottom of the page to trigger loading of more items.
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(scroll_pause_time)

        # Re-read the height after scrolling: it grows whenever the page
        # appended new content in response to the scroll.
        scroll_height = driver.execute_script("return document.body.scrollHeight;")
        if scroll_height > last_height:
            last_height = scroll_height
            stalled_scrolls = 0
        else:
            stalled_scrolls += 1

    # Capture the fully scrolled DOM before the browser is shut down.
    page_source = driver.page_source
finally:
    # Always release the browser and chromedriver processes, even if
    # navigation or scrolling raised.
    driver.quit()

##### Count LinkedIn activity updates #####
soup = BeautifulSoup(page_source, "html.parser")
# A multi-word class_ string matches the exact class attribute value; each
# matching <div> is presumably one activity update -- verify against the
# current LinkedIn markup.
print(len(soup.find_all('div', class_='occludable-update ember-view')))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment