Skip to content

Instantly share code, notes, and snippets.

@yfe404
Created June 5, 2019 12:00
Show Gist options
  • Save yfe404/a6994ff2f1366895e3a72c41ec7b8542 to your computer and use it in GitHub Desktop.
Save yfe404/a6994ff2f1366895e3a72c41ec7b8542 to your computer and use it in GitHub Desktop.
Website abstraction in Python for scraping
import os
import time

from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
class Website(webdriver.Firefox):
    """Firefox WebDriver with helpers for logging in to and scraping example.com.

    The class adds login, search, scrolling, result extraction and
    pagination helpers on top of the regular ``webdriver.Firefox`` API.
    """

    def connect(self, user=None, password=None):
        """Open the login page, fill in the credentials and submit the form.

        Args:
            user: Value typed into the ``username`` field. When ``None``,
                the ``USERNAME`` environment variable is used.
            password: Value typed into the ``password`` field. When
                ``None``, the ``PASSWORD`` environment variable is used.

        Raises:
            KeyError: If a credential is omitted and the matching
                environment variable is not set.
        """
        # Resolve the credentials at call time. Reading os.environ in the
        # signature would run when the class is defined and make the whole
        # module fail to import whenever the variables are unset, even if
        # the caller intends to pass explicit credentials.
        if user is None:
            user = os.environ['USERNAME']
        if password is None:
            password = os.environ['PASSWORD']

        self.get("https://www.example.com/login")
        email_field = self.find_element(By.ID, "username")
        email_field.send_keys(user)
        pass_field = self.find_element(By.ID, "password")
        pass_field.send_keys(password)
        # Pressing ENTER in the password field submits the form.
        pass_field.send_keys(Keys.ENTER)

    def search(self, keywords):
        """Navigate to the search results page for ``keywords``.

        Args:
            keywords: Text appended verbatim to the ``keywords`` query
                parameter; no URL-encoding is applied here.
        """
        self.get("https://www.example.com/search/?keywords=" + keywords)

    def scroll_by(self, begin, end):
        """Scroll the window by calling ``window.scrollBy(begin, end)``.

        Args:
            begin: First argument handed to ``window.scrollBy``.
            end: Second argument handed to ``window.scrollBy``.
        """
        self.execute_script("window.scrollBy({}, {})".format(begin, end))

    def get_page_content(self):
        """Collect the people listed on the current page.

        Names, jobs and locations are read from three separate element
        lists and paired by position. Semicolons in every text are
        replaced with commas (presumably because ';' is used as a field
        separator downstream -- confirm against the consumer).

        Returns:
            A list of ``Person`` objects, one per complete
            (name, job, location) triple found on the page.
        """
        names = self.find_elements(By.CLASS_NAME, "actor-name")
        jobs = self.find_elements(By.CLASS_NAME, "subline-level-1")
        localisations = self.find_elements(By.CLASS_NAME, "subline-level-2")

        # zip() stops at the shortest list, so an entry lacking a job or a
        # location no longer aborts the whole page with an IndexError.
        # NOTE(review): pairing is still purely positional; a missing
        # sub-line in the middle of the page shifts the following entries.
        return [
            Person(
                name.text.replace(';', ','),
                job.text.replace(';', ','),
                locale.text.replace(';', ','),
            )
            for name, job, locale in zip(names, jobs, localisations)
        ]

    def has_next(self):
        """Return ``True`` when the page contains a ``next-button`` element."""
        # find_elements returns an empty list (rather than raising) when
        # nothing matches, so truthiness is all that is needed.
        return bool(self.find_elements(By.CLASS_NAME, "next-button"))

    def next(self):
        """Click the first ``next-button`` element on the page.

        Raises:
            IndexError: If the page has no ``next-button`` element; call
                ``has_next()`` first to avoid it.
        """
        button_next = self.find_elements(By.CLASS_NAME, "next-button")[0]
        button_next.click()

    def close(self):
        """Quit the driver.

        This overrides the inherited ``close`` so that it delegates to
        ``quit()``.
        """
        self.quit()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment