Created
June 5, 2019 12:00
-
-
Save yfe404/a6994ff2f1366895e3a72c41ec7b8542 to your computer and use it in GitHub Desktop.
Website abstraction in Python for scraping
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from selenium import webdriver | |
| from selenium.webdriver.common.keys import Keys | |
| from selenium.common.exceptions import WebDriverException | |
| import os | |
| import time | |
class Website(webdriver.Firefox):
    """Firefox WebDriver subclass wrapping the scraping steps for one site.

    The methods cover logging in, running a keyword search, scrolling,
    reading the people listed on the current page, and moving through
    paginated results.
    """

    def connect(self, user=None, password=None):
        """Log in through the site's login form.

        Args:
            user: Text typed into the element with id ``username``. When
                ``None``, the ``USERNAME`` environment variable is used.
            password: Text typed into the element with id ``password``.
                When ``None``, the ``PASSWORD`` environment variable is used.

        Raises:
            KeyError: If a credential is omitted and the matching
                environment variable is not set.
        """
        # Resolve the fallbacks at call time. Default values written in the
        # signature are evaluated once, while the class body executes, which
        # made a plain import of this module fail whenever the variables
        # were missing and froze their values at import time.
        if user is None:
            user = os.environ['USERNAME']
        if password is None:
            password = os.environ['PASSWORD']

        self.get("https://www.example.com/login")
        email_field = self.find_element_by_id("username")
        email_field.send_keys(user)
        pass_field = self.find_element_by_id("password")
        pass_field.send_keys(password)
        # Pressing ENTER in the password field submits the form.
        pass_field.send_keys(Keys.ENTER)

    def search(self, keywords):
        """Navigate to the search results page for ``keywords``.

        ``keywords`` is appended to the query string verbatim; no URL
        encoding is applied here, so callers must pass a value that is
        already safe to embed in a URL.
        """
        self.get("https://www.example.com/search/?keywords=" + keywords)

    def scroll_by(self, begin, end):
        """Scroll the window by running ``window.scrollBy(begin, end)``.

        ``begin`` and ``end`` are forwarded, in that order, as the first
        and second argument of the JavaScript ``window.scrollBy`` call.
        """
        self.execute_script("window.scrollBy({}, {})".format(begin, end))

    def get_page_content(self):
        """Return one ``Person`` per result found on the current page.

        Elements with the classes ``actor-name``, ``subline-level-1`` and
        ``subline-level-2`` are read as name, job and location and paired
        up by position. Semicolons in each text are replaced by commas
        (presumably so the values can be written to a ';'-separated file;
        confirm against the code that consumes ``Person``).

        NOTE(review): ``Person`` is not defined in the visible part of this
        file; it is assumed to be defined or imported elsewhere.
        """
        names = self.find_elements_by_class_name("actor-name")
        jobs = self.find_elements_by_class_name("subline-level-1")
        localisations = self.find_elements_by_class_name("subline-level-2")

        # zip stops at the shortest list, so a page where the three lookups
        # return different counts yields only the complete leading triples
        # instead of raising IndexError part-way through.
        return [
            Person(
                name.text.replace(';', ','),
                job.text.replace(';', ','),
                locale.text.replace(';', ','),
            )
            for name, job, locale in zip(names, jobs, localisations)
        ]

    def has_next(self):
        """Return True if the page contains a ``next-button`` element."""
        # find_elements_* returns a list, which is empty (falsy) when
        # nothing matches.
        return bool(self.find_elements_by_class_name("next-button"))

    def next(self):
        """Click the first ``next-button`` element on the page.

        Raises:
            IndexError: If the page has no such element; check
                ``has_next()`` first.
        """
        button_next = self.find_elements_by_class_name("next-button")[0]
        button_next.click()

    def close(self):
        """Shut the browser down by delegating to ``quit()``.

        This overrides the inherited ``close`` so that calling it runs
        ``quit()`` on the driver.
        """
        self.quit()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment