Created
June 21, 2021 16:39
-
-
Save snydergd/7126b102b992b5e9b640a81cd0efe7ca to your computer and use it in GitHub Desktop.
Scrape ride information from the Mall of America Nickelodeon Universe website using Selenium in Python, and write it to a CSV file.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from selenium import webdriver | |
from selenium.common.exceptions import ElementNotInteractableException | |
from selenium.webdriver.common.action_chains import ActionChains | |
from selenium.webdriver.remote.webelement import WebElement | |
from selenium.webdriver.support.wait import WebDriverWait | |
from selenium.webdriver.support import expected_conditions | |
from selenium.webdriver.common.keys import Keys | |
import csv | |
# Scrape every ride listed on the Nickelodeon Universe rides page and write one
# CSV row per ride to rides.csv:
#   title, height requirement, points/price, thumbnail image URL, ride URL,
#   description HTML.
from selenium.webdriver.common.by import By  # locator strategies for find_element(s)


def get_ride_detail(name):
    """Return the main text of the ride detail whose block contains *name*.

    Looks on the page currently loaded in the module-level ``driver``.
    Returns an empty string unless exactly one matching detail is found.
    """
    found = driver.find_elements(
        By.XPATH,
        f"//*[@class='ride__detail' and contains(., '{name}')]//*[@class='ride__detail-main']",
    )
    if len(found) == 1:
        return found[0].text
    return ''


driver = webdriver.Chrome()
try:
    wait = WebDriverWait(driver, 10)
    driver.get("https://nickelodeonuniverse.com/rides/")
    driver.maximize_window()
    driver.implicitly_wait(6)

    # Dismiss the overlays that are clicked away before the ride list is read.
    driver.find_element(By.CLASS_NAME, 'popmake-close').click()
    driver.find_element(By.CLASS_NAME, 'c-button-accept').click()

    els = driver.find_elements(By.CLASS_NAME, 'ride-thumb__link')
    with open('rides.csv', 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f, dialect='unix')
        for el in els:
            image_url = el.get_attribute("data-background-image").strip('"')
            url = el.get_attribute("href").strip('"')

            # Ctrl+click opens the ride page in a new window/tab so the list
            # page (and the elements in `els`) stays loaded in the first one.
            old_handles = driver.window_handles
            ActionChains(driver).key_down(Keys.CONTROL).click(el).key_up(Keys.CONTROL).perform()
            wait.until(expected_conditions.new_window_is_opened(old_handles))
            driver.switch_to.window(driver.window_handles[-1])

            title = driver.find_element(By.CLASS_NAME, "ride__title").text
            height = get_ride_detail("Height Req.")
            # Fall back to the "Price" detail when no "Points to Ride" text is found.
            points = get_ride_detail("Points to Ride")
            if not points:
                points = get_ride_detail('Price')

            description_el: WebElement = driver.find_element(
                By.XPATH, "//div[./h3[contains(text(),'DESCRIPTION')]]"
            )
            description_html = description_el.get_attribute("innerHTML")

            writer.writerow([title, height, points, image_url, url, description_html])
            print(title)

            # Close the ride window and return to the list page.
            driver.close()
            driver.switch_to.window(driver.window_handles[0])
finally:
    # Always shut the browser down, even if a step above raised.
    driver.quit()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment