-
-
Save elchappo/0d8d823677e09215e0f989f46198e32b to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/python | |
# -*- coding: utf-8 -*- | |
from time import sleep | |
from random import randint | |
from selenium import webdriver | |
from pyvirtualdisplay import Display | |
class MuncherySpider:
    """Scrape menu items from the Munchery landing page with headless Chrome.

    The browser is rendered into a pyvirtualdisplay ``Display`` so no real
    screen is needed. ``parse()`` is the entry point; the remaining methods
    are the individual steps it runs in order.

    NOTE(review): the element lookups use the ``find_element(s)_by_xpath``
    helpers, which newer Selenium releases no longer provide -- confirm the
    installed Selenium version before upgrading.
    """

    def __init__(self):
        self.url_to_crawl = "https://munchery.com/"
        # Dicts produced by process_elements(), filled by grab_list_items().
        self.all_items = []
        # Created by start_driver(); None means "not started / already closed".
        self.display = None
        self.driver = None

    # Open headless chromedriver
    def start_driver(self):
        """Start the virtual display, then launch Chrome inside it."""
        print('starting driver...')
        self.display = Display(visible=0, size=(800, 600))
        self.display.start()
        self.driver = webdriver.Chrome("/var/chromedriver/chromedriver")
        # Fixed pause after launching the browser before it is first used.
        sleep(4)

    # Close chromedriver
    def close_driver(self):
        """Quit the browser and stop the virtual display.

        Safe to call when start_driver() failed part-way or was never called:
        handles that were never created are skipped. The browser is closed
        before the display it draws into, and the display is stopped even if
        quitting the browser raises.
        """
        print('closing driver...')
        driver, self.driver = self.driver, None
        display, self.display = self.display, None
        try:
            if driver is not None:
                driver.quit()
        finally:
            if display is not None:
                display.stop()
        print('closed!')

    # Tell the browser to get a page
    def get_page(self, url):
        """Navigate the browser to ``url`` and pause 2-3 seconds."""
        print('getting page...')
        self.driver.get(url)
        sleep(randint(2, 3))

    # Munchery front gate page
    def login(self, email='[email protected]', zip_code='94011'):
        """Fill in and submit the sign-up gate form, if it is present.

        This step is best-effort: any error while locating or filling the
        form is reported on stdout and otherwise ignored, so the crawl
        continues with whatever page is currently loaded.

        :param email: address typed into the e-mail field.
            NOTE(review): the default looks like a redacted placeholder --
            pass a real address if the form validates it.
        :param zip_code: value typed into the zip-code field.
        """
        print('getting pass the gate page...')
        try:
            form = self.driver.find_element_by_xpath('//*[@class="signup-login-form"]')
            form.find_element_by_xpath('.//*[@class="user-input email"]').send_keys(email)
            form.find_element_by_xpath('.//*[@class="user-input zip-code"]').send_keys(zip_code)
            form.find_element_by_xpath('.//*[@class="large orange button"]').click()
            sleep(randint(3, 5))
        except Exception as exc:
            # Deliberately non-fatal, but no longer silent.
            print('gate page not handled: %r' % (exc,))

    def grab_list_items(self):
        """Collect every complete menu entry on the page into ``all_items``."""
        print('grabbing list of items...')
        rows = self.driver.find_elements_by_xpath('//ul[@class="menu-items row"]//li')
        for row in rows:
            data = self.process_elements(row)
            if data:
                self.all_items.append(data)

    def process_elements(self, div):
        """Extract image, title and price from one menu ``<li>`` element.

        :param div: element exposing ``find_element_by_xpath``.
        :returns: dict with UTF-8 encoded ``'image'``, ``'title'`` and
            ``'price'`` values, or ``False`` when any of the three is missing
            or empty.
        """
        prd_image = ''
        prd_title = ''
        prd_price = ''
        try:
            # NOTE(review): the attribute name "source" is kept as found;
            # confirm it is the one carrying the image URL.
            prd_image = div.find_element_by_xpath('.//*[@class="photo item-photo"]').get_attribute("source")
            prd_title = div.find_element_by_xpath('.//*[@class="text ng-binding"]').text
            prd_price = div.find_element_by_xpath('.//*[@class="price ng-scope ng-binding"]').text
        except Exception:
            # A failed lookup leaves the remaining fields empty, so the
            # completeness check below rejects this entry.
            pass

        if not (prd_image and prd_title and prd_price):
            return False
        return {
            'image': prd_image.encode('UTF-8'),
            'title': prd_title.encode('UTF-8'),
            'price': prd_price.encode('UTF-8'),
        }

    def parse(self):
        """Run the whole crawl and return the scraped items.

        The browser and display are always released, even when a step raises;
        the exception then propagates to the caller.

        :returns: the non-empty ``all_items`` list, or the tuple
            ``(False, False)`` when nothing was scraped (kept as-is for
            existing callers).
        """
        try:
            self.start_driver()
            self.get_page(self.url_to_crawl)
            self.login()
            self.grab_list_items()
        finally:
            self.close_driver()

        if self.all_items:
            return self.all_items
        return False, False
# Run spider
Munchery = MuncherySpider()
items_list = Munchery.parse()

# Do something with the data touched
# parse() signals "nothing scraped" with the tuple (False, False); skip those
# falsy placeholders so only real item dicts are printed.
for item in items_list:
    if item:
        print(item)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment