Created
July 4, 2023 09:57
-
-
Save josifoski/97aa9c4fa9cfadc1b1bab3d87f6e6615 to your computer and use it in GitHub Desktop.
Amazon.com.au
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# Script for scanning amazon.com.au for ps5 | |
# Aleksandar Josifoski about.me/josifsk for Timothy Yang via upwork | |
# Date created: 2022 January | |
# Dependencies: | |
# pip3 install -U selenium | |
# Setup on the Pi: create the /home/pi/amznau2 directory and place geckodriver in it
# Start the program with: python3 amznau2.py
# Once the PS5 shows as in stock, this script saves a screenshot to photo.png
# It also saves the HTML of the page to html.txt
######################################################################### | |
import codecs
import datetime
import getpass

# The login name of the current user decides which filesystem layout is used.
username = getpass.getuser()
comp = username

if comp != 'josifoski':
    # Any other account uses the Raspberry Pi layout.
    dir_in = '/home/pi/amznau2/'
    geckodriver_path = '/home/pi/amznau2/geckodriver'
else:
    # Layout used under the 'josifoski' account.
    dir_in = '/data/upwork/Timothy_Yang/'
    geckodriver_path = '/data/Scrape/geckodriver'

# Browser selection and run mode consumed by setbrowser().
brows = 'firefox'
headless = True

# Seconds passed to the driver's implicit wait.
timeout = 10
######################################################################### | |
from selenium import webdriver | |
from selenium.webdriver.firefox.options import Options | |
from selenium.webdriver.firefox.service import Service | |
from selenium.webdriver.common.keys import Keys | |
from selenium.common.exceptions import TimeoutException | |
from selenium.webdriver.support.ui import WebDriverWait | |
from selenium.webdriver.common.by import By | |
from selenium.webdriver.support import expected_conditions as EC | |
from selenium.webdriver.common.action_chains import ActionChains | |
import sys | |
import os | |
import time | |
import traceback | |
import random | |
import requests | |
import re | |
import html | |
# Shared selenium session handle; setbrowser() assigns the live driver to it.
driver = None
# Start-of-run timestamp that calculate_time() measures elapsed time against.
time1 = time.time()
def openurl(url):
    '''Open url in the shared selenium session.

    Errors raised while loading are printed (prefixed with a
    minute-resolution timestamp) and swallowed, so this function never
    raises and the return value does not tell the caller whether the page
    actually loaded.

    url -- address to load through the module-level ``driver``
    '''
    global driver
    try:
        # Announce the request before issuing it, so the log names the page
        # even when the load fails.
        print('loading ' + url)
        driver.get(url)
    except Exception as e:
        # Deliberately best-effort: report and let the caller continue.
        now = str(datetime.datetime.now())[:16]
        print(now + ' ' + str(e))
def scroll_down(sbypx):
    '''Scroll the current page vertically by sbypx pixels, then pause.

    sbypx -- number of pixels handed to window.scrollBy as the y offset
    '''
    global driver
    script = "window.scrollBy(0, %d);" % sbypx
    driver.execute_script(script)
    # Short pause after scrolling before the caller continues.
    time.sleep(0.5)
def setbrowser():
    '''Create the shared Firefox session and store it in ``driver``.

    Reads the module-level settings ``brows``, ``headless``,
    ``geckodriver_path`` and ``timeout``. The browser is started with an
    overridden user agent, maximized, and given an implicit wait of
    ``timeout`` seconds.

    Raises ValueError when ``brows`` names anything other than firefox,
    the only browser this function knows how to start.
    '''
    print("Preparing browser")
    global driver
    if brows.lower() not in ('firefox',):
        raise ValueError('unsupported browser: ' + brows)

    options = Options()
    # The preference value must be the bare user-agent string; a
    # 'user-agent=' prefix would be sent to the site as part of the header.
    user_agent = ('Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                  'AppleWebKit/537.36 (KHTML, like Gecko) '
                  'Chrome/79.0.3945.79 Safari/537.36')
    # Preferences are set on Options rather than on a FirefoxProfile passed
    # via firefox_profile=, which newer Selenium 4 releases no longer accept.
    options.set_preference("general.useragent.override", user_agent)
    # To route traffic through a local SOCKS proxy, set the
    # network.proxy.* preferences on options here.
    if headless:
        # Command-line flag instead of the options.headless setter, which
        # newer Selenium 4 releases no longer provide.
        options.add_argument("-headless")

    service = Service(geckodriver_path)
    driver = webdriver.Firefox(service=service, options=options)
    driver.maximize_window()
    driver.implicitly_wait(timeout)
def calculate_time():
    '''Print the time elapsed since ``time1`` as hours, minutes and seconds.'''
    elapsed = time.time() - time1
    hours = int(elapsed / 3600)
    # Whatever is left once the whole hours are taken out.
    remainder = elapsed - hours * 3600
    minutes = int(remainder / 60)
    sec = remainder - minutes * 60
    # %d drops the fractional part of the seconds.
    print("processed in %dh:%dm:%ds" % (hours, minutes, sec))
def driverquit():
    '''Close the browser session, print the elapsed time and exit.

    Always finishes with sys.exit(), so it does not return to the caller.
    A missing session (``driver`` is None) or an error while quitting is
    tolerated, so the timing report and the exit still happen.
    '''
    global driver
    if driver is not None:
        try:
            driver.quit()
        except Exception as e:
            # A dead session cannot be closed cleanly; report and move on.
            print(str(e))
        driver = None
    calculate_time()
    sys.exit()
def restartff():
    '''Discard the current Firefox session and start a fresh one.

    main() calls this after the connection to the browser is lost, so
    quitting the old session may itself fail; that failure is printed and
    ignored so the new session is still created.
    '''
    global driver
    if driver is not None:
        try:
            driver.quit()
        except Exception as e:
            print(str(e))
        driver = None
    setbrowser()
    print('Preparing browser done')
def login(email='[email protected]', password='Upw0rker'):
    '''Walk through the amazon.com.au sign-in form.

    email    -- text typed into the e-mail field
    password -- text typed into the password field

    Each step looks up one element by id, acts on it, pauses, and prints a
    progress message. A failing step prints its error and the remaining
    steps are still attempted. Nothing checks that the site accepted the
    credentials, so the final message only means the submit button was
    clicked.

    SECURITY: the defaults keep the credentials that were embedded in this
    script so existing calls behave the same; prefer passing them in from
    outside the source tree.
    NOTE(review): the default e-mail looks like a redaction placeholder
    rather than a real address -- confirm before relying on it.
    '''
    global driver
    openurl('https://amazon.com.au')
    print('Login attempt')

    # (element id, action, text to type, pause in seconds, progress message)
    steps = (
        ("nav-link-accountList-nav-line-1", 'click', None, 5,
         'Sign in clicked'),
        ("ap_email", 'type', email, 2, 'email entered'),
        ("continue", 'click', None, 3, 'continue clicked'),
        ("ap_password", 'type', password, 2, 'password entered'),
        ("signInSubmit", 'click', None, 15,
         'Submit clicked, login successful!'),
    )
    for element_id, action, text, pause, message in steps:
        try:
            element = driver.find_element(By.ID, element_id)
            if action == 'click':
                element.click()
            else:
                element.send_keys(text)
            time.sleep(pause)
            print(message)
        except Exception as e:
            # Best-effort: report the failed step and try the next one.
            print(str(e))
def _save_snapshot():
    '''Save a screenshot (photo.png) and the page source (html.txt) in dir_in.'''
    driver.save_screenshot(dir_in + 'photo.png')
    page = driver.page_source
    with codecs.open(dir_in + 'html.txt', 'w', 'utf8') as f:
        f.write(html.unescape(page))


def main():
    '''Poll the PS5 product page until it stops showing as unavailable.

    Every round loads the product page and looks for the
    "Currently unavailable." price label. While the label is present the
    loop sleeps 120 seconds and tries again. When the label is missing, a
    screenshot and the page HTML are saved and the loop ends.

    Only a missing label counts as "in stock". A lost browser connection
    restarts Firefox and retries at once; any other error is printed and
    the loop retries after the usual sleep.
    '''
    # Imported here so the block brings its own dependency into scope.
    from selenium.common.exceptions import NoSuchElementException

    global driver
    xpath = '//span[@class="a-color-price a-text-bold" and text()="Currently unavailable."]'
    while True:
        # ps5
        openurl('https://www.amazon.com.au/gp/product/B08HHV8945/')
        time.sleep(5)
        try:
            driver.find_element(By.XPATH, xpath)
        except NoSuchElementException:
            # The "unavailable" label is gone: record the page and stop.
            _save_snapshot()
            break
        except Exception as e:
            print(str(e))
            if 'Tried to run command without establishing a connection' in str(e):
                # The session died; a fresh browser is needed before the
                # next check, and there is nothing worth snapshotting.
                restartff()
                continue
        print('sleeping 120 seconds')
        time.sleep(120)
if __name__ == '__main__':
    # Script entry point: start the browser, run the polling loop, and
    # always shut the browser down afterwards.
    setbrowser()
    print('Preparing browser done')
    try:
        main()
    except Exception as e:
        print(str(e))
        print(traceback.format_exc())
    finally:
        # Runs on normal completion, on errors, and on Ctrl-C
        # (KeyboardInterrupt is not an Exception subclass), so the browser
        # is not left running. driverquit() ends the process via sys.exit().
        driverquit()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment