Last active
November 16, 2023 21:13
-
-
Save tallpeak/c8a9254370b24b8fbab6a5c803204370 to your computer and use it in GitHub Desktop.
SDCD download with Selenium webdriver (Chrome)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#https://gist.github.com/tallpeak/c8a9254370b24b8fbab6a5c803204370
#based on https://www.guru99.com/selenium-python.html
import datetime
import os
import re
import sys
import time

from selenium import webdriver
from selenium.webdriver.common.by import *
from selenium.webdriver.support.ui import WebDriverWait
# Choose the download mode: `full` is True for the full superfile and False
# for the incremental one.  The first command-line argument decides; when it
# is missing or not recognised, the user is prompted on stdin instead.
arg1 = sys.argv[1] if len(sys.argv) > 1 else ""
if arg1 in ("-f", "--full", "f", "full"):
    full = True
elif arg1 in ("-i", "--incremental", "i", "incremental"):
    full = False
else:
    print("f=full Sfa8, i=incremental (XM_####W.gz)")
    # Any answer starting with "f" (case-insensitive) selects the full file.
    answer = input()
    full = answer.lower().startswith("f")
# Start Chrome, open the login page (which redirects to the superfile
# download page after a successful login) and sign in.
options = webdriver.ChromeOptions()
options.add_argument("start-maximized")
# Suppress the chromedriver error messages/logs on the console.
options.add_experimental_option('excludeSwitches', ['enable-logging'])
# For a windowless run, add: options.add_argument("--headless=new")
browser = webdriver.Chrome(options=options)
# NOTE(review): the login form is requested over plain http -- confirm
# whether the site offers https before sending credentials this way.
browser.get("http://www.sdcd.com/Login?ReturnUrl=%2fSuperfile%2fA8GZ")
username = browser.find_element(By.ID, "ctl00_cphLogin_tbUser")
password = browser.find_element(By.ID, "ctl00_cphLogin_tbPw")
submit = browser.find_element(By.ID, "ctl00_cphLogin_btnLogin")
# The secrets file holds a single "user:password" entry.  Split on the first
# colon only, so a password that itself contains ":" is kept intact instead
# of raising ValueError during unpacking.
with open(os.path.expanduser("~/secrets/sdcd.txt")) as f:
    un, pw = f.read().strip().split(":", 1)
username.clear()
username.send_keys(un)
password.clear()
password.send_keys(pw)
submit.click()
# Print the title of the page reached after login as a quick visual check.
# Expected on success (not enforced):
# "Superfile Download A8 Gzip: Sdcd.com - DVD Movies / CD Music Wholesale Distributor"
page_title = browser.title
print(page_title)
# Locate the download link for the chosen mode and remember the regular
# expression (`repattern`) that the downloaded file name must match.
if full:
    # The full superfile link is looked up by its visible link text, see
    # https://stackoverflow.com/questions/3813294/how-to-get-element-by-innertext
    link = browser.find_element(By.XPATH, "//a[text()='Sfa8']")
    # The dot is escaped so it matches only a literal "." before "gz".
    repattern = r"^SFA8\.gz$"
else:
    link = browser.find_element(By.ID, "ctl00_cphBody_FileDownload1_rpt1_ctl01_lbtn1")
    repattern = r"^XM_[0-9]{4}W\.gz$"
# Clicking the link starts the browser download.
link.click()
# Poll the Downloads folder until a file matching `repattern` that was
# created within the last day appears, then copy it to the server and start
# the import -- exactly once.


def _recent_download(directory, pattern, newer_than):
    """Return the path of the first entry in *directory* whose name matches
    *pattern* and whose ctime is later than *newer_than*, or None."""
    for name in os.listdir(directory):
        path = f"{directory}/{name}"
        if re.search(pattern, name) and os.path.getctime(path) > newer_than:
            return path
    return None


# The file should end up in the user's Downloads folder.
dl = os.path.expanduser("~/Downloads/")
# Only accept files newer than one day, so an old download is not re-sent.
dayago = datetime.datetime.now() - datetime.timedelta(days=1)
dayago_timestamp = dayago.timestamp()
for r in range(1000):
    print(r, " sleeping 10 seconds while looking for files...", end="\r")
    time.sleep(10)
    found = _recent_download(dl, repattern, dayago_timestamp)
    if found is None:
        continue
    # NOTE(review): the scp destination below looks like an e-mail
    # obfuscation placeholder left by a web scrape -- restore the real
    # user@host before running this.
    cmd = f"scp {found} [email protected]:"
    print(cmd)
    if os.system(cmd) != 0:
        # Do not start the import when the copy did not succeed.
        print("scp failed; skipping the import")
        break
    cmd = "qomph ~/import_superfile_copyfrom.py"
    print(cmd)
    os.system(cmd)
    # Stop polling: the file has been handled.  Leaving only an inner loop
    # here would repeat the upload and the import on every later poll.
    break
else:
    print("\ngave up waiting for the download")
# done
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment