Skip to content

Instantly share code, notes, and snippets.

@tallpeak
Last active November 16, 2023 21:13
Show Gist options
  • Save tallpeak/c8a9254370b24b8fbab6a5c803204370 to your computer and use it in GitHub Desktop.
Save tallpeak/c8a9254370b24b8fbab6a5c803204370 to your computer and use it in GitHub Desktop.
SDCD download with Selenium webdriver (Chrome)
#https://gist.github.com/tallpeak/c8a9254370b24b8fbab6a5c803204370
#based on https://www.guru99.com/selenium-python.html
from selenium import webdriver
from selenium.webdriver.common.by import *
from selenium.webdriver.support.ui import WebDriverWait
import sys
import os
# Choose the download mode from the first command-line argument; when the
# argument is missing or not recognised, ask on stdin instead.
#   full = True   -> the complete "Sfa8" superfile
#   full = False  -> the incremental file (XM_####W.gz)
arg1 = sys.argv[1] if len(sys.argv) > 1 else ""
if arg1 in ("-f", "--full", "f", "full"):
    full = True
elif arg1 in ("-i", "--incremental", "i", "incremental"):
    full = False
else:
    print("f=full Sfa8, i=incremental (XM_####W.gz)")
    # Any answer starting with "f" (case-insensitive) selects the full
    # download; every other answer, including an empty one, is incremental.
    full = input().lower().startswith("f")
# Log in to sdcd.com with Chrome and leave `browser` on the page the site
# shows after login.

# Read the credentials first so that a missing or malformed secrets file
# fails before a browser window is opened. The file is expected to hold a
# single "username:password" entry.
with open(os.path.expanduser("~/secrets/sdcd.txt")) as f:
    # Split on the FIRST ":" only, so a password containing ":" stays intact.
    un, sep, pw = f.read().strip().partition(":")
if not sep:
    raise ValueError("~/secrets/sdcd.txt must contain username:password")

options = webdriver.ChromeOptions()
options.add_argument("start-maximized")
# to suppress the error messages/logs
options.add_experimental_option('excludeSwitches', ['enable-logging'])
# To run without a visible window, also add:
#   options.add_argument("--headless=new")
browser = webdriver.Chrome(options=options)

# The login URL carries ReturnUrl=/Superfile/A8GZ, so a successful login is
# presumably redirected to the superfile download page.
browser.get("http://www.sdcd.com/Login?ReturnUrl=%2fSuperfile%2fA8GZ")
username = browser.find_element(By.ID, "ctl00_cphLogin_tbUser")
password = browser.find_element(By.ID, "ctl00_cphLogin_tbPw")
submit = browser.find_element(By.ID, "ctl00_cphLogin_btnLogin")

username.clear()
username.send_keys(un)
password.clear()
password.send_keys(pw)
submit.click()

# Print the title as a quick visual check of where the login landed.
# Per the original author's note, the expected title is
# "Superfile Download A8 Gzip: Sdcd.com - DVD Movies / CD Music Wholesale Distributor".
page_title = browser.title
print(page_title)
# Pick the download link for the chosen mode, remember the filename pattern
# the finished download must match (`repattern`), and start the download.

# Explicit wait: the post-login page may still be loading, and a bare
# find_element raises at once if the link is not in the DOM yet.
# WebDriverWait.until retries the lookup until it succeeds or 30 s elapse.
wait = WebDriverWait(browser, 30)
if full:
    # Full superfile: located by its visible link text.
    # https://stackoverflow.com/questions/3813294/how-to-get-element-by-innertext
    link = wait.until(
        lambda d: d.find_element(By.XPATH, "//a[text()='Sfa8']")
    )
    # Raw string with an escaped dot so only a literal ".gz" suffix matches.
    repattern = r"^SFA8\.gz$"
else:
    # Incremental file: the "ctl01" entry of the download list
    # (presumably the most recent one -- verify against the page).
    link = wait.until(
        lambda d: d.find_element(
            By.ID, "ctl00_cphBody_FileDownload1_rpt1_ctl01_lbtn1"
        )
    )
    repattern = r"^XM_[0-9]{4}W\.gz$"
link.click()
# Wait for the browser download to show up in ~/Downloads, then copy it to
# the remote host with scp and start the import there -- exactly once.
import datetime
import os
import re
import time

dl = os.path.expanduser("~/Downloads/")

# Only accept files whose ctime is within the last 24 hours, so an older
# download with the same name is ignored.
dayago = datetime.datetime.now() - datetime.timedelta(days=1)
dayago_timestamp = dayago.timestamp()

# Compiled once; `repattern` is anchored with ^...$, so a partially
# downloaded file carrying an extra suffix does not match.
pattern = re.compile(repattern)

# Poll up to 1000 times, 10 seconds apart, and stop at the first match.
# (Leaving only the inner loop would re-send the same file on every
# subsequent poll.)
found = None
for r in range(1000):
    print(r, " sleeping 10 seconds while looking for files...", end="\r")
    time.sleep(10)
    for f in os.listdir(dl):
        path = os.path.join(dl, f)
        if pattern.search(f) and os.path.getctime(path) > dayago_timestamp:
            found = path
            break
    if found is not None:
        break

print()  # move past the "\r" progress line
if found is None:
    print(f"no file matching {repattern} appeared in {dl}; giving up")
else:
    # NOTE(review): the scp destination below reads "[email protected]", which
    # looks like an e-mail-obfuscation placeholder from the page this script
    # was copied from -- replace it with the real user@host before running.
    cmd = f"scp {found} [email protected]:"
    print(cmd)
    status = os.system(cmd)
    if status != 0:
        # Do not start the import if the copy did not succeed.
        print(f"scp failed (status {status}); skipping import")
    else:
        cmd = "qomph ~/import_superfile_copyfrom.py"
        print(cmd)
        os.system(cmd)
# done
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment