Last active
October 29, 2023 18:34
-
-
Save sudoxx2/2ebbdb52373a6cf3913668aaa2280245 to your computer and use it in GitHub Desktop.
automate_download
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
# function to take care of downloading file
def enable_download_headless(browser, download_dir):
    """Tell the Chrome session behind *browser* to allow downloads into *download_dir*.

    Registers a ``send_command`` endpoint on the driver's command executor and
    uses it to issue ``Page.setDownloadBehavior`` with ``behavior="allow"``.

    Args:
        browser: A driver object exposing ``command_executor._commands`` and
            ``execute(name, params)``.
        download_dir: Directory passed as ``downloadPath``. May be a ``str`` or
            any ``os.PathLike`` (e.g. ``pathlib.Path``); it is converted with
            ``os.fspath`` so the payload stays a plain string.
    """
    # `_commands` is a private attribute of the command executor; the endpoint
    # is added by hand because it is not part of the registered command table.
    # NOTE(review): newer Selenium releases appear to offer
    # `driver.execute_cdp_cmd(...)` for this purpose - confirm before switching.
    browser.command_executor._commands["send_command"] = (
        "POST",
        "/session/$sessionId/chromium/send_command",
    )
    params = {
        "cmd": "Page.setDownloadBehavior",
        "params": {
            "behavior": "allow",
            # os.fspath leaves str untouched and unwraps path objects.
            "downloadPath": os.fspath(download_dir),
        },
    }
    browser.execute("send_command", params)
# Instantiate a Chrome options object so the window size and headless
# preference can be set. Some of these options might be unnecessary; they come
# from a boilerplate.
# Change <path_to_download_default_directory> to wherever your default
# download folder is located.
chrome_options = Options()
chrome_options.add_argument("--headless")
# Chrome expects the size as "width,height"; the "1920x1080" form is not
# the documented format for this switch.
chrome_options.add_argument("--window-size=1920,1080")
chrome_options.add_argument("--disable-notifications")
chrome_options.add_argument("--no-sandbox")
chrome_options.add_argument("--verbose")
chrome_options.add_experimental_option(
    "prefs",
    {
        "download.default_directory": "<path_to_download_default_directory>",
        "download.prompt_for_download": False,
        "download.directory_upgrade": True,
        "safebrowsing_for_trusted_sources_enabled": False,
        "safebrowsing.enabled": False,
    },
)
chrome_options.add_argument("--disable-gpu")
chrome_options.add_argument("--disable-software-rasterizer")

# Initialize the driver object; change <path_to_chrome_driver> to the location
# of your chromedriver binary. Selenium 4 takes the driver path through a
# Service object and the options through `options=`; the older
# `executable_path=` / `chrome_options=` keywords are no longer accepted.
driver = webdriver.Chrome(
    service=Service(executable_path="<path_to_chrome_driver>"),
    options=chrome_options,
)

# Change <path_to_place_downloaded_file> to the directory where the downloaded
# file should be placed.
download_dir = "<path_to_place_downloaded_file>"

# Set up downloading for the headless session.
enable_download_headless(driver, download_dir)

# Load the page Selenium will operate on.
driver.get("https://www.thinkbroadband.com/download")

# Locate the download link's image on the page and click it to start the
# download. `find_element(By.CSS_SELECTOR, ...)` replaces the removed
# `find_element_by_css_selector` helper.
search_input = driver.find_element(
    By.CSS_SELECTOR,
    '#main-col > div > div > div:nth-child(8) > p:nth-child(1) > a > img',
)
search_input.click()

# NOTE(review): the driver is intentionally not quit here, because quitting
# right after the click would presumably abort the download. Call
# `driver.quit()` once the file has appeared in `download_dir`.
@SnapDragon7410 It will be detected as a bot if the website you are automating has bot protection. There are always ways around bot detection; they just require a bit more effort.
let me know if you need any help.
Thank you, very helpful
How can I do a "save file as"? I want the file stored under a different name for versioning purposes.
Hello @sudoxx2
I am using Windows OS 11, Python version is 3.11.4, and Chrome Driver version is 118.
I'm trying to download the csv file to a designated folder, but it doesn't work.
Please check my code below.
Is there a problem with the path?
Could you please help me solve it?
Thanks in advance.
# Configure Chrome before the driver is created.
options = webdriver.ChromeOptions()
options.add_argument('--no-sandbox')
options.add_argument('--headless')
options.add_argument('--ignore-certificate-errors')
options.add_argument('--disable-dev-shm-usage')
options.add_argument('--disable-extensions')
options.add_argument('--verbose')
options.add_argument("--disable-notifications")
options.add_experimental_option(
    "excludeSwitches", ["enable-automation", "enable-logging"]
)
prefs = {
    "download.default_directory": "C:/Users/xxx/Downloads",
    "download.prompt_for_download": False,
    "download.directory_upgrade": True,
    "safebrowsing_for_trusted_sources_enabled": False,
    "safebrowsing.enabled": False
}
options.add_experimental_option("prefs", prefs)
options.add_argument('--disable-gpu')
options.add_argument('--disable-software-rasterizer')
service = Service(executable_path='./chromedriver-win64/chromedriver.exe')
# Do NOT re-create `options` here: assigning a fresh ChromeOptions() at this
# point would discard every argument and pref configured above (including
# --headless and the download prefs) before the driver is started.
driver = webdriver.Chrome(service=service, options=options)
# NOTE(review): this folder differs from "download.default_directory" above -
# confirm which one is intended. On Windows, Chrome may also need a
# backslash-separated absolute path to an existing folder - verify.
enable_download_headless(driver, "D:/02_work/csv")
driver.implicitly_wait(10)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
@xaverrevax No problem, hit me up if you need any clarification or find any bugs.