Created
February 5, 2024 07:28
-
-
Save sohang3112/ab4ce205e9cc97503017db4c26501e54 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Image that runs test_selenium.py against a pinned "Chrome for Testing"
# build and its matching chromedriver.
#
# Build: docker build -t selenium_chrome -f selenium_chrome.dockerfile .
# Run:   docker run selenium_chrome
FROM python:3.8

# Install chrome & chromedriver dependencies (plus unzip/wget, used below to
# fetch and unpack the browser archives).
# Source: https://github.com/puppeteer/puppeteer/blob/v5.5.0/docs/troubleshooting.md#chrome-headless-doesnt-launch-on-unix
# see section "Debian Dependencies"
#
# `apt-get update` and `apt-get install` share one layer so a cached package
# index is never reused with a changed package list; the index is removed in
# the same layer so it does not end up in the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        ca-certificates \
        fonts-liberation \
        libappindicator3-1 \
        libasound2 \
        libatk-bridge2.0-0 \
        libatk1.0-0 \
        libc6 \
        libcairo2 \
        libcups2 \
        libdbus-1-3 \
        libexpat1 \
        libfontconfig1 \
        libgbm1 \
        libgcc1 \
        libglib2.0-0 \
        libgtk-3-0 \
        libnspr4 \
        libnss3 \
        libpango-1.0-0 \
        libpangocairo-1.0-0 \
        libstdc++6 \
        libx11-6 \
        libx11-xcb1 \
        libxcb1 \
        libxcomposite1 \
        libxcursor1 \
        libxdamage1 \
        libxext6 \
        libxfixes3 \
        libxi6 \
        libxrandr2 \
        libxrender1 \
        libxss1 \
        libxtst6 \
        lsb-release \
        unzip \
        wget \
        xdg-utils \
    && rm -rf /var/lib/apt/lists/*

# See urls of matching chrome and chromedriver builds here:
# https://googlechromelabs.github.io/chrome-for-testing/last-known-good-versions-with-downloads.json
# Chrome and chromedriver must be the same version, so both downloads are
# driven by this single build argument. Override with:
#   docker build --build-arg CHROME_VERSION=<version> ...
# Declared after the apt layer so changing it does not invalidate that layer.
ARG CHROME_VERSION=121.0.6167.85

# Install chrome -> /usr/local/bin/chrome-linux64/chrome
# Download, unpack and delete the archive in one layer so the zip is not
# kept in the image.
RUN wget --no-verbose -O /tmp/chrome.zip \
        "https://edgedl.me.gvt1.com/edgedl/chrome/chrome-for-testing/${CHROME_VERSION}/linux64/chrome-linux64.zip" \
    && unzip -q /tmp/chrome.zip -d /usr/local/bin/ \
    && rm /tmp/chrome.zip

# Install chromedriver -> /usr/local/bin/chromedriver-linux64/chromedriver
RUN wget --no-verbose -O /tmp/chromedriver.zip \
        "https://edgedl.me.gvt1.com/edgedl/chrome/chrome-for-testing/${CHROME_VERSION}/linux64/chromedriver-linux64.zip" \
    && unzip -q /tmp/chromedriver.zip -d /usr/local/bin/ \
    && rm /tmp/chromedriver.zip

# Selenium is pinned to the 3.x line: test_selenium.py creates the driver with
# the Selenium 3 `executable_path` keyword.
RUN pip3 install --no-cache-dir selenium==3.14

# NOTE: no USER is set, so the container runs as root; test_selenium.py passes
# --no-sandbox to Chrome for that reason.
WORKDIR /app
COPY test_selenium.py .

# Exec form: python3 is started directly rather than through `/bin/sh -c`.
CMD ["python3", "./test_selenium.py"]
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from os.path import abspath | |
from selenium import webdriver | |
from selenium.webdriver.chrome.options import Options | |
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities | |
# Smoke test: start headless Chrome through chromedriver and load google.com.
# The two paths below are where the Dockerfile unpacks the Chrome for Testing
# archives.
chrome_path = '/usr/local/bin/chrome-linux64/chrome'
chromedriver_path = '/usr/local/bin/chromedriver-linux64/chromedriver'
port = 4444  # 9820

# Android user agent
# (not applied anywhere in the code shown here; kept for experimentation)
user_agent = 'Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.6045.163 Mobile Safari/537.36'

# Browser preferences. Only referenced by the commented-out
# add_experimental_option("prefs", prefs) call below, so currently inactive.
# NOTE(review): several keys (dom.*, geo.*, network.cookie.*, privacy.*) look
# like Firefox about:config names rather than Chrome prefs - confirm before
# enabling.
prefs = {
    "dom.push.enabled": False,
    "dom.webnotifications.enabled": False,
    "dom.disable_window_open_feature.location": True,
    "dom.popup_allowed_events": False,
    "privacy.popups.disable_from_plugins": 2,
    "network.cookie.cookieBehavior": 2,
    "profile.block_third_party_cookies": True,
    "dom.popup_maximum": 0,
    "geo.enabled": False,
    "geo.prompt.testing": True,
    "geo.prompt.testing.allow": False,
    "profile.default_content_setting_values.notifications": 2,
    "profile.managed_default_content_settings.images": 2
}

options = Options()
options.binary_location = chrome_path
# Headless is requested once via the command-line flag; the original also set
# options.headless = True, which requested the same thing a second time.
options.add_argument('--headless')
options.add_argument(f"--remote-debugging-port={port}")

# NOTE: Some of these options cause error when remote-debugging-port is specified
# WebDriverException: disconnected: not connected to DevTools
# Source: https://stackoverflow.com/a/76716772/12947681

# Solution to fix Bug: DevToolsActivePort file doesn't exist (while creating driver)
# Run as normal user (not root), OR run with --no-sandbox
# https://stackoverflow.com/a/50642913/12947681
options.add_argument('--no-sandbox')

# options.add_argument('--start-maximized')
# options.add_argument('--start-fullscreen')
# options.add_argument('--single-process')
# # options.add_argument('--disable-dev-shm-usage')
# options.add_argument('--disable-extensions')
# # options.add_argument('--disable-gpu')
# options.add_argument('--incognito')
# options.add_argument("--disable-blink-features=AutomationControlled")
# options.add_argument("--disable-notifications")
# options.add_argument('--disable-overlay-scrollbar')
# options.add_argument("--enable-javascript")
# options.add_argument("Access-Control-Allow-Origin='*'")
# options.add_argument('Connection=keep-alive')
# options.add_argument("Accept=text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8")
# options.add_argument("Cache-Control=max-age=0")
# options.add_argument("Accept-Encoding=gzip, deflate")
# options.add_argument("Sec-Fetch-Dest=document")
# options.add_argument("Sec-Fetch-Mode=navigate")
# options.add_argument("Sec-Ch-Ua-Mobile=?1")
# options.add_argument("Sec-Ch-Ua-Platform=Android")
# options.add_argument("Sec-Fetch-Site=cross-site")
# options.add_argument("Sec-Fetch-User=?1")
# options.add_argument("Upgrade-Insecure-Requests=1")
# options.add_argument("Accept-Language=en-US,en;q=0.9,fr;q=0.8")
# options.add_experimental_option("useAutomationExtension", False)
# options.add_experimental_option("excludeSwitches", ["enable-automation"])
# options.add_experimental_option("prefs", prefs)

# using Selenium 3
driver = webdriver.Chrome(executable_path=chromedriver_path, options=options)
try:
    print('Started chrome driver at port', port)
    driver.get("https://www.google.com")
    print('Driver loaded google.com')
finally:
    # Always end the WebDriver session, even when the page load raises, so
    # the browser and chromedriver processes are not left running.
    driver.quit()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Related: I haven't tested it myself, but the docker-selenium-lambda repo is supposed to work well in AWS Lambda.