Created
May 18, 2020 07:54
-
-
Save matt-bertoncello/a7296c4fc6cdbb8424ffa26b2b9b9257 to your computer and use it in GitHub Desktop.
Dockerfile and Python script to run Selenium with headless chromedriver in a Debian Docker container.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Shared base image: Python 3.7 on Debian buster (slim variant).
FROM python:3.7-slim-buster AS base

## start builder stage.
# First stage of the build: it installs all system and Python requirements.
FROM base AS builder
# install all packages for chromedriver: https://gist.github.com/varyonic/dea40abcf3dd891d204ef235c6e8dd79
#
# Everything happens in one RUN so the apt lists, the signing key and the
# downloaded archive are removed in the same layer that created them.
# - The signing key is downloaded to a file first, so a failed download stops
#   the build instead of being hidden behind a pipe.
# - CHROMEVER holds the full Chrome version; ${CHROMEVER%.*} strips the last
#   component, leaving MAJOR.MINOR.BUILD for the release lookup.
# - chromedriver for Chrome 115+ is published through "Chrome for Testing";
#   the legacy chromedriver.storage.googleapis.com endpoint has no releases
#   for those versions. curl -f makes an HTTP error fail the build.
RUN apt-get update && \
    apt-get install -y --no-install-recommends curl gnupg unzip wget xvfb && \
    wget -q -O /tmp/linux_signing_key.pub https://dl-ssl.google.com/linux/linux_signing_key.pub && \
    apt-key add /tmp/linux_signing_key.pub && \
    echo "deb http://dl.google.com/linux/chrome/deb/ stable main" >> /etc/apt/sources.list.d/google.list && \
    apt-get update -y && \
    apt-get install -y google-chrome-stable && \
    CHROMEVER=$(google-chrome --product-version) && \
    DRIVERVER=$(curl -fsS "https://googlechromelabs.github.io/chrome-for-testing/LATEST_RELEASE_${CHROMEVER%.*}") && \
    wget -q -P /tmp "https://storage.googleapis.com/chrome-for-testing-public/${DRIVERVER}/linux64/chromedriver-linux64.zip" && \
    unzip -q /tmp/chromedriver-linux64.zip -d /tmp && \
    # install the driver as an executable on the default selenium path.
    install -m 0755 /tmp/chromedriver-linux64/chromedriver /usr/bin/chromedriver && \
    rm -rf /tmp/chromedriver-linux64 /tmp/chromedriver-linux64.zip /tmp/linux_signing_key.pub /var/lib/apt/lists/*
# Copy the Python requirements file into the image and install the requirements.
# The file is deleted in the same RUN that consumes it rather than in an extra layer.
COPY requirements.txt /requirements.txt
RUN pip install --upgrade --no-cache-dir -r /requirements.txt && \
    rm /requirements.txt

# Copy the source code into /app. No WORKDIR is set, so the working directory is unchanged.
COPY src /app
## end builder stage.
#####
## start final stage.
# This is the image that is run; it builds directly on the builder stage.
FROM builder AS runtime

# Set the proxy addresses (key=value form; the space-separated ENV form is legacy).
ENV HTTP_PROXY="http://134.209.29.120:8080" \
    HTTPS_PROXY="https://45.77.71.140:9050"

# Default entry point. The script path is absolute so it does not depend on
# the container's working directory.
CMD ["python", "/app/webscraper.py", "-c"]
## end final stage.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# import external libraries.
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from pyvirtualdisplay import Display

# set xvfb display since there is no GUI in docker container.
display = Display(visible=0, size=(800, 600))
display.start()

try:
    chrome_options = Options()
    # run Chrome without its sandbox.
    chrome_options.add_argument('--no-sandbox')
    # keep Chrome from relying on /dev/shm for shared memory.
    chrome_options.add_argument('--disable-dev-shm-usage')

    print('building session')
    driver = webdriver.Chrome(options=chrome_options)
    try:
        ## DO STUFF
        pass
    finally:
        # close chromedriver; the session is bound to `driver` (there is no
        # `session` name), and it is closed even if the work above raises.
        driver.quit()
finally:
    # always stop the virtual display, even if Chrome failed to start.
    display.stop()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment