Created
December 31, 2023 13:32
-
-
Save xescuder/c05305c92035202693818db272868bda to your computer and use it in GitHub Desktop.
Docker Compose Airflow
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
version: "3"
# Shared base definition merged into every Airflow service below via YAML anchor/merge.
x-airflow-common: &airflow-common
  # In order to add custom dependencies or upgrade provider packages you can use your extended image.
  # Comment the image line, place your Dockerfile in the directory where you placed the docker-compose.yaml
  # and uncomment the "build" line below, then run `docker-compose build` to build the images.
  image: trading-airflow # ${AIRFLOW_IMAGE_NAME:-apache/airflow}
  # build: .
  environment: &airflow-common-env
    AIRFLOW__CORE__EXECUTOR: LocalExecutor
    AIRFLOW__DATABASE__SQL_ALCHEMY_CONN: postgresql+psycopg2://airflow:airflow@postgres/airflow
    # For backward compatibility with Airflow <2.3
    AIRFLOW__CORE__SQL_ALCHEMY_CONN: postgresql+psycopg2://airflow:airflow@postgres/airflow
    AIRFLOW__CORE__FERNET_KEY: ""
    AIRFLOW__CORE__DAGS_ARE_PAUSED_AT_CREATION: "true"
    AIRFLOW__CORE__LOAD_EXAMPLES: "false" # Set to "true" to see some tutorials and examples
    AIRFLOW__API__AUTH_BACKENDS: "airflow.api.auth.backend.session"
    # Variables/connections are read from local files instead of the metadata DB.
    AIRFLOW__SECRETS__BACKEND: airflow.secrets.local_filesystem.LocalFilesystemBackend
    AIRFLOW__SECRETS__BACKEND_KWARGS: '{"variables_file_path": "/opt/secrets/variables.yaml", "connections_file_path": "/opt/secrets/connections.yaml"}'
    _PIP_ADDITIONAL_REQUIREMENTS: ${_PIP_ADDITIONAL_REQUIREMENTS:-apache-airflow-providers-apache-spark}
  volumes:
    - ${AIRFLOW_DAGS}:/opt/airflow/dags
    - ${AIRFLOW_LOGS}:/opt/airflow/logs
    - ./trading_data_pipeline/:/opt/airflow/trading_data_pipeline
  networks:
    - airflow-spark-network
  user: "${AIRFLOW_UID:-50000}:0"
  depends_on: &airflow-common-depends-on
    postgres:
      condition: service_healthy
services:
  # Airflow UI / REST API; becomes healthy once /health answers over HTTP.
  airflow-webserver:
    <<: *airflow-common
    command: webserver
    ports:
      # Quoted: unquoted HOST:CONTAINER mappings can hit YAML's implicit-typing traps.
      - "8080:8080"
    healthcheck:
      test: ["CMD", "curl", "--fail", "http://localhost:8080/health"]
      interval: 10s
      timeout: 10s
      retries: 5
    restart: always
    depends_on:
      <<: *airflow-common-depends-on
      airflow-init:
        condition: service_completed_successfully
airflow-scheduler: | |
<<: *airflow-common | |
command: scheduler | |
healthcheck: | |
test: | |
[ | |
"CMD-SHELL", | |
'airflow jobs check --job-type SchedulerJob --hostname "$${HOSTNAME}"', | |
] | |
interval: 10s | |
timeout: 10s | |
retries: 5 | |
restart: always | |
depends_on: | |
<<: *airflow-common-depends-on | |
airflow-init: | |
condition: service_completed_successfully | |
postgres: | |
image: postgres:13 | |
networks: | |
- airflow-spark-network | |
environment: | |
POSTGRES_USER: airflow | |
POSTGRES_PASSWORD: airflow | |
POSTGRES_DB: airflow | |
volumes: | |
- postgres-db-volume:/var/lib/postgresql/data | |
healthcheck: | |
test: ["CMD", "pg_isready", "-U", "airflow"] | |
interval: 5s | |
retries: 5 | |
restart: always | |
airflow-init: | |
<<: *airflow-common | |
entrypoint: /bin/bash | |
# yamllint disable rule:line-length | |
command: | |
- -c | |
- | | |
function ver() { | |
printf "%04d%04d%04d%04d" $${1//./ } | |
} | |
airflow_version=$$(AIRFLOW__LOGGING__LOGGING_LEVEL=INFO && gosu airflow airflow version) | |
airflow_version_comparable=$$(ver $${airflow_version}) | |
min_airflow_version=2.2.0 | |
min_airflow_version_comparable=$$(ver $${min_airflow_version}) | |
if (( airflow_version_comparable < min_airflow_version_comparable )); then | |
echo | |
echo -e "\033[1;31mERROR!!!: Too old Airflow version $${airflow_version}!\e[0m" | |
echo "The minimum Airflow version supported: $${min_airflow_version}. Only use this or higher!" | |
echo | |
exit 1 | |
fi | |
if [[ -z "${AIRFLOW_UID}" ]]; then | |
echo | |
echo -e "\033[1;33mWARNING!!!: AIRFLOW_UID not set!\e[0m" | |
echo "If you are on Linux, you SHOULD follow the instructions below to set " | |
echo "AIRFLOW_UID environment variable, otherwise files will be owned by root." | |
echo "For other operating systems you can get rid of the warning with manually created .env file:" | |
echo " See: https://airflow.apache.org/docs/apache-airflow/stable/start/docker.html#setting-the-right-airflow-user" | |
echo | |
fi | |
one_meg=1048576 | |
mem_available=$$(($$(getconf _PHYS_PAGES) * $$(getconf PAGE_SIZE) / one_meg)) | |
cpus_available=$$(grep -cE 'cpu[0-9]+' /proc/stat) | |
disk_available=$$(df / | tail -1 | awk '{print $$4}') | |
warning_resources="false" | |
if (( mem_available < 4000 )) ; then | |
echo | |
echo -e "\033[1;33mWARNING!!!: Not enough memory available for Docker.\e[0m" | |
echo "At least 4GB of memory required. You have $$(numfmt --to iec $$((mem_available * one_meg)))" | |
echo | |
warning_resources="true" | |
fi | |
if (( cpus_available < 2 )); then | |
echo | |
echo -e "\033[1;33mWARNING!!!: Not enough CPUS available for Docker.\e[0m" | |
echo "At least 2 CPUs recommended. You have $${cpus_available}" | |
echo | |
warning_resources="true" | |
fi | |
if (( disk_available < one_meg * 10 )); then | |
echo | |
echo -e "\033[1;33mWARNING!!!: Not enough Disk space available for Docker.\e[0m" | |
echo "At least 10 GBs recommended. You have $$(numfmt --to iec $$((disk_available * 1024 )))" | |
echo | |
warning_resources="true" | |
fi | |
if [[ $${warning_resources} == "true" ]]; then | |
echo | |
echo -e "\033[1;33mWARNING!!!: You have not enough resources to run Airflow (see above)!\e[0m" | |
echo "Please follow the instructions to increase amount of resources available:" | |
echo " https://airflow.apache.org/docs/apache-airflow/stable/start/docker.html#before-you-begin" | |
echo | |
fi | |
mkdir -p /sources/logs /sources/dags /sources/plugins | |
chown -R "${AIRFLOW_UID}:0" /sources/{logs,dags,plugins} | |
exec /entrypoint airflow version | |
# yamllint enable rule:line-length | |
environment: | |
<<: *airflow-common-env | |
_AIRFLOW_DB_UPGRADE: "true" | |
_AIRFLOW_WWW_USER_CREATE: "true" | |
_AIRFLOW_WWW_USER_USERNAME: ${_AIRFLOW_WWW_USER_USERNAME:-airflow} | |
_AIRFLOW_WWW_USER_PASSWORD: ${_AIRFLOW_WWW_USER_PASSWORD:-airflow} | |
_PIP_ADDITIONAL_REQUIREMENTS: "" | |
user: "0:0" | |
volumes: | |
- .:/sources | |
networks:
  # Pre-existing network shared with the Spark stack; create it before `up`:
  #   docker network create airflow-spark-network
  airflow-spark-network:
    external: true
volumes:
  postgres-db-volume:
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment