In the `files/` folder (the scripts below expect it to sit next to them), create the following files:

Dockerfile
# Packaging image: downloads the packages listed in the build requirements
# file and zips them into plugins.zip (the work is done by build-whl.sh when
# the container is run).
# The Python version here should match the constraints file referenced in
# mwaa-requirements-build.txt (constraints-3.10.txt).
FROM python:3.10.13
WORKDIR /managed-airflow
COPY mwaa-requirements-build.txt /managed-airflow/requirements.txt
COPY build-whl.sh /managed-airflow/build-whl.sh
# COPY keeps the host file mode, and the script is executed directly as the
# container command, so make sure it is executable even if the host copy
# was created without the +x bit.
RUN chmod +x /managed-airflow/build-whl.sh
# zip is needed by build-whl.sh; drop the apt lists afterwards to keep the
# layer small.
RUN apt-get update -y \
    && apt-get install -y zip \
    && rm -rf /var/lib/apt/lists/*
build-whl.sh
#!/usr/bin/env bash
#
# Runs inside the packaging container: downloads every package named in
# /managed-airflow/requirements.txt, zips the downloaded files (flat, from
# inside the download directory) and moves the archive to
# /managed-airflow-build, which build-plugins.sh bind-mounts from the host.
set -euo pipefail
set +x

work_dir=/managed-airflow
download_dir="${work_dir}/plugins"
archive="${work_dir}/plugins.zip"

# With `set -e` each step aborts the script on failure, so no explicit
# chaining is needed.
mkdir -p "${download_dir}"
pip3 download -r "${work_dir}/requirements.txt" -d "${download_dir}"

# Zip from inside the directory so archive entries carry no path prefix.
cd "${download_dir}"
zip "${archive}" *

mv "${archive}" /managed-airflow-build
mwaa-requirements-build.txt
# Build-time requirements. The Dockerfile copies this file into the image as
# /managed-airflow/requirements.txt and build-whl.sh passes it to
# `pip3 download`.
#
# The constraints URL targets Airflow 2.6.3 on Python 3.10; keep it in step
# with the apache-airflow pin below and with the Python version of the
# Dockerfile base image.
--constraint "https://raw.githubusercontent.com/apache/airflow/constraints-2.6.3/constraints-3.10.txt"
# Airflow itself plus the provider extras to bundle.
apache-airflow[amazon,databricks,slack,datadog,celery,common.sql,ftp,http,imap,postgres,sqlite,snowflake,mongo,hive]==2.6.3
# Additional explicitly pinned packages. The same list appears in
# mwaa-requirements.txt; change both together.
datadog==0.45.0
pymongo==4.4.0
PyHive==0.6.5
acryl-datahub-airflow-plugin==0.11.0.5
mwaa-requirements.txt
# Runtime requirements. syncPluginsAndRequirements.sh uploads this file to the
# bucket under the key requirements.txt.
#
# --find-links + --no-index tell pip to resolve packages only from the given
# local directory and never from a package index.
# NOTE(review): /usr/local/airflow/plugins is presumably where the contents of
# plugins.zip are made available on the managed Airflow workers - confirm.
--find-links /usr/local/airflow/plugins
--no-index
# Package list: the same pins as mwaa-requirements-build.txt; change both
# together so every package requested here is present in plugins.zip.
apache-airflow[amazon,databricks,slack,datadog,celery,common.sql,ftp,http,imap,postgres,sqlite,snowflake,mongo,hive]==2.6.3
datadog==0.45.0
pymongo==4.4.0
PyHive==0.6.5
acryl-datahub-airflow-plugin==0.11.0.5
#!/usr/bin/env bash
#
# Builds the packaging image from files/Dockerfile and runs it to produce
# plugins.zip. The files/ directory next to this script is used both as the
# docker build context and as the bind mount that receives the archive.
#
# Requires: docker.
set -euo pipefail
set +x

# Directory containing this script, resolved to an absolute path so the
# script can be invoked from anywhere.
BASE_DIR="$(realpath "$(dirname "${BASH_SOURCE[0]}")")"

echo "building container for packaging"
# Paths are quoted so a checkout location containing spaces still works.
docker build \
  --file "${BASE_DIR}/files/Dockerfile" \
  --platform linux/amd64 \
  -t mwaa-plugins-build \
  "${BASE_DIR}/files"

echo "building plugins.zip"
run_args=(
  --rm
  --platform linux/amd64
  --mount "type=bind,source=${BASE_DIR}/files,target=/managed-airflow-build"
)
# `-it` makes docker fail when no terminal is attached (CI, pipes), so only
# request an interactive TTY when stdin and stdout really are terminals.
if [[ -t 0 && -t 1 ]]; then
  run_args+=(-it)
fi
docker run "${run_args[@]}" \
  mwaa-plugins-build:latest /managed-airflow/build-whl.sh
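Save the script above as `build-plugins.sh` (next to the `files/` folder) and run it.
It leaves a `plugins.zip` in `files/`, suitable for uploading to S3 and using in Managed Airflow.
Make sure the Python version (Dockerfile base image and constraints file) and the Airflow version (constraints URL and `apache-airflow` pin) are correct, then add whatever extras you need.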
The following script, `syncPluginsAndRequirements.sh`, can be helpful for uploading both files and printing their latest S3 object version IDs for use in Terraform.
#!/usr/bin/env bash
#
# Copies files/mwaa-requirements.txt and files/plugins.zip to the
# "<environment>-managed-airflow" bucket and prints the latest version id of
# each object.
#
# Usage: syncPluginsAndRequirements.sh <environment> [dry_run]
#   environment  bucket-name prefix (required)
#   dry_run      defaults to "true"
#
# Requires: aws CLI, jq.
set -euo pipefail
set +x

# Directory containing this script, resolved to an absolute path.
BASE_DIR="$(realpath "$(dirname "${BASH_SOURCE[0]}")")"

# Fail with a usage hint instead of a bare "unbound variable" error when the
# environment argument is missing or empty.
export ENVIRONMENT="${1:?usage: ${0##*/} <environment> [dry_run]}"
export DRY_RUN="${2:-true}"
#######################################
# Print the VersionId of the latest object version(s) found under a key
# prefix in the "<env>-managed-airflow" bucket.
# Arguments:
#   $1 - environment name, used as the bucket-name prefix
#   $2 - value passed to `--prefix` of list-object-versions
# Outputs:
#   One VersionId per line on stdout, exactly as jq prints it (JSON-encoded,
#   i.e. wrapped in double quotes).
# Requires: aws CLI, jq.
#######################################
function extract_version() {
  # Locals keep the caller's environment untouched; in particular the
  # shell-special ENV variable is no longer overwritten.
  local env_name=$1
  local key_prefix=$2

  # The prefix is quoted so values containing spaces or glob characters
  # reach aws as a single, unmodified argument.
  aws s3api list-object-versions \
    --bucket "${env_name}-managed-airflow" \
    --prefix "${key_prefix}" \
    | jq '.Versions[] | select(.IsLatest == true) | .VersionId'
}
# Upload the requirements file and plugins.zip, then report the latest
# version id of each object.
#
# The original test was inverted (the default DRY_RUN=true performed the real
# upload). Dry run is now the fail-safe default: the real copy only happens
# when DRY_RUN is exactly "false"; any other value only previews the copy.
if [[ "${DRY_RUN}" != "false" ]]; then
  echo "we will dry run for ${ENVIRONMENT} cp"
  aws s3 cp --dryrun "${BASE_DIR}/files/mwaa-requirements.txt" "s3://${ENVIRONMENT}-managed-airflow/requirements.txt"
  aws s3 cp --dryrun "${BASE_DIR}/files/plugins.zip" "s3://${ENVIRONMENT}-managed-airflow/plugins.zip"
  # Nothing was uploaded, so these are the ids of the objects already in
  # the bucket.
  echo "requirements.txt has version hash $(extract_version "${ENVIRONMENT}" "requirements.txt")"
  echo "plugins.zip has version hash $(extract_version "${ENVIRONMENT}" "plugins.zip")"
else
  echo "going to ${ENVIRONMENT} cp"
  aws s3 cp "${BASE_DIR}/files/mwaa-requirements.txt" "s3://${ENVIRONMENT}-managed-airflow/requirements.txt"
  aws s3 cp "${BASE_DIR}/files/plugins.zip" "s3://${ENVIRONMENT}-managed-airflow/plugins.zip"
  echo "requirements.txt latest version hash $(extract_version "${ENVIRONMENT}" "requirements.txt")"
  echo "plugins.zip latest version hash $(extract_version "${ENVIRONMENT}" "plugins.zip")"
fi