Last active
August 28, 2022 13:55
-
-
Save mmziyad/e8905e0719c957a15e15362e95b97944 to your computer and use it in GitHub Desktop.
A really quick on-boarding for Apache airflow.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# install
mkdir -p ~/airflow && cd ~/airflow
# The PyPI package was renamed from "airflow" to "apache-airflow".
# The commands in this sheet use the Airflow 1.x CLI (initdb, list_dags, trigger_dag, ...),
# hence the version pin.
pip install 'apache-airflow<2'
# Have a look here if you need additional packages: https://airflow.incubator.apache.org/installation.html
# setup mysql backend as given here. The default SQLite is not adequate for some workloads.
# http://site.clairvoyantsoft.com/installing-and-configuring-apache-airflow/
# start services
# (first-time setup: run the "setup mysql" / `airflow initdb` step before starting these)
mysql.server start
# the log directory must exist, otherwise the >> redirections below fail
mkdir -p ~/airflow/logs
# "$@" forwards any extra webserver options (e.g. -p 8080) when this is run from a script;
# 2>&1 keeps error output in the same log file
nohup airflow webserver "$@" >> ~/airflow/logs/webserver.logs 2>&1 &
nohup airflow scheduler >> ~/airflow/logs/scheduler.logs 2>&1 &
# setup mysql
# The two statements below are SQL, not shell: feed them to the mysql client
# (run as an account that is allowed to create users, e.g. root).
# NOTE(review): ALL PRIVILEGES ON *.* WITH GRANT OPTION is very broad for an
# application account - consider limiting it to the Airflow database.
mysql -u root -p <<'SQL'
CREATE USER 'airflow'@'localhost' IDENTIFIED BY 'airflow';
GRANT ALL PRIVILEGES ON *.* TO 'airflow'@'localhost' WITH GRANT OPTION;
SQL
# initialize the Airflow metadata database
airflow initdb
# write your awesome dags. Airflow tutorial is just awesome!
# https://airflow.incubator.apache.org/tutorial.html
# to see current dags:
airflow list_dags
# to test your dag:
# sanity check (the DAG file should run as a plain Python script without raising):
python ~/airflow/dags/dag_contents_dir/dag_your_aweseome_workflow.py
# list and test each task inside your dag named 'your-awesome-workflow':
airflow list_tasks your-awesome-workflow
# replace YYYY-MM-DD with the execution date to test against
airflow test your-awesome-workflow your-awesome-workflow-task YYYY-MM-DD
# manual trigger
airflow trigger_dag -e YYYY-MM-DD your-awesome-workflow
# to manually trigger externally triggered dag
# The JSON passed to --conf / -c must be single-quoted: unquoted, the shell strips
# the double quotes and splits the value on the space, so the JSON arrives mangled.
airflow trigger_dag --conf '{"file_variable": "/path/to/file"}' dag_id
airflow trigger_dag -c '{"batch": "YYYYMMDD"}' your-awesome-externally-triggered-workflow
# to update packages
pip install --upgrade google-api-python-client
# to reset db
# WARNING: destructive - this drops and rebuilds the Airflow metadata database
airflow resetdb
# shutdown services:
# pkill -f matches against the full command line and never matches itself,
# unlike `ps -ef | grep ...`, which also picks up the grep process and leaves
# `kill` with no arguments when nothing is running.
pkill -f "airflow webserver"
pkill -f "airflow scheduler"
# in order to re-run a dag run:
# 1. From airflow UI, delete all the task instances for the dag run
# 2. From airflow UI, change state of the dag run instance to ** running **
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment