Skip to content

Instantly share code, notes, and snippets.

@laughingman7743
Last active February 24, 2016 12:47
Show Gist options
  • Save laughingman7743/3f99108bbcd9a4c489f2 to your computer and use it in GitHub Desktop.
Save laughingman7743/3f99108bbcd9a4c489f2 to your computer and use it in GitHub Desktop.
[program:airflow-scheduler]
process_name=%(program_name)s
directory=/home/airflow
environment=PATH="/usr/local/bin:/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/sbin:$PATH",AIRFLOW_HOME=/home/airflow/airflow
command=/usr/local/bin/airflow scheduler
autostart=true
autorestart=true
user=airflow
redirect_stderr=true
stdout_logfile=/var/log/airflow/%(program_name)s.log
stdout_logfile_maxbytes=10MB
stdout_logfile_backups=5
stopasgroup=true
[program:airflow-webserver]
process_name=%(program_name)s
directory=/home/airflow
environment=PATH="/usr/local/bin:/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/sbin:$PATH",AIRFLOW_HOME=/home/airflow/airflow
command=/usr/local/bin/airflow webserver
autostart=true
autorestart=true
user=airflow
redirect_stderr=true
stdout_logfile=/var/log/airflow/%(program_name)s.log
stdout_logfile_maxbytes=10MB
stdout_logfile_backups=5
stopasgroup=true
[program:airflow-worker]
process_name=%(program_name)s
directory=/home/airflow
environment=PATH="/usr/local/bin:/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/sbin:$PATH",AIRFLOW_HOME=/home/airflow/airflow
command=/usr/local/bin/airflow worker
autostart=true
autorestart=true
user=airflow
redirect_stderr=true
stdout_logfile=/var/log/airflow/%(program_name)s.log
stdout_logfile_maxbytes=10MB
stdout_logfile_backups=5
stopasgroup=true
[core]
# The home folder for airflow, default is ~/airflow
airflow_home = /home/airflow/airflow
# The folder where your airflow pipelines live, most likely a
# subfolder in a code repository
dags_folder = /home/airflow/airflow/dags
# The folder where airflow should store its log files. This location
base_log_folder = /home/airflow/airflow/logs
# An S3 location can be provided for log backups
# For S3, use the full URL to the base folder (starting with "s3://...")
s3_log_folder = None
# The executor class that airflow should use. Choices include
# SequentialExecutor, LocalExecutor, CeleryExecutor
executor = LocalExecutor
#executor = CeleryExecutor
# The SqlAlchemy connection string to the metadata database.
# SqlAlchemy supports many different database engine, more information
# their website
sql_alchemy_conn = sqlite:////home/airflow/airflow/airflow.db
# The amount of parallelism as a setting to the executor. This defines
# the max number of task instances that should run simultaneously
# on this airflow installation
parallelism = 8
# The number of task instances allowed to run concurrently by the scheduler
dag_concurrency = 4
# The maximum number of active DAG runs per DAG
max_active_runs_per_dag = 4
# Whether to load the examples that ship with Airflow. It's good to
# get started, but you probably want to set this to False in a production
# environment
load_examples = False
# Where your Airflow plugins are stored
plugins_folder = /home/airflow/airflow/plugins
# Secret key to save connection passwords in the db
fernet_key = cryptography_not_found_storing_passwords_in_plain_text
# Whether to disable pickling dags
donot_pickle = False
[webserver]
# The base url of your website as airflow cannot guess what domain or
# cname you are using. This is use in automated emails that
# airflow sends to point links to the right web server
base_url = http://localhost:8080
# The ip specified when starting the web server
web_server_host = 0.0.0.0
# The port on which to run the web server
web_server_port = 8080
# Secret key used to run your flask app
secret_key = temporary_key
# Number of workers to run the Gunicorn web server
workers = 2
# The worker class gunicorn should use. Choices include
# sync (default), eventlet, gevent
worker_class = sync
# Expose the configuration file in the web server
expose_config = true
# Set to true to turn on authentication : http://pythonhosted.org/airflow/installation.html#web-authentication
authenticate = False
# Filter the list of dags by owner name (requires authentication to be enabled)
filter_by_owner = False
[smtp]
# If you want airflow to send emails on retries, failure, and you want to
# the airflow.utils.send_email function, you have to configure an smtp
# server here
smtp_host = localhost
smtp_starttls = True
smtp_user = airflow
smtp_port = 25
smtp_password = airflow
smtp_mail_from = [email protected]
[celery]
# This section only applies if you are using the CeleryExecutor in
# [core] section above
# The app name that will be used by celery
celery_app_name = airflow.executors.celery_executor
# The concurrency that will be used when starting workers with the
# "airflow worker" command. This defines the number of task instances that
# a worker will take, so size up your workers based on the resources on
# your worker box and the nature of your tasks
celeryd_concurrency = 4
# When you start an airflow worker, airflow starts a tiny web server
# subprocess to serve the workers local log files to the airflow main
# web server, who then builds pages and sends them to users. This defines
# the port on which the logs are served. It needs to be unused, and open
# visible from the main web server to connect into the workers.
worker_log_server_port = 8793
# The Celery broker URL. Celery supports RabbitMQ, Redis and experimentally
# a sqlalchemy database. Refer to the Celery documentation for more
# information.
broker_url = sqla+mysql://airflow:airflow@localhost:3306/airflow
# Another key Celery setting
celery_result_backend = db+mysql://airflow:airflow@localhost:3306/airflow
# Celery Flower is a sweet UI for Celery. Airflow has a shortcut to start
# it `airflow flower`. This defines the port that Celery Flower runs on
flower_port = 5555
# Default queue that tasks get assigned to and that worker listen on.
default_queue = default
[scheduler]
# Task instances listen for external kill signal (when you clear tasks
# from the CLI or the UI), this defines the frequency at which they should
# listen (in seconds).
job_heartbeat_sec = 5
# The scheduler constantly tries to trigger new tasks (look at the
# scheduler section in the docs for more information). This defines
# how often the scheduler should run (in seconds).
scheduler_heartbeat_sec = 5
# Statsd (https://github.com/etsy/statsd) integration settings
# statsd_on = False
# statsd_host = localhost
# statsd_port = 8125
# statsd_prefix = airflow
[mesos]
# Mesos master address which MesosExecutor will connect to.
master = localhost:5050
# The framework name which Airflow scheduler will register itself as on mesos
framework_name = Airflow
# Number of cpu cores required for running one task instance using
# 'airflow run <dag_id> <task_id> <execution_date> --local -p <pickle_id>'
# command on a mesos slave
task_cpu = 1
# Memory in MB required for running one task instance using
# 'airflow run <dag_id> <task_id> <execution_date> --local -p <pickle_id>'
# command on a mesos slave
task_memory = 256
# Enable framework checkpointing for mesos
# See http://mesos.apache.org/documentation/latest/slave-recovery/
checkpoint = False
# Failover timeout in milliseconds.
# When checkpointing is enabled and this option is set, Mesos waits until the configured timeout for
# the MesosExecutor framework to re-register after a failover. Mesos shuts down running tasks if the
# MesosExecutor framework fails to re-register within this timeframe.
# failover_timeout = 604800
# Enable framework authentication for mesos
# See http://mesos.apache.org/documentation/latest/configuration/
authenticate = False
# Mesos credentials, if authentication is enabled
# default_principal = admin
# default_secret = admin
#!/bin/bash
#
# supervisord This scripts turns supervisord on
#
# Author: Mike McGrath <[email protected]> (based off yumupdatesd)
# Jason Koppe <[email protected]> adjusted to read sysconfig,
# use supervisord tools to start/stop, conditionally wait
# for child processes to shutdown, and startup later
#
# chkconfig: 345 83 04
#
# description: supervisor is a process control utility. It has a web based
# xmlrpc interface as well as a few other nifty features.
# processname: supervisord
# config: /etc/supervisord.conf
# pidfile: /var/run/supervisord.pid
#
# Treat unset variables as error
#set -o nounset
# source function library
. /etc/rc.d/init.d/functions
# source system settings
[ -e /etc/sysconfig/supervisord ] && . /etc/sysconfig/supervisord
RETVAL=0
start() {
echo "Starting supervisord: "
if [ -e $PIDFILE ]; then
echo "ALREADY STARTED"
return 1
fi
# start supervisord with options from sysconfig (stuff like -c)
/usr/local/bin/supervisord $OPTIONS
# show initial startup status
/usr/local/bin/supervisorctl $OPTIONS status
# only create the subsyslock if we created the PIDFILE
[ -e $PIDFILE ] && touch /var/lock/subsys/supervisord
}
stop() {
echo -n "Stopping supervisord: "
/usr/local/bin/supervisorctl $OPTIONS shutdown
if [ -n "$WAIT_FOR_SUBPROCESSES" ]; then
echo "Waiting roughly 60 seconds for $PIDFILE to be removed after child processes exit"
for sleep in 2 2 2 2 4 4 4 4 8 8 8 8 last; do
if [ ! -e $PIDFILE ] ; then
echo "Supervisord exited as expected in under $total_sleep seconds"
break
else
if [[ $sleep -eq "last" ]] ; then
echo "Supervisord still working on shutting down. We've waited roughly 60 seconds, we'll let it do its thing from here"
return 1
else
sleep $sleep
total_sleep=$(( $total_sleep + $sleep ))
fi
fi
done
fi
# always remove the subsys. we might have waited a while, but just remove it at this point.
rm -f /var/lock/subsys/supervisord
}
restart() {
stop
start
}
case "$1" in
start)
start
RETVAL=$?
;;
stop)
stop
RETVAL=$?
;;
restart|force-reload)
restart
RETVAL=$?
;;
reload)
/usr/local/bin/supervisorctl $OPTIONS reload
RETVAL=$?
;;
condrestart)
[ -f /var/lock/subsys/supervisord ] && restart
RETVAL=$?
;;
status)
/usr/local/bin/supervisorctl $OPTIONS status
RETVAL=$?
;;
*)
echo $"Usage: $0 {start|stop|status|restart|reload|force-reload|condrestart}"
exit 1
esac
exit $RETVAL
; Sample supervisor config file.
;
; For more information on the config file, please see:
; http://supervisord.org/configuration.html
;
; Notes:
; - Shell expansion ("~" or "$HOME") is not supported. Environment
; variables can be expanded using this syntax: "%(ENV_HOME)s".
; - Comments must have a leading space: "a=b ;comment" not "a=b;comment".
[unix_http_server]
file=/tmp/supervisor.sock ; (the path to the socket file)
;chmod=0700 ; socket file mode (default 0700)
;chown=nobody:nogroup ; socket file uid:gid owner
;username=user ; (default is no username (open server))
;password=123 ; (default is no password (open server))
[inet_http_server] ; inet (TCP) server disabled by default
port=*:9001 ; (ip_address:port specifier, *:port for all iface)
;username=user ; (default is no username (open server))
;password=123 ; (default is no password (open server))
[supervisord]
logfile=/var/log/supervisord.log ; (main log file;default $CWD/supervisord.log)
logfile_maxbytes=10MB ; (max main logfile bytes b4 rotation;default 50MB)
logfile_backups=5 ; (num of main logfile rotation backups;default 10)
loglevel=info ; (log level;default info; others: debug,warn,trace)
pidfile=/var/run/supervisord.pid ; (supervisord pidfile;default supervisord.pid)
nodaemon=false ; (start in foreground if true;default false)
minfds=1024 ; (min. avail startup file descriptors;default 1024)
minprocs=200 ; (min. avail process descriptors;default 200)
;umask=022 ; (process file creation umask;default 022)
;user=chrism ; (default is current user, required if root)
;identifier=supervisor ; (supervisord identifier, default is 'supervisor')
;directory=/tmp ; (default is not to cd during start)
;nocleanup=true ; (don't clean up tempfiles at start;default false)
;childlogdir=/tmp ; ('AUTO' child log dir, default $TEMP)
;environment=KEY="value" ; (key value pairs to add to environment)
;strip_ansi=false ; (strip ansi escape codes in logs; def. false)
; the below section must remain in the config file for RPC
; (supervisorctl/web interface) to work, additional interfaces may be
; added by defining them in separate rpcinterface: sections
[rpcinterface:supervisor]
supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface
[supervisorctl]
serverurl=unix:///tmp/supervisor.sock ; use a unix:// URL for a unix socket
serverurl=http://127.0.0.1:9001 ; use an http:// url to specify an inet socket
;username=chris ; should be same as http_username if set
;password=123 ; should be same as http_password if set
;prompt=mysupervisor ; cmd line prompt (default "supervisor")
;history_file=~/.sc_history ; use readline history if available
; The below sample program section shows all possible program subsection values,
; create one or more 'real' program: sections to be able to control them under
; supervisor.
;[program:theprogramname]
;command=/bin/cat ; the program (relative uses PATH, can take args)
;process_name=%(program_name)s ; process_name expr (default %(program_name)s)
;numprocs=1 ; number of processes copies to start (def 1)
;directory=/tmp ; directory to cwd to before exec (def no cwd)
;umask=022 ; umask for process (default None)
;priority=999 ; the relative start priority (default 999)
;autostart=true ; start at supervisord start (default: true)
;startsecs=1 ; # of secs prog must stay up to be running (def. 1)
;startretries=3 ; max # of serial start failures when starting (default 3)
;autorestart=unexpected ; when to restart if exited after running (def: unexpected)
;exitcodes=0,2 ; 'expected' exit codes used with autorestart (default 0,2)
;stopsignal=QUIT ; signal used to kill process (default TERM)
;stopwaitsecs=10 ; max num secs to wait b4 SIGKILL (default 10)
;stopasgroup=false ; send stop signal to the UNIX process group (default false)
;killasgroup=false ; SIGKILL the UNIX process group (def false)
;user=chrism ; setuid to this UNIX account to run the program
;redirect_stderr=true ; redirect proc stderr to stdout (default false)
;stdout_logfile=/a/path ; stdout log path, NONE for none; default AUTO
;stdout_logfile_maxbytes=1MB ; max # logfile bytes b4 rotation (default 50MB)
;stdout_logfile_backups=10 ; # of stdout logfile backups (default 10)
;stdout_capture_maxbytes=1MB ; number of bytes in 'capturemode' (default 0)
;stdout_events_enabled=false ; emit events on stdout writes (default false)
;stderr_logfile=/a/path ; stderr log path, NONE for none; default AUTO
;stderr_logfile_maxbytes=1MB ; max # logfile bytes b4 rotation (default 50MB)
;stderr_logfile_backups=10 ; # of stderr logfile backups (default 10)
;stderr_capture_maxbytes=1MB ; number of bytes in 'capturemode' (default 0)
;stderr_events_enabled=false ; emit events on stderr writes (default false)
;environment=A="1",B="2" ; process environment additions (def no adds)
;serverurl=AUTO ; override serverurl computation (childutils)
; The below sample eventlistener section shows all possible
; eventlistener subsection values, create one or more 'real'
; eventlistener: sections to be able to handle event notifications
; sent by supervisor.
;[eventlistener:theeventlistenername]
;command=/bin/eventlistener ; the program (relative uses PATH, can take args)
;process_name=%(program_name)s ; process_name expr (default %(program_name)s)
;numprocs=1 ; number of processes copies to start (def 1)
;events=EVENT ; event notif. types to subscribe to (req'd)
;buffer_size=10 ; event buffer queue size (default 10)
;directory=/tmp ; directory to cwd to before exec (def no cwd)
;umask=022 ; umask for process (default None)
;priority=-1 ; the relative start priority (default -1)
;autostart=true ; start at supervisord start (default: true)
;startsecs=1 ; # of secs prog must stay up to be running (def. 1)
;startretries=3 ; max # of serial start failures when starting (default 3)
;autorestart=unexpected ; autorestart if exited after running (def: unexpected)
;exitcodes=0,2 ; 'expected' exit codes used with autorestart (default 0,2)
;stopsignal=QUIT ; signal used to kill process (default TERM)
;stopwaitsecs=10 ; max num secs to wait b4 SIGKILL (default 10)
;stopasgroup=false ; send stop signal to the UNIX process group (default false)
;killasgroup=false ; SIGKILL the UNIX process group (def false)
;user=chrism ; setuid to this UNIX account to run the program
;redirect_stderr=false ; redirect_stderr=true is not allowed for eventlisteners
;stdout_logfile=/a/path ; stdout log path, NONE for none; default AUTO
;stdout_logfile_maxbytes=1MB ; max # logfile bytes b4 rotation (default 50MB)
;stdout_logfile_backups=10 ; # of stdout logfile backups (default 10)
;stdout_events_enabled=false ; emit events on stdout writes (default false)
;stderr_logfile=/a/path ; stderr log path, NONE for none; default AUTO
;stderr_logfile_maxbytes=1MB ; max # logfile bytes b4 rotation (default 50MB)
;stderr_logfile_backups=10 ; # of stderr logfile backups (default 10)
;stderr_events_enabled=false ; emit events on stderr writes (default false)
;environment=A="1",B="2" ; process environment additions
;serverurl=AUTO ; override serverurl computation (childutils)
; The below sample group section shows all possible group values,
; create one or more 'real' group: sections to create "heterogeneous"
; process groups.
;[group:thegroupname]
;programs=progname1,progname2 ; each refers to 'x' in [program:x] definitions
;priority=999 ; the relative start priority (default 999)
; The [include] section can just contain the "files" setting. This
; setting can list multiple files (separated by whitespace or
; newlines). It can also contain wildcards. The filenames are
; interpreted as relative to this file. Included files *cannot*
; include files themselves.
[include]
files=/etc/supervisord.d/*.conf
# this is sourced by the supervisord init script
# written by jkoppe
set -a
# should probably put both of these options as runtime arguments
OPTIONS="-c /etc/supervisord.conf"
PIDFILE=/var/run/supervisord.pid
# unset this variable if you don't care to wait for child processes to shutdown before removing the /var/lock/subsys/supervisord lock
WAIT_FOR_SUBPROCESSES=yes
# remove this if you manage number of open files in some other fashion
ulimit -n 96000
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment