Last active
September 24, 2024 09:43
-
-
Save slash-cyberpunk/db2ae28586567036d65d50f6317feb47 to your computer and use it in GitHub Desktop.
Config for statsd_exporter v0.22.7 and Airflow 2.10.1
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
mappings:
# --- Job, LocalTaskJob, operator and task-instance status mappings ---
# Every regex below is anchored with ^...$ so that it has to cover the whole
# metric name instead of matching a substring of some longer name.

# airflow.<job_name>_<status>: $1 = job name, $2 = start|end|heartbeat_failure.
- match: '^airflow\.(.+)_(start|end|heartbeat_failure)$'
  match_type: regex
  help: "Number of status jobs"
  name: "airflow_jobs"
  labels:
    status: "$2"
    job_name: "$1"

# Glob: the four wildcards become job_id, dag_id, task_id and return_code.
- match: "airflow.local_task_job.task_exit.*.*.*.*"
  help: "Number of LocalTaskJob terminations with a return_code while running a task of a DAG."
  name: "airflow_local_task_job"
  labels:
    job_id: "$1"
    dag_id: "$2"
    task_id: "$3"
    return_code: "$4"

# airflow.operator_<status>_<operator_name>: the end anchor now sits outside
# the capture group; the captured text is unchanged.
- match: '^airflow\.operator_(failures|successes)_(.+)$'
  match_type: regex
  help: "Operator status"
  name: "airflow_operators"
  labels:
    status: "$1"
    operator_name: "$2"

# airflow.ti_failures / airflow.ti_successes only; previously this pattern had
# no end anchor and would also accept names that merely start with these.
- match: '^airflow\.ti_(failures|successes)$'
  match_type: regex
  help: "Overall task instances 'type'"
  name: "airflow_task_instances"
  labels:
    type: "$1"
# --- Label-free mappings ---
# Each entry maps one exact metric name to a fixed Prometheus name.
- match: 'airflow.previously_succeeded'
  name: 'airflow_previously_succeeded'
  help: 'Number of previously succeeded task instances'

- match: 'airflow.zombies_killed'
  name: 'airflow_zombies_killed'
  help: 'Zombie tasks killed'

- match: 'airflow.scheduler_heartbeat'
  name: 'airflow_scheduler_heartbeat'
  help: 'Scheduler heartbeats'

- match: 'airflow.dag_processing.processes'
  name: 'airflow_dag_processing_processes'
  help: 'Number of currently running DAG parsing processes'

- match: 'airflow.dag_processing.processor_timeouts'
  name: 'airflow_dag_processing_processor_timeouts'
  help: 'Number of file processors that have been killed due to taking too long. Metric with file_path tagging.'

- match: 'airflow.dag_processing.sla_callback_count'
  name: 'airflow_dag_processing_sla_callback_count'
  help: 'Number of SLA callbacks received'

- match: 'airflow.dag_processing.other_callback_count'
  name: 'airflow_dag_processing_other_callback_count'
  help: 'Number of non-SLA callbacks received'

- match: 'airflow.dag_processing.file_path_queue_update_count'
  name: 'airflow_dag_processing_file_path_queue_update_count'
  help: 'Number of times we have scanned the filesystem and queued all existing dags'

- match: 'airflow.dag_processing.manager_stalls'
  name: 'airflow_dag_processing_manager_stalls'
  help: 'Number of stalled DagFileProcessorManager'

- match: 'airflow.dag_file_refresh_error'
  name: 'airflow_dag_file_refresh_error'
  help: 'Number of failures loading any DAG files'
# --- Scheduler and SLA mappings ---
# The trailing wildcard of the first two entries is exposed as `status`.
- match: 'airflow.scheduler.tasks.*'
  name: 'airflow_scheduler_tasks'
  help: "Number of tasks 'status' in scheduler"
  labels:
    status: '$1'

- match: 'airflow.scheduler.orphaned_tasks.*'
  name: 'airflow_scheduler_orphaned_tasks'
  help: 'Number of Orphaned tasks cleared or adopted by the Scheduler'
  labels:
    status: '$1'

- match: 'airflow.scheduler.critical_section_busy'
  name: 'airflow_scheduler_critical_section_busy'
  help: 'Count of times a scheduler process tried to get a lock on the critical section (needed to send tasks to the executor) and found it locked by another process.'

- match: 'airflow.sla_missed'
  name: 'airflow_sla_missed'
  help: 'Number of SLA misses. Metric with dag_id and task_id tagging.'

- match: 'airflow.sla_callback_notification_failure'
  name: 'airflow_sla_callback_notification_failure'
  help: 'Number of failed SLA miss callback notification attempts. Metric with dag_id and func_name tagging.'

- match: 'airflow.sla_email_notification_failure'
  name: 'airflow_sla_email_notification_failure'
  help: 'Number of failed SLA miss email notification attempts'
# --- airflow_taskinstance: task start / finish ---
# Both entries share one Prometheus name, so they carry the same four label
# names (status, dag_id, task_id, state).
- match: 'airflow.ti.start.*.*'
  name: 'airflow_taskinstance'
  help: 'Number of started or completed task in a given dag. Similar to airflow_jobs but for task'
  labels:
    status: 'start'
    dag_id: '$1'
    task_id: '$2'
    # Please note that metrics with the same name must also have the same set
    # of label names; the start pattern has no third wildcard, hence the
    # fixed placeholder value.
    state: 'None'

- match: 'airflow.ti.finish.*.*.*'
  name: 'airflow_taskinstance'
  help: 'Number of started or completed task in a given dag. Similar to airflow_jobs but for task'
  labels:
    status: 'finish'
    dag_id: '$1'
    task_id: '$2'
    state: '$3'
# --- DAG callback, Celery and per-DAG task removal/restoration mappings ---
- match: 'airflow.dag.callback_exceptions'
  name: 'airflow_dag_callback_exceptions'
  help: 'Number of exceptions raised from DAG callbacks. When this happens, it means DAG callback is not working.'

- match: 'airflow.celery.task_timeout_error'
  name: 'airflow_celery_task_timeout_error'
  help: 'Number of AirflowTaskTimeout errors raised when publishing Task to Celery Broker.'

- match: 'airflow.celery.execute_command.failure'
  name: 'airflow_celery_execute_command_failure'
  help: 'Number of non-zero exit code from Celery task.'

# The next two entries feed one metric, airflow_tasks_dag, and are told apart
# by a fixed `status` label; the wildcard is the dag_id.
- match: 'airflow.task_removed_from_dag.*'
  name: 'airflow_tasks_dag'
  help: 'Number of tasks removed or restored for a given dag'
  labels:
    status: 'removed'
    dag_id: '$1'

- match: 'airflow.task_restored_to_dag.*'
  name: 'airflow_tasks_dag'
  help: 'Number of tasks removed or restored for a given dag'
  labels:
    status: 'restored'
    dag_id: '$1'
# airflow.task_instance_created_<operator_name>
# The operator name captured by the regex is now exported as the
# `operator_name` label (the same label name airflow_operators uses); before,
# the capture was discarded and all operators were merged into one series.
# The pattern is anchored with ^...$ so it must cover the whole metric name.
- match: '^airflow\.task_instance_created_(.+)$'
  match_type: regex
  help: "Number of tasks instances created for a given Operator"
  name: "airflow_taskinstance_created"
  labels:
    operator_name: "$1"
# --- Triggerer, trigger and dataset mappings ---
- match: 'airflow.triggerer_heartbeat'
  name: 'airflow_triggerer_heartbeat'
  help: 'Triggerer heartbeats'

# Single trailing wildcard exposed as `status`.
- match: 'airflow.triggers.*'
  name: 'airflow_triggers'
  help: 'Number of triggers that by status'
  labels:
    status: '$1'

# Single trailing wildcard exposed as `type`.
- match: 'airflow.dataset.*'
  name: 'airflow_dataset'
  help: 'Number of dataset by type'
  labels:
    type: '$1'
# --- DagBag and DAG-processing mappings ---
- match: 'airflow.dagbag_size'
  name: 'airflow_dagbag_size'
  help: 'DAG bag size'

- match: 'airflow.dag_processing.import_errors'
  name: 'airflow_dag_processing_import_errors'
  help: 'Number of errors from trying to parse DAG files'

- match: 'airflow.dag_processing.total_parse_time'
  name: 'airflow_dag_processing_total_parse_time'
  help: 'Seconds taken to scan and import all DAG files once'

- match: 'airflow.dag_processing.file_path_queue_size'
  name: 'airflow_dag_processing_file_path_queue_size'
  help: 'Number of DAG files to be considered for the next scan'

# Per-file entries: the trailing wildcard is exposed as `dag_file`.
- match: 'airflow.dag_processing.last_run.seconds_ago.*'
  name: 'airflow_dag_processing_last_run'
  help: 'Seconds since <dag_file> was last processed'
  labels:
    dag_file: '$1'

- match: 'airflow.dag_processing.last_num_of_db_queries.*'
  name: 'airflow_dag_processing_last_num_of_db_queries'
  help: 'Number of queries to Airflow database during parsing per dag_file'
  labels:
    dag_file: '$1'
# --- Executor, pool, per-task resource usage and running-trigger mappings ---
- match: 'airflow.executor.*'
  name: 'airflow_executor'
  help: "Number of 'type' tasks on executor"
  labels:
    type: '$1'

# First wildcard = type, second wildcard = pool name.
- match: 'airflow.pool.*.*'
  name: 'airflow_pool'
  help: "Number of 'type' in the pool"
  labels:
    type: '$1'
    pool_name: '$2'

# For both usage entries: first wildcard = dag_id, second = task_id.
- match: 'airflow.task.cpu_usage_percent.*.*'
  name: 'airflow_task_cpu_usage_percent'
  help: 'Percentage of CPU used by a task'
  labels:
    dag_id: '$1'
    task_id: '$2'

- match: 'airflow.task.mem_usage_percent.*.*'
  name: 'airflow_task_mem_usage_percent'
  help: 'Percentage of memory used by a task'
  labels:
    dag_id: '$1'
    task_id: '$2'

- match: 'airflow.triggers.running.*'
  name: 'airflow_triggers_running'
  help: 'Number of triggers currently running for a triggerer (described by hostname)'
  labels:
    hostname: '$1'
# --- DagRun / DAG / DAG-file durations, all observed as summaries ---
- match: 'airflow.dagrun.dependency-check.*'
  name: 'airflow_dagrun_dependency_check'
  observer_type: summary
  help: 'Milliseconds taken to check DAG dependencies'
  labels:
    dag_id: '$1'

# airflow.dag.<dag_id>.<task_id>.<type>: the third wildcard is the `type`.
- match: 'airflow.dag.*.*.*'
  name: 'airflow_dag_durations'
  observer_type: summary
  help: 'Milliseconds a task spends by type'
  labels:
    dag_id: '$1'
    task_id: '$2'
    type: '$3'

- match: 'airflow.dag_processing.last_duration.*'
  name: 'airflow_dag_processing_last_duration'
  observer_type: summary
  help: 'Milliseconds taken to load the given DAG file'
  labels:
    dag_file: '$1'

# airflow.dagrun.duration.<type>.<dag_id>
- match: 'airflow.dagrun.duration.*.*'
  name: 'airflow_dagrun_duration'
  observer_type: summary
  help: "Milliseconds taken for a DagRun to reach 'type' state"
  labels:
    type: '$1'
    dag_id: '$2'

- match: 'airflow.dagrun.schedule_delay.*'
  name: 'airflow_dagrun_schedule_delay'
  observer_type: summary
  help: 'Milliseconds of delay between the scheduled DagRun start date and the actual DagRun start date'
  labels:
    dag_id: '$1'
# --- Scheduler, DagRun scheduling-delay, DB and Kubernetes executor
# durations, all observed as summaries ---
- match: 'airflow.scheduler.critical_section_duration'
  name: 'airflow_scheduler_critical_section_duration'
  observer_type: summary
  help: 'Milliseconds spent in the critical section of scheduler loop – only a single scheduler can enter this loop at a time'

- match: 'airflow.scheduler.critical_section_query_duration'
  name: 'airflow_scheduler_critical_section_query_duration'
  observer_type: summary
  help: 'Milliseconds spent running the critical section task instance query'

- match: 'airflow.scheduler.scheduler_loop_duration'
  name: 'airflow_scheduler_scheduler_loop_duration'
  observer_type: summary
  help: 'Milliseconds spent running one scheduler loop'

# The wildcard sits in the middle of the name here and is the dag_id.
- match: 'airflow.dagrun.*.first_task_scheduling_delay'
  name: 'airflow_dagrun_first_task_scheduling_delay'
  observer_type: summary
  help: 'Milliseconds elapsed between first task start_date and dagrun expected start'
  labels:
    dag_id: '$1'

- match: 'airflow.collect_db_dags'
  name: 'airflow_collect_db_dags'
  observer_type: summary
  help: 'Milliseconds taken for fetching all Serialized Dags from DB'

# The wildcard sits in the middle of the name here and is the `type`.
- match: 'airflow.kubernetes_executor.*.duration'
  name: 'airflow_kubernetes_executor_duration'
  observer_type: summary
  help: 'Milliseconds taken for by type in Kubernetes Executor'
  labels:
    type: '$1'
# Final mapping: the regex `.` matches any non-empty metric name, and the
# action is `drop`.
# NOTE(review): presumably meant as a catch-all for metrics that no mapping
# above claimed - confirm against statsd_exporter's glob/regex matching order.
- match: '.'
  match_type: regex
  name: 'dropped'
  action: drop
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment