alevene / append_okta_creds.py
Created January 15, 2020 21:53
Load okta session details into .aws/credentials
"""
Get AWS creds via Okta --> generate .aws/credentials.idea
Backup current credentials file
Take [idea_jdbc] profile from .aws/credentials.idea and append to .aws/credentials
"""
import datetime
import os
import shutil
import sys
import tempfile
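The preview ends at the imports. A minimal sketch of the backup-and-append flow the docstring describes, assuming the INI-style credentials files parse with configparser and a timestamped .bak suffix for the backup (both assumptions, not the gist's actual code):

import configparser
import datetime
import os
import shutil

def append_idea_profile(home=os.path.expanduser("~")):
    creds_path = os.path.join(home, ".aws", "credentials")
    idea_path = os.path.join(home, ".aws", "credentials.idea")

    # Back up the current credentials file with a timestamp suffix.
    stamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
    shutil.copy2(creds_path, creds_path + ".bak." + stamp)

    # Pull the [idea_jdbc] profile out of the Okta-generated file.
    idea = configparser.ConfigParser()
    idea.read(idea_path)
    if "idea_jdbc" not in idea:
        raise KeyError("no [idea_jdbc] profile in credentials.idea")

    # Replace (or add) the profile in the main credentials file.
    creds = configparser.ConfigParser()
    creds.read(creds_path)
    creds["idea_jdbc"] = dict(idea["idea_jdbc"])
    with open(creds_path, "w") as fh:
        creds.write(fh)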
alevene / dag_dumper.py
Created January 15, 2020 21:41
Dump DAG definitions to file/stdout
"""
Script to print DAG+Task information in a text format. This can be used to quickly compare against other branches
or versions to confirm that the "compiled" version of a DAG matches expectations.
Usage:
1. ~/airflow-install (branch) $ ./worker/run --no-name python3 /opt/airflow/utils/dag_dumper.py -l dag1 dag2 -o /opt/airflow/dags/devenv_config/dumps/dag_dump_branch
2. ~/airflow-install (branch) $ git checkout development
3. ~/airflow-install (development) $ ./worker/run --no-name python3 /opt/airflow/utils/dag_dumper.py -l dag1 dag2 -o /opt/airflow/dags/devenv_config/dumps/dag_dump_dev
4. Run comparison against the 2 output files
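The gist body is truncated here. A rough sketch of a dumper matching the usage above, where the -l/-o flags mirror the examples but the exact output format is an assumption:

import argparse

from airflow.models import DagBag

def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("-l", "--dag-list", nargs="+", required=True)
    parser.add_argument("-o", "--output", default="/dev/stdout")
    args = parser.parse_args()

    bag = DagBag()
    with open(args.output, "w") as out:
        for dag_id in sorted(args.dag_list):
            dag = bag.get_dag(dag_id)
            out.write("DAG %s schedule=%s\n" % (dag_id, dag.schedule_interval))
            # One line per task keeps diffs between the two dump files readable.
            for task in sorted(dag.tasks, key=lambda t: t.task_id):
                ups = ",".join(sorted(task.upstream_task_ids))
                out.write("  %s <- [%s]\n" % (task.task_id, ups))

if __name__ == "__main__":
    main()

Step 4 then comes down to diffing the two output files, e.g. diff dag_dump_branch dag_dump_dev.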
alevene / dag_ignore.py
Created January 15, 2020 21:40
Print all DAGs except...
import os
import sys
def print_all_dag_files(except_list):
    root_dir = '/Users/alex.levene/workspace/airflow-dags/'
    exclude_dirs = ['.buildkite', '.idea', '.mine', 'common']
    keep_dirs = ['airflow_config']
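The preview cuts off before the walk. A hedged completion under the variables shown: prune the excluded directories, then print every .py file whose name isn't in except_list (the .py filter and relative-path output are assumptions; how keep_dirs factors in isn't visible in the preview, so it is left out):

    for dirpath, dirnames, filenames in os.walk(root_dir):
        # Pruning dirnames in place stops os.walk from descending further.
        dirnames[:] = [d for d in dirnames if d not in exclude_dirs]
        for name in filenames:
            if name.endswith('.py') and name not in except_list:
                print(os.path.relpath(os.path.join(dirpath, name), root_dir))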
alevene / ignore_all_but
Created January 15, 2020 21:37
Ignore all DAGs except the keepers
#!/usr/bin/env bash
set -o errexit
set -o errtrace
set -o pipefail
set -o nounset
THIS_SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"; readonly THIS_SCRIPT_DIR
THIS_GIT_ROOT="$(cd "$THIS_SCRIPT_DIR" && git rev-parse --show-toplevel)"; readonly THIS_GIT_ROOT
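The script's logic isn't shown past the setup. One plausible shape for "ignore all but the keepers", sketched in Python rather than bash: write an .airflowignore in the DAGs root listing every top-level entry except the keepers passed on the command line (.airflowignore is Airflow's standard ignore mechanism; the top-level-only scan is an assumption):

import os
import sys

def write_airflowignore(dags_root, keepers):
    # Each non-keeper entry becomes one ignore pattern for the scheduler.
    with open(os.path.join(dags_root, ".airflowignore"), "w") as fh:
        for entry in sorted(os.listdir(dags_root)):
            if entry not in keepers and not entry.startswith("."):
                fh.write(entry + "\n")

if __name__ == "__main__":
    write_airflowignore(os.getcwd(), set(sys.argv[1:]))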
alevene / restart
Created January 15, 2020 21:36
Airflow install restart containers
#!/usr/bin/env bash
# Completely rebuild the Airflow containers. See below for switches.
THIS_SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"; readonly THIS_SCRIPT_DIR
THIS_GIT_ROOT="$(cd "$THIS_SCRIPT_DIR" && git rev-parse --show-toplevel)"; readonly THIS_GIT_ROOT
DAGS_GIT_ROOT="$(cd "$THIS_GIT_ROOT/../airflow-dags" && git rev-parse --show-toplevel)"; readonly DAGS_GIT_ROOT
switches="${1}"
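The rebuild itself is truncated. As a loose sketch only, assuming docker-compose drives the containers and inventing a --no-build switch for illustration (neither is confirmed by the preview):

import subprocess
import sys

def restart(switches=""):
    # Skip the image rebuild when asked; otherwise build, then recreate.
    if "--no-build" not in switches:
        subprocess.run(["docker-compose", "build"], check=True)
    subprocess.run(["docker-compose", "down"], check=True)
    subprocess.run(["docker-compose", "up", "-d"], check=True)

if __name__ == "__main__":
    restart(" ".join(sys.argv[1:]))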
alevene / push_aws_creds
Last active January 15, 2020 21:20
Push AWS creds produced by aws-okta into airflow worker
#!/usr/bin/env bash
declare -A aws_vars
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"; readonly SCRIPT_DIR
GIT_ROOT="$(cd "$SCRIPT_DIR" && git rev-parse --show-toplevel)"; readonly GIT_ROOT
regex='export ([^=]*)=(.*)'
aws-okta env gusto > "${SCRIPT_DIR}/aws-okta-creds.txt"
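The same parse step in Python, using the regex from the script verbatim; the resulting dict mirrors the aws_vars associative array:

import re

EXPORT_RE = re.compile(r'export ([^=]*)=(.*)')

def parse_okta_env(text):
    """Turn aws-okta's 'export KEY=value' lines into a dict like aws_vars."""
    creds = {}
    for line in text.splitlines():
        match = EXPORT_RE.match(line)
        if match:
            creds[match.group(1)] = match.group(2)
    return creds

# e.g. parse_okta_env(open('aws-okta-creds.txt').read()) yields
# {'AWS_ACCESS_KEY_ID': '...', 'AWS_SECRET_ACCESS_KEY': '...', ...}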
alevene / add_new_partitions.py
Last active September 6, 2019 16:57
Add new partitions to athena based on existing S3 prefixes
def add_new_partitions(table_source_location, database_name, target_table):
    """
    Compare existing prefixes in S3 against existing partitions in Athena, then add any that are missing.
    Currently only supports one level of partition depth.
    :param table_source_location: location of the data in S3
    :param database_name: Athena database
    :param target_table: table to compare against
    """
    s3_hook = S3Hook()
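The preview stops at the S3Hook. A boto3-only sketch of the comparison the docstring describes; the gist itself goes through Airflow hooks, and the Hive-style key=value prefix layout is inferred from "one level of partition depth":

import boto3

def missing_partitions(table_source_location, database_name, target_table):
    bucket, _, prefix = table_source_location.replace("s3://", "").partition("/")
    if prefix and not prefix.endswith("/"):
        prefix += "/"

    # Each CommonPrefix under the table location is one candidate partition.
    s3 = boto3.client("s3")
    resp = s3.list_objects_v2(Bucket=bucket, Prefix=prefix, Delimiter="/")
    s3_values = set()
    for p in resp.get("CommonPrefixes", []):
        leaf = p["Prefix"][len(prefix):].strip("/")
        # Hive-style prefixes look like "dt=2019-09-06"; keep just the value.
        s3_values.add(leaf.split("=", 1)[-1])

    # Partitions already registered in the Glue catalog backing Athena.
    glue = boto3.client("glue")
    existing = set()
    for page in glue.get_paginator("get_partitions").paginate(
            DatabaseName=database_name, TableName=target_table):
        for part in page["Partitions"]:
            existing.add(part["Values"][0])

    # The gist would then register each of these, e.g. via
    # ALTER TABLE ... ADD PARTITION statements run through Athena.
    return s3_values - existing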