Skip to content

Instantly share code, notes, and snippets.

@tkellen
Created April 3, 2025 06:42
Show Gist options
  • Save tkellen/40d552366246245da73d51b0350b6e17 to your computer and use it in GitHub Desktop.
Save tkellen/40d552366246245da73d51b0350b6e17 to your computer and use it in GitHub Desktop.
monorepo folder-driven ci pipelines
#!/bin/bash
# Detect changed workspaces to drive a monorepo CI system. Supports bash 3.x+. The output
# of this script is a JSON array of workflows to run. This script is generic and can be
# used for any CI system.
#
# Environment read by this header:
#   REPO_PATH  repository root; defaults to the parent of the directory holding this script.
set -e # exit immediately on failures
set -o pipefail # exit code of pipeline is code of first failure, not last
set -o noglob # don't expand glob strings (bash 3x auto-expands some but not others)
# Resolve paths only after strict mode is on, and chain with `&&` rather than `;` so that a
# failed `cd` aborts the script instead of silently reporting the caller's working directory.
SCRIPT_PATH="$(cd "$(dirname "$0")" && pwd -P)"
REPO_PATH=${REPO_PATH:-"$(cd "${SCRIPT_PATH}/../" && pwd -P)"}
# The banner goes to stderr so stdout stays reserved for the JSON result. The delimiter is
# quoted so the art is always emitted literally.
cat <<'EOF' >&2
██╗███╗░░██╗░█████╗░███████╗██████╗░████████╗██╗░█████╗░███╗░░██╗
██║████╗░██║██╔══██╗██╔════╝██╔══██╗╚══██╔══╝██║██╔══██╗████╗░██║
██║██╔██╗██║██║░░╚═╝█████╗░░██████╔╝░░░██║░░░██║██║░░██║██╔██╗██║
██║██║╚████║██║░░██╗██╔══╝░░██╔═══╝░░░░██║░░░██║██║░░██║██║╚████║
██║██║░╚███║╚█████╔╝███████╗██║░░░░░░░░██║░░░██║╚█████╔╝██║░╚███║
╚═╝╚═╝░░╚══╝░╚════╝░╚══════╝╚═╝░░░░░░░░╚═╝░░░╚═╝░╚════╝░╚═╝░░╚══╝
EOF
# Write one colourised diagnostic line to stderr, leaving stdout untouched.
#   $1 - label, rendered in blue between square brackets
#   $2 - message, rendered in white; backslash escapes in it are expanded by `echo -e`
debug() {
  local label="$1" text="$2"
  local blue="\033[34m" white="\033[37m" reset="\033[0m"
  # Assemble the whole line first; the escape sequences are interpreted when it is echoed.
  local line="${blue}[${label}]:${reset} ${white}${text}${reset}"
  echo -e "$line" >&2
}
# List the regular files inside a workspace that match a trigger pattern, one resolved
# (`readlink -f`) path per line on stdout.
#   $1 - workspace directory to search
#   $2 - pattern, handled according to its shape:
#          **/*...       matched with `find -path` against paths under the workspace
#          dir/**/*.ext  `find -name` on the last path segment, searched below workspace/dir
#          dir/*.ext     matched with `find -path` against workspace/dir/*.ext
#          anything else matched with `find -name` directly inside the workspace (depth 1)
# Always returns 0: a failing `find` (for example a missing directory) simply yields fewer
# or no results.
search() {
  local root="$1"
  local glob="$2"
  local -a criteria
  case "$glob" in
    "**/*"*)
      criteria=("$root" -type f -path "$glob")
      ;;
    *"**/*"*)
      # Text before the "/**/" is the sub-directory, the final segment is the name filter.
      criteria=("$root/${glob%/**/*}" -type f -name "${glob##*/}")
      ;;
    *"/*"*)
      criteria=("$root" -type f -path "$root/$glob")
      ;;
    *)
      criteria=("$root" -maxdepth 1 -type f -name "$glob")
      ;;
  esac
  find "${criteria[@]}" -exec readlink -f {} \; || true
}
# Work out which workspace workflows are affected by the commits under test and print them
# on stdout as a JSON array (`[]` when nothing is affected). Diagnostics go to stderr.
#
# Environment:
#   WORKSPACE_ROOT  (required) directory searched for manifest files
#   REPO_PATH       directory the function changes into before doing any work
#   BASE_BRANCH     branch used as comparison base for new branches and force pushes
#                   (default: main, resolved as origin/<branch>)
#   BEFORE_COMMIT   SHA before the push (default: forty zeros, treated as a new branch)
#   CURRENT_COMMIT  SHA being built (default: HEAD)
#   MANIFEST_FILE   manifest file name (default: Inceptionfile)
# External tools used: git, find, sed, yq, jq.
# NOTE: lists are built with unquoted command substitution, so paths or workflow names
# containing whitespace are split into several entries.
function inception {
  : "${WORKSPACE_ROOT?must be defined}"
  BASE_BRANCH=${BASE_BRANCH:-main}
  BEFORE_COMMIT=${BEFORE_COMMIT:-0000000000000000000000000000000000000000}
  CURRENT_COMMIT=${CURRENT_COMMIT:-HEAD}
  MANIFEST_FILE=${MANIFEST_FILE:-Inceptionfile}
  # Change into the repository before the first git command so that commit detection, the
  # workspace search and the diff all operate on the same repository regardless of where
  # the script was invoked from.
  cd "$REPO_PATH"
  # The first step in making a CI system capable of handling multiple projects from a single
  # repository is to introduce logic to detect which projects were affected by a given commit.
  # This implies knowing which files have changed from one commit to the next, as well as which
  # files a given project is "watching". This sounds simple. In practice it can be rather complex.
  #
  # Most CI systems provide access to the current and previous SHA of the commit that triggered
  # a given CI run. In the most basic case, which we assume first, the branch being pushed to
  # already exists and we are simply adding commits to it. The previous SHA is the previous
  # commit to the current branch.
  COMMIT_TYPE="standard-push"
  BASE_COMMIT="$BEFORE_COMMIT"
  if [[ "$BEFORE_COMMIT" = "0000000000000000000000000000000000000000" ]]; then
    # In the case of pushing a new branch, the SHA of the previous commit is open to
    # interpretation. Is it the latest commit from main? What happens if you make a branch of a
    # branch? What happens if you delete the branch that you branched from before you push the
    # new branch? For the purposes of this script, we assume a new branch has no previous commit
    # (sometimes represented in CI systems as all zeros) and that we want to compare the new
    # branch to the `main` branch by default to determine the files changed.
    COMMIT_TYPE="new-branch"
    BASE_COMMIT=$(git merge-base "$CURRENT_COMMIT" "origin/$BASE_BRANCH")
  elif ! git merge-base --is-ancestor "$BEFORE_COMMIT" "$CURRENT_COMMIT" 2>/dev/null; then
    # If the previous commit no longer exists, we assume history has been rewritten and the
    # commit is a force push. We treat this case the same as a new branch and consider the set
    # of changed files to be everything different than the latest commit on main.
    COMMIT_TYPE="force-push"
    BASE_COMMIT=$(git merge-base "$CURRENT_COMMIT" "origin/$BASE_BRANCH")
  fi
  # A workspace path is a folder that contains a manifest file that defines workflows and which
  # files should trigger them.
  WORKSPACE_PATHS=($(find "$WORKSPACE_ROOT" -type f -name "$MANIFEST_FILE" -exec dirname {} \;))
  WORKSPACE_FILES=()
  # Iterate all workspaces resolving paths to each watched file so we can check to see if they
  # have changed.
  for WORKSPACE_PATH in "${WORKSPACE_PATHS[@]}"; do
    # Read the manifest file for metadata.
    NAME=$(yq ".workspace.name" "${WORKSPACE_PATH}/${MANIFEST_FILE}")
    WORKFLOWS=($(yq '.workspace.workflows | keys[]' "${WORKSPACE_PATH}/${MANIFEST_FILE}"))
    # A workspace can trigger arbitrary workflows. Find which workflows exist in this workspace.
    for WORKFLOW in "${WORKFLOWS[@]}"; do
      # Each workflow defines which files changing should trigger it. Find each of these for
      # this workflow.
      TRIGGERS=($(yq ".workspace.workflows.$WORKFLOW.paths[]" "${WORKSPACE_PATH}/${MANIFEST_FILE}"))
      # Grab which CI runner should be used early, this is sometimes needed statically (e.g.
      # github actions). It is kept as raw JSON and embedded unquoted in the context below.
      RUNS_ON=$(yq -I=0 -o json ".workspace.workflows.$WORKFLOW.runs-on" "${WORKSPACE_PATH}/${MANIFEST_FILE}")
      for TRIGGER in "${TRIGGERS[@]}"; do
        # Expand trigger patterns (e.g. src/**/*) to a list of files that match, expressed
        # relative to the repository so they are comparable with `git diff --name-only`.
        FILES=($(search "$WORKSPACE_PATH" "$TRIGGER" | sed "s|^$REPO_PATH/||"))
        # Annotate each file with details about the workspace and workflow it belongs to.
        for FILE in "${FILES[@]}"; do
          if [[ -n "$FILE" ]]; then
            WORKSPACE_FILES+=("$FILE|${WORKSPACE_PATH}|$NAME|$WORKFLOW|$RUNS_ON")
          fi
        done
      done
    done
  done
  # Populate identically indexed WATCHED_FILES and WORKFLOW_CONTEXTS (no maps in bash 3.x, boo).
  WATCHED_FILES=()
  WORKFLOW_CONTEXTS=()
  for ITEM in "${WORKSPACE_FILES[@]}"; do
    # RUNS_ON is read last so any "|" inside its JSON stays part of that field.
    IFS="|" read -r FILE BASE_PATH NAME WORKFLOW RUNS_ON <<< "$ITEM"
    CONTEXT="{\"name\":\"$NAME\",\"workflow\":\"$WORKFLOW\",\"base-path\":\"${BASE_PATH}\",\"manifest\":\"${BASE_PATH}/${MANIFEST_FILE}\",\"runs-on\":$RUNS_ON}"
    WATCHED_FILES+=("$FILE")
    WORKFLOW_CONTEXTS+=("$CONTEXT")
  done
  # Ask git to tell us which files have actually changed.
  CHANGED_FILES=($(git diff --name-only "$BASE_COMMIT" "$CURRENT_COMMIT"))
  RUN=()
  # Check all changed files to determine which workflows to trigger.
  for CHANGED in "${CHANGED_FILES[@]}"; do
    for i in "${!WATCHED_FILES[@]}"; do
      FILE="${WATCHED_FILES[$i]}"
      CONTEXT="${WORKFLOW_CONTEXTS[$i]}"
      # If any of the watched files match those which have changed, record the context for it
      # in the list of things to run. Only match once per workflow to make this "fast".
      # NOTE(review): this is a prefix match, so a changed path that merely starts with a
      # watched path (e.g. main.go.bak vs main.go) also triggers; confirm that is intended.
      if [[ "$CHANGED" == "$FILE"* && ! " ${RUN[*]} " =~ " $CONTEXT " ]]; then
        RUN+=("$CONTEXT")
      fi
    done
  done
  # Output useful debugging info on stderr for humans running output from this script.
  for var in BASE_BRANCH BEFORE_COMMIT CURRENT_COMMIT REPO_PATH WORKSPACE_ROOT MANIFEST_FILE COMMIT_TYPE BASE_COMMIT; do
    debug "$var" "${!var}"
  done
  debug "CHANGED_FILES" "$(printf "\n%s" "${CHANGED_FILES[@]}" | sed 's/^/ /')"
  # Output a JSON array of affected workspaces and workflows for consumption by a CI system.
  if [[ ${#RUN[@]} -ne 0 ]]; then
    # Feed jq one context per line; splitting on spaces would cut a context containing a
    # space (for example in a workspace name) into invalid JSON fragments.
    debug "RUN" "\n$(printf '%s\n' "${RUN[@]}" | jq -r '.name+" ("+(.workflow)+")"' | sed 's/^/ /')"
    # Join with commas in a subshell so the IFS change cannot leak, and print through a fixed
    # format so "%" or "\" inside the data is emitted verbatim instead of being interpreted.
    local joined
    joined=$(IFS=,; printf '%s' "${RUN[*]}")
    printf '%s' "[$joined]"
  else
    printf '%s' "[]"
  fi
}
# Here is a little test suite to make sure pattern matching does what is expected.
# It builds a mock workspace at $REPO_PATH/test, runs `validate` against it for a range of
# patterns, then removes the mock workspace again.
# Returns 1 without touching anything when $REPO_PATH/test already exists, because the
# cleanup step would otherwise delete content this function did not create.
function testsuite {
  cd "$REPO_PATH"
  if [[ -e test ]]; then
    printf 'refusing to run: %s/test already exists and would be deleted\n' "$REPO_PATH" >&2
    return 1
  fi
  mkdir -p test/pkg/fixtures
  touch test/.gitignore test/go.mod test/go.sum test/main.go test/README.md
  touch test/pkg/main.go test/pkg/main_test.go test/pkg/fixtures/test.json
  printf "Testing glob function on mock workspace:\n\n"
  # `tree` is only used for a nicer listing; fall back to `find` so a missing optional tool
  # does not abort the run under `set -e` and leave the mock workspace behind.
  if command -v tree >/dev/null 2>&1; then
    tree -na --noreport test
  else
    find test
  fi
  printf "\n"
  validate "none" "none" ""
  validate "single" "go.mod" "test/go.mod"
  validate "star" "*" "test/go.mod test/go.sum test/README.md test/.gitignore test/main.go"
  validate "double-star" "**" "test/go.mod test/go.sum test/README.md test/.gitignore test/main.go"
  validate "star-dot-star" "*.*" "test/go.mod test/go.sum test/README.md test/.gitignore test/main.go"
  validate "non-recursive-scoped" "*.go" "test/main.go"
  validate "recursive-all" "**/*" "test/go.mod test/go.sum test/README.md test/.gitignore test/main.go test/pkg/fixtures/test.json test/pkg/main.go test/pkg/main_test.go"
  validate "recursive-scoped" "**/*.go" "test/main.go test/pkg/main.go test/pkg/main_test.go"
  validate "prefixed-recursive-all" "pkg/**/*" "test/pkg/fixtures/test.json test/pkg/main.go test/pkg/main_test.go"
  validate "prefixed-non-recursive-scoped" "pkg/*.go" "test/pkg/main.go test/pkg/main_test.go"
  validate "prefixed-recursive-scoped" "pkg/**/*.go" "test/pkg/main.go test/pkg/main_test.go"
  rm -rf test
}
# Run `search` for one pattern against the mock workspace "test" and report the outcome.
#   $1 - test title
#   $2 - pattern handed to `search`
#   $3 - expected matches as a space separated list of paths relative to $REPO_PATH
# Prints "test <title> (<pattern>): OK", or FAIL followed by the expected and actual lists.
# The comparison is order sensitive: matches are compared in the order `search` emits them.
function validate {
  local TITLE="$1"
  local PATTERN="$2"
  local EXPECTED="$3"
  local ACTUAL
  # Fixed format strings keep "%" or "\" in titles, patterns and paths from being
  # interpreted by printf.
  printf 'test %s (%s): ' "$TITLE" "$PATTERN"
  # One expected path per line, matching the shape of the search output.
  EXPECTED="$(printf '%s\n' "$EXPECTED" | tr ' ' '\n')"
  ACTUAL=$(search "test" "$PATTERN" | sed "s|^$REPO_PATH/||")
  if [[ "$EXPECTED" == "$ACTUAL" ]]; then
    printf 'OK\n'
  else
    # End the report with a newline so the next test starts on its own line.
    printf 'FAIL\nExpected:\n%s\nActual:\n%s\n' "$EXPECTED" "$ACTUAL"
  fi
}
# Entry point: TEST=true runs the pattern-matching self-test, anything else (including an
# unset TEST) runs the change detection.
case "$TEST" in
  true)
    testsuite
    ;;
  *)
    inception
    ;;
esac
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment