Created
April 3, 2025 06:42
-
-
Save tkellen/40d552366246245da73d51b0350b6e17 to your computer and use it in GitHub Desktop.
monorepo folder-driven ci pipelines
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Detect changed workspaces to drive a monorepo CI system. Supports bash 3.x+. The output
# of this script is a JSON array of workflows to run. This script is generic and can be
# used for any CI system.
#
# Environment:
#   REPO_PATH  Root of the repository. Defaults to the parent of the directory that
#              contains this script.

# Shell options are enabled before anything else so that a failure while resolving the
# paths below aborts the script instead of silently producing a wrong directory.
set -e          # exit immediately on failures
set -o pipefail # a pipeline fails when any of its stages fails, not only the last one
set -o noglob   # don't expand glob strings (bash 3x auto-expands some but not others)

# `&&` (rather than `;`) guarantees `pwd -P` only reports a directory we actually entered.
SCRIPT_PATH="$(cd "$(dirname "$0")" && pwd -P)"
REPO_PATH=${REPO_PATH:-"$(cd "${SCRIPT_PATH}/../" && pwd -P)"}

# The banner goes to stderr so that stdout carries nothing but the JSON result.
cat <<'EOF' >&2
██╗███╗░░██╗░█████╗░███████╗██████╗░████████╗██╗░█████╗░███╗░░██╗
██║████╗░██║██╔══██╗██╔════╝██╔══██╗╚══██╔══╝██║██╔══██╗████╗░██║
██║██╔██╗██║██║░░╚═╝█████╗░░██████╔╝░░░██║░░░██║██║░░██║██╔██╗██║
██║██║╚████║██║░░██╗██╔══╝░░██╔═══╝░░░░██║░░░██║██║░░██║██║╚████║
██║██║░╚███║╚█████╔╝███████╗██║░░░░░░░░██║░░░██║╚█████╔╝██║░╚███║
╚═╝╚═╝░░╚══╝░╚════╝░╚══════╝╚═╝░░░░░░░░╚═╝░░░╚═╝░╚════╝░╚═╝░░╚══╝
EOF
# Output debug messages to stderr.
#
# Arguments:
#   $1 - label shown inside square brackets; printed verbatim
#   $2 - message; backslash escapes such as \n are expanded
# Outputs:
#   The coloured "[label]: message" text followed by a newline, on stderr only.
function debug {
  local -r blue='\033[34m'
  local -r white='\033[37m'
  local -r reset='\033[0m'
  # A constant format string keeps caller data out of printf's format position; %b expands
  # escapes in the colour codes and the message, while %s leaves the label untouched.
  printf '%b[%s]:%b %b%b%b\n' "$blue" "$1" "$reset" "$white" "$2" "$reset" >&2
}
# Support finding files in per-workspace paths using various globbing patterns.
#
# Pattern shapes, checked in this order:
#   **/*<rest>         files anywhere under the workspace whose path matches the pattern
#   <dir>/**/<name>    files anywhere below <dir> whose basename matches <name>
#   <dir>/<name>       (no `**`) files matching the path pattern at exactly the depth the
#                      pattern spells out, e.g. `pkg/*.go` or the literal `pkg/main.go`
#   <dir>/*... + `**`  any other pattern containing both `/*` and `**`: plain path match
#   <name>             files directly inside the workspace whose basename matches <name>
#
# Arguments:
#   $1 - workspace directory to search
#   $2 - pattern, relative to the workspace
# Outputs:
#   The `readlink -f` form of every matching regular file, one per line, on stdout.
# Returns:
#   Always 0. Errors from find/readlink (for example a missing directory) are ignored on
#   purpose so that a pattern matching nothing simply yields no output.
function search {
  local WORKSPACE_PATH="$1"
  local PATTERN="$2"
  local dir name without_slashes depth

  if [[ "$PATTERN" == "**/*"* ]]; then
    find "$WORKSPACE_PATH" -type f -path "$PATTERN" -exec readlink -f {} \; || true
  elif [[ "$PATTERN" == *"**/*"* ]]; then
    dir="${PATTERN%/**/*}"
    name="${PATTERN##*/}"
    find "$WORKSPACE_PATH/$dir" -type f -name "$name" -exec readlink -f {} \; || true
  elif [[ "$PATTERN" == *"/"* && "$PATTERN" != *"**"* ]]; then
    # find's -path lets `*` match `/`, so `pkg/*.go` on its own would also pick up
    # `pkg/deep/x.go`. Pinning the depth to the number of path components in the pattern
    # keeps every `*` inside a single path segment.
    without_slashes=${PATTERN//\//}
    depth=$(( ${#PATTERN} - ${#without_slashes} + 1 ))
    find "$WORKSPACE_PATH" -mindepth "$depth" -maxdepth "$depth" -type f \
      -path "$WORKSPACE_PATH/$PATTERN" -exec readlink -f {} \; || true
  elif [[ "$PATTERN" == *"/*"* ]]; then
    find "$WORKSPACE_PATH" -type f -path "$WORKSPACE_PATH/$PATTERN" -exec readlink -f {} \; || true
  else
    find "$WORKSPACE_PATH" -maxdepth 1 -type f -name "$PATTERN" -exec readlink -f {} \; || true
  fi
}
# Go go go!
#
# Work out which workflows are affected by the commit under test and print them as a JSON
# array.
#
# Environment:
#   WORKSPACE_ROOT  (required) directory searched for manifest files
#   BASE_BRANCH     branch used as comparison base for new branches and force pushes
#                   (default: main)
#   BEFORE_COMMIT   SHA the branch pointed at before the push (default: all zeros)
#   CURRENT_COMMIT  SHA being built (default: HEAD)
#   MANIFEST_FILE   file name of the per-workspace manifest (default: Inceptionfile)
#   REPO_PATH       repository root; the function changes into it
# Requires:
#   git, yq, the `search` and `debug` helpers of this file, and jq (only for the
#   human-readable RUN listing). Unquoted command substitutions below rely on the
#   script-wide `set -o noglob`.
# Outputs:
#   stdout - JSON array with one object per workflow to run (`[]` when there is none)
#   stderr - diagnostics
function inception {
  : "${WORKSPACE_ROOT?must be defined}"
  BASE_BRANCH=${BASE_BRANCH:-main}
  BEFORE_COMMIT=${BEFORE_COMMIT:-0000000000000000000000000000000000000000}
  CURRENT_COMMIT=${CURRENT_COMMIT:-HEAD}
  MANIFEST_FILE=${MANIFEST_FILE:-Inceptionfile}

  # Split command output on newlines only, so paths and patterns containing spaces stay in
  # one piece. As a side effect "${RUN[*]}" below is joined with newlines.
  local IFS=$'\n'

  # Every git and find call below must run inside the repository, so enter it first.
  cd "$REPO_PATH" || return 1

  # The first step in making a CI system capable of handling multiple projects from a single
  # repository is to introduce logic to detect which projects were affected by a given commit.
  # This implies knowing which files have changed from one commit to the next, as well as which
  # files a given project is "watching". This sounds simple. In practice it can be rather complex.
  #
  # Most CI systems provide access to the current and previous SHA of the commit that triggered a
  # given CI run. In the most basic case, which we assume first, the branch being pushed to already
  # exists and we are simply adding commits to it. The previous SHA is the previous commit to the
  # current branch.
  COMMIT_TYPE="standard-push"
  BASE_COMMIT="$BEFORE_COMMIT"

  if [[ "$BEFORE_COMMIT" = "0000000000000000000000000000000000000000" ]]; then
    # In the case of pushing a new branch, the SHA of the previous commit is open to
    # interpretation. Is it the latest commit from main? What happens if you make a branch of a
    # branch? What happens if you delete the branch that you branched from before you push the new
    # branch? For the purposes of this script, we assume a new branch has no previous commit
    # (sometimes represented in CI systems as all zeros) and that we want to compare the new
    # branch to the `main` branch by default to determine the files changed.
    COMMIT_TYPE="new-branch"
    BASE_COMMIT=$(git merge-base "$CURRENT_COMMIT" "origin/$BASE_BRANCH")
  elif ! git merge-base --is-ancestor "$BEFORE_COMMIT" "$CURRENT_COMMIT" 2>/dev/null; then
    # If the previous commit no longer exists, we assume history has been rewritten and the commit
    # is a force push. We treat this case the same as a new branch and consider the set of changed
    # files to be everything different than the latest commit on main.
    COMMIT_TYPE="force-push"
    BASE_COMMIT=$(git merge-base "$CURRENT_COMMIT" "origin/$BASE_BRANCH")
  fi

  # A workspace path is a folder that contains a manifest file that defines workflows and which
  # files should trigger them.
  WORKSPACE_PATHS=($(find "$WORKSPACE_ROOT" -type f -name "$MANIFEST_FILE" -exec dirname {} \;))

  # Identically indexed WATCHED_FILES and WORKFLOW_CONTEXTS (no maps in bash 3.x, boo):
  # entry i of WORKFLOW_CONTEXTS is the JSON description of the workflow watching file i.
  WATCHED_FILES=()
  WORKFLOW_CONTEXTS=()

  # Iterate all workspaces resolving paths to each watched file so we can check to see if they
  # have changed.
  for WORKSPACE_PATH in "${WORKSPACE_PATHS[@]}"; do
    MANIFEST_PATH="${WORKSPACE_PATH}/${MANIFEST_FILE}"
    # Read the manifest file for metadata.
    NAME=$(yq ".workspace.name" "$MANIFEST_PATH")
    WORKFLOWS=($(yq '.workspace.workflows | keys[]' "$MANIFEST_PATH"))
    # A workspace can trigger arbitrary workflows. Find which workflows exist in this workspace.
    for WORKFLOW in "${WORKFLOWS[@]}"; do
      # Each workflow defines which files changing should trigger it.
      TRIGGERS=($(yq ".workspace.workflows.$WORKFLOW.paths[]" "$MANIFEST_PATH"))
      # Grab which CI runner should be used early, this is sometimes needed statically
      # (e.g. github actions).
      RUNS_ON=$(yq -I=0 -o json ".workspace.workflows.$WORKFLOW.runs-on" "$MANIFEST_PATH")
      # The context is the same for every file of a workflow, so build it once here.
      # NOTE(review): name, workflow and path are inserted without JSON escaping; values
      # containing `"` or `\` would produce invalid JSON.
      CONTEXT="{\"name\":\"$NAME\",\"workflow\":\"$WORKFLOW\",\"base-path\":\"${WORKSPACE_PATH}\",\"manifest\":\"${MANIFEST_PATH}\",\"runs-on\":$RUNS_ON}"
      for TRIGGER in "${TRIGGERS[@]}"; do
        # Expand trigger patterns (e.g. src/**/*) to a list of repo-relative files that match.
        FILES=($(search "$WORKSPACE_PATH" "$TRIGGER" | sed "s|^$REPO_PATH/||"))
        for FILE in "${FILES[@]}"; do
          if [[ -n "$FILE" ]]; then
            WATCHED_FILES+=("$FILE")
            WORKFLOW_CONTEXTS+=("$CONTEXT")
          fi
        done
      done
    done
  done

  # Ask git to tell us which files have actually changed.
  CHANGED_FILES=($(git diff --name-only "$BASE_COMMIT" "$CURRENT_COMMIT"))

  RUN=()
  # Check all changed files to determine which workflows to trigger.
  for CHANGED in "${CHANGED_FILES[@]}"; do
    for i in "${!WATCHED_FILES[@]}"; do
      FILE="${WATCHED_FILES[$i]}"
      CONTEXT="${WORKFLOW_CONTEXTS[$i]}"
      # If any of the watched files match those which have changed, record the context for it
      # in the list of things to run. Only match once per workflow to make this "fast".
      # The membership test is a literal, newline-delimited substring comparison.
      # NOTE(review): a changed path matches when it merely starts with a watched path
      # (prefix match), not only when it is identical - confirm this is intended.
      if [[ "$CHANGED" == "$FILE"* && $'\n'"${RUN[*]}"$'\n' != *$'\n'"$CONTEXT"$'\n'* ]]; then
        RUN+=("$CONTEXT")
      fi
    done
  done

  # Output useful debugging info on stderr for humans running output from this script.
  for var in BASE_BRANCH BEFORE_COMMIT CURRENT_COMMIT REPO_PATH WORKSPACE_ROOT MANIFEST_FILE COMMIT_TYPE BASE_COMMIT; do
    debug "$var" "${!var}"
  done
  debug "CHANGED_FILES" "$(printf "\n%s" "${CHANGED_FILES[@]}" | sed 's/^/ /')"

  # Output a JSON array of affected workspaces and workflows for consumption by a CI system.
  if [[ ${#RUN[@]} -ne 0 ]]; then
    # Feed jq one JSON document per line; splitting on spaces would tear apart any context
    # that itself contains a space.
    debug "RUN" "\n$(printf '%s\n' "${RUN[@]}" | jq -r '.name+" ("+(.workflow)+")"' | sed 's/^/ /')"
    # The joined contexts are passed as an argument, never as the printf format, so `%` or
    # `\` inside the JSON is emitted untouched.
    printf '[%s]' "$(printf '%s,' "${RUN[@]}" | sed 's/,$//')"
  else
    printf "[]"
  fi
}
# Here is a little test suite to make sure pattern matching does what is expected.
#
# A mock workspace named `test` is built inside a fresh temporary directory rather than
# inside the repository, so an existing `test` directory is never touched. While the cases
# run, REPO_PATH points at that temporary directory and it is the working directory. The
# directory is removed when the suite ends, including when it aborts part-way.
#
# Requires: the `validate` function; `tree` is used for the listing when available.
# Outputs:  the workspace listing followed by whatever each `validate` call prints.
# NOTE: a failing case is only visible in that output; this function does not turn it
# into a non-zero exit status.
function testsuite {
  local sandbox
  sandbox="$(mktemp -d)" || return 1
  # Use the physical path so that symlink-resolved file paths still start with REPO_PATH.
  sandbox="$(cd "$sandbox" && pwd -P)" || return 1
  (
    # The subshell confines the trap, the REPO_PATH override and the directory change.
    trap 'rm -rf -- "${sandbox:?}"' EXIT
    REPO_PATH="$sandbox"
    cd "$REPO_PATH" || exit 1

    mkdir -p test/pkg/fixtures
    touch test/.gitignore test/go.mod test/go.sum test/main.go test/README.md
    touch test/pkg/main.go test/pkg/main_test.go test/pkg/fixtures/test.json

    printf 'Testing glob function on mock workspace:\n\n'
    if command -v tree >/dev/null 2>&1; then
      tree -na --noreport test
    else
      # `tree` is optional; fall back to a flat listing instead of aborting the suite.
      find test | LC_ALL=C sort
    fi
    printf '\n'

    validate "none" "none" ""
    validate "single" "go.mod" "test/go.mod"
    validate "star" "*" "test/go.mod test/go.sum test/README.md test/.gitignore test/main.go"
    validate "double-star" "**" "test/go.mod test/go.sum test/README.md test/.gitignore test/main.go"
    validate "star-dot-star" "*.*" "test/go.mod test/go.sum test/README.md test/.gitignore test/main.go"
    validate "non-recursive-scoped" "*.go" "test/main.go"
    validate "recursive-all" "**/*" "test/go.mod test/go.sum test/README.md test/.gitignore test/main.go test/pkg/fixtures/test.json test/pkg/main.go test/pkg/main_test.go"
    validate "recursive-scoped" "**/*.go" "test/main.go test/pkg/main.go test/pkg/main_test.go"
    validate "prefixed-recursive-all" "pkg/**/*" "test/pkg/fixtures/test.json test/pkg/main.go test/pkg/main_test.go"
    validate "prefixed-non-recursive-scoped" "pkg/*.go" "test/pkg/main.go test/pkg/main_test.go"
    validate "prefixed-recursive-scoped" "pkg/**/*.go" "test/pkg/main.go test/pkg/main_test.go"
  )
}
# Run one pattern-matching case against the mock workspace in ./test and report the result.
#
# Globals:
#   REPO_PATH (read) - prefix stripped from the paths printed by `search`
# Arguments:
#   $1 - title of the case
#   $2 - pattern handed to `search`
#   $3 - space-separated list of expected repo-relative paths; order does not matter
# Outputs:
#   "test <title> (<pattern>): OK", or "... FAIL" followed by the expected and the actual
#   list (both sorted), on stdout.
function validate {
  local TITLE="$1"
  local PATTERN="$2"
  local EXPECTED="$3"
  local ACTUAL

  # Title and pattern are passed as arguments so a `%` in them is printed literally.
  printf 'test %s (%s): ' "$TITLE" "$PATTERN"

  # find returns entries in directory order, which differs between filesystems; sorting
  # both sides makes the comparison independent of that order.
  EXPECTED="$(printf '%s\n' "$EXPECTED" | tr ' ' '\n' | LC_ALL=C sort)"
  ACTUAL="$(search "test" "$PATTERN" | sed "s|^$REPO_PATH/||" | LC_ALL=C sort)"

  if [[ "$EXPECTED" == "$ACTUAL" ]]; then
    printf 'OK\n'
  else
    printf 'FAIL\nExpected:\n%s\nActual:\n%s\n' "$EXPECTED" "$ACTUAL"
  fi
}
# Entry point: run the pattern-matching self-test when TEST is exactly "true", otherwise
# perform change detection. The default expansion keeps the check valid when TEST is unset.
if [[ "${TEST:-}" == "true" ]]; then
  testsuite
else
  inception
fi
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment