Last active: March 25, 2026 18:42
-
-
Save ericboehs/e408597d5e80f99ed95b17f8e334aff1 to your computer and use it in GitHub Desktop.
gh-action-trace — Find all direct and transitive uses of GitHub Actions in an org. Traces dependency chains through shared/reusable workflows and reports pinning status (SHA/tag/branch).
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env bash | |
| # | |
| # gh-action-trace — Find all direct and transitive uses of GitHub Actions in an org. | |
| # | |
| # Traces the full dependency chain: repos that directly reference target actions, | |
| # repos that call shared/reusable workflows containing those actions, and external | |
| # shared workflows that wrap them. Reports pinning status (SHA/tag/branch) for each. | |
| # | |
| # Requires: gh (GitHub CLI), jq, base64 | |
| # | |
| # Usage: | |
| # gh-action-trace --org department-of-veterans-affairs --action aquasecurity/trivy-action | |
| # gh-action-trace --org my-org --action actions/checkout --action actions/setup-node | |
| # gh-action-trace --org my-org --action aquasecurity/trivy-action --depth 3 --format json | |
| # | |
| # Options: | |
| # --org ORG GitHub org to search (required) | |
| # --action ACTION Action to trace — repeatable (required, at least one) | |
| # --depth N Max recursion depth for shared workflows (default: 2) | |
| # --format FORMAT Output format: text, json, both (default: both) | |
| # --external Also search all of GitHub for external shared workflows (slower) | |
| # --output FILE Write JSON output to file (default: stdout) | |
| # --check-runs FROM..TO Check workflow run history during a time window (ISO 8601) | |
| # e.g. --check-runs 2026-03-19T19:00:00Z..2026-03-21T00:00:00Z | |
| # Omit TO to default to now: --check-runs 2026-03-19T19:00:00Z.. | |
| # --quiet Suppress progress output (only show results) | |
| # --verbose Show detailed debug info | |
| # --help Show this help | |
set -euo pipefail
# =============================================================================
# Configuration
# =============================================================================
# Actions to trace, from the repeatable --action flag (at least one required).
declare -a TARGET_ACTIONS=()
ORG=""                 # GitHub org to search (--org, required)
MAX_DEPTH=2            # max recursion depth through shared workflows (--depth)
FORMAT="both"          # output format: text | json | both (--format)
SEARCH_EXTERNAL=false  # also search all of GitHub for wrappers (--external)
OUTPUT_FILE=""         # write JSON here instead of stdout (--output)
CHECK_RUNS=""          # raw --check-runs value, "FROM..TO"
CHECK_RUNS_FROM=""     # parsed window start (ISO 8601)
CHECK_RUNS_TO=""       # parsed window end; defaults to "now" when omitted
QUIET=false            # suppress progress output (--quiet)
VERBOSE=false          # show debug logging (--verbose)
# Rate limiting — modeled after github-viewer's approach
# Note: code_search resource has a 10/min limit (not 30 like regular search)
# These are stored as files (in COUNTER_DIR) to survive subshell boundaries.
SEARCH_CRITICAL_THRESHOLD=2   # at/below this remaining: block until reset
SEARCH_WARNING_THRESHOLD=4    # at/below this remaining: add a small delay
# Retry config (exponential backoff for server errors)
MAX_RETRIES=3
RETRY_BACKOFF_BASE=2
# Counter/state directory — set in main(), all mutable state lives here
COUNTER_DIR=""
# Results accumulator (newline-delimited JSON objects)
RESULTS_FILE=""
CACHE_DIR=""
# Visited sets (prevent infinite loops and redundant work)
VISITED_WORKFLOWS_FILE=""
VISITED_SEARCHES_FILE=""
| # ============================================================================= | |
| # Utility functions | |
| # ============================================================================= | |
# Logging helpers (all write to stderr):
#   log      — debug output, only when --verbose is set
#   progress — user-facing status, suppressed by --quiet
#   warn     — non-fatal warning
#   die      — fatal error, exits 1
log() {
  if [[ "$VERBOSE" == true ]]; then
    printf '[%s] %s\n' "$(date +%H:%M:%S)" "$*" >&2
  fi
}
progress() {
  if [[ "$QUIET" != true ]]; then
    printf '%s\n' "$*" >&2
  fi
}
warn() {
  printf '[WARN] %s\n' "$*" >&2
}
die() {
  printf '[ERROR] %s\n' "$*" >&2
  exit 1
}
# Render an inline progress bar that overwrites the current line (stderr).
# No-op when --quiet. Always returns 0 so callers are safe under `set -e`.
# Args: $1 = current, $2 = total, $3 = found count, $4 = label
progress_bar() {
  [[ "$QUIET" == true ]] && return
  local cur="$1" total="$2" hits="$3" label="$4"
  local width=20
  local percent=0
  if (( total > 0 )); then
    percent=$(( cur * 100 / total ))
  fi
  local n_fill=$(( percent * width / 100 ))
  local n_rest=$(( width - n_fill ))
  local fill rest
  fill=$(printf '%*s' "$n_fill" '' | tr ' ' '█')
  rest=$(printf '%*s' "$n_rest" '' | tr ' ' '░')
  # \r + clear-to-EOL redraws in place; newline only once the bar completes.
  printf '\r\033[K %s [%d/%d] %s%s %d found' "$label" "$cur" "$total" "$fill" "$rest" "$hits" >&2
  if (( cur >= total )); then
    printf '\n' >&2
  fi
  return 0
}
# Overwrite the progress line with a rate-limit wait notice (stderr).
# No-op when --quiet. Args: $1 = seconds until the limit resets.
progress_wait() {
  [[ "$QUIET" == true ]] && return
  local wait_secs="$1"
  printf '\r\033[K Rate limited. Waiting %ds for reset...' "$wait_secs" >&2
}
# Print the "Usage:" section of this file's header comment and exit 0.
# The sed range selects from '# Usage:' to the first non-comment line,
# deletes that terminator line, and strips the leading '# ' prefix.
# NOTE(review): '\?' is a GNU sed BRE extension; on BSD sed the prefix may
# not be stripped — confirm if macOS support matters.
usage() {
  sed -n '/^# Usage:/,/^[^#]/{ /^[^#]/d; s/^# \?//; p; }' "$0"
  exit 0
}
# Terminal cursor helpers. Hidden while progress bars redraw, restored on
# exit/Ctrl-C (see cleanup trap). Failures (no TTY / no terminfo) are ignored.
show_cursor() {
  tput cnorm 2>/dev/null || true
}
hide_cursor() {
  tput civis 2>/dev/null || true
}
# Restore the cursor and remove the temp cache directory.
# Registered on EXIT/INT/TERM so it runs on every exit path, including Ctrl-C.
cleanup() {
  show_cursor
  # Guard: only delete if CACHE_DIR was actually created (set in main()).
  if [[ -n "$CACHE_DIR" && -d "$CACHE_DIR" ]]; then
    rm -rf "$CACHE_DIR"
    log "Cleaned up cache dir: $CACHE_DIR"
  fi
}
trap cleanup EXIT INT TERM
# File-backed counters: each counter is a one-number file under COUNTER_DIR.
# Files (rather than shell variables) let counts survive the subshells that
# `... | while read` pipelines create elsewhere in this script.
counter_init() {
  printf '0\n' > "${COUNTER_DIR}/$1"
}
counter_inc() {
  local path="${COUNTER_DIR}/$1"
  local n
  n=$(<"$path")
  printf '%s\n' "$(( n + 1 ))" > "$path"
}
counter_dec() {
  local path="${COUNTER_DIR}/$1"
  local n
  n=$(<"$path")
  printf '%s\n' "$(( n - 1 ))" > "$path"
}
counter_get() {
  cat "${COUNTER_DIR}/$1"
}
counter_set() {
  printf '%s\n' "$2" > "${COUNTER_DIR}/$1"
}
# =============================================================================
# Argument parsing
# =============================================================================
# Parse CLI options into the global configuration variables above.
# Dies on unknown options and on missing/invalid required values.
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --org) ORG="$2"; shift 2 ;;
      --action) TARGET_ACTIONS+=("$2"); shift 2 ;;
      --depth) MAX_DEPTH="$2"; shift 2 ;;
      --format) FORMAT="$2"; shift 2 ;;
      --external) SEARCH_EXTERNAL=true; shift ;;
      --output) OUTPUT_FILE="$2"; shift 2 ;;
      --check-runs)
        CHECK_RUNS="$2"
        # The window must contain the ".." separator; previously a bare FROM
        # silently set FROM and TO to the whole string.
        [[ "$CHECK_RUNS" == *..* ]] || die "Invalid --check-runs: $CHECK_RUNS (expected FROM..TO or FROM..)"
        CHECK_RUNS_FROM="${2%%\.\.*}"
        CHECK_RUNS_TO="${2##*\.\.}"
        # Empty TO means "up to now".
        if [[ -z "$CHECK_RUNS_TO" ]]; then
          CHECK_RUNS_TO=$(date -u +%Y-%m-%dT%H:%M:%SZ)
        fi
        shift 2 ;;
      --quiet|-q) QUIET=true; shift ;;
      --verbose) VERBOSE=true; shift ;;
      --help|-h) usage ;;
      *) die "Unknown option: $1" ;;
    esac
  done
  [[ -n "$ORG" ]] || die "Missing required --org"
  [[ ${#TARGET_ACTIONS[@]} -gt 0 ]] || die "Missing required --action (at least one)"
  [[ "$FORMAT" =~ ^(text|json|both)$ ]] || die "Invalid --format: $FORMAT (must be text, json, or both)"
  [[ "$MAX_DEPTH" =~ ^[0-9]+$ ]] || die "Invalid --depth: $MAX_DEPTH (must be a number)"
}
| # ============================================================================= | |
| # Rate limiting | |
| # ============================================================================= | |
# Refresh the file-backed rate-limit counters from the GitHub API
# (costs 1 core API call). Prefers the code_search resource — the one
# actually consumed by /search/code — over the generic search resource.
# Returns non-zero if the /rate_limit call itself fails.
refresh_rate_limit() {
  local info
  info=$(gh api '/rate_limit' 2>/dev/null) || return 1
  counter_inc core_calls
  local search_info
  search_info=$(echo "$info" | jq '.resources.code_search // .resources.search // empty')
  if [[ -n "$search_info" ]]; then
    local remaining limit reset
    remaining=$(echo "$search_info" | jq -r '.remaining')
    limit=$(echo "$search_info" | jq -r '.limit')
    reset=$(echo "$search_info" | jq -r '.reset')
    counter_set search_remaining "$remaining"
    counter_set search_limit "$limit"
    counter_set search_reset "$reset"
    # NOTE(review): 'date -r <epoch>' is the BSD/macOS form; on GNU date it
    # fails and the 'at <epoch>' fallback is printed instead — looks intentional.
    log "Rate limit refreshed: search ${remaining}/${limit} (resets $(date -r "${reset}" +%H:%M:%S 2>/dev/null || echo "at ${reset}"))"
  fi
  local core_info
  core_info=$(echo "$info" | jq '.resources.core // empty')
  if [[ -n "$core_info" ]]; then
    counter_set core_remaining "$(echo "$core_info" | jq -r '.remaining')"
  fi
}
# Pre-emptively wait if we're approaching the search rate limit.
# Reads the file-backed estimates; may block (sleep) until the reset time.
# NOTE(review): assumes the search_* counters were initialized before the
# first search (presumably in main(), not visible here) — a missing or
# non-numeric counter file would make the -le comparisons error out.
throttle_search() {
  local search_calls remaining limit reset
  search_calls=$(counter_get search_calls)
  # Refresh from API every 3 search calls to stay accurate
  if (( search_calls % 3 == 0 && search_calls > 0 )); then
    refresh_rate_limit
  fi
  remaining=$(counter_get search_remaining)
  limit=$(counter_get search_limit)
  reset=$(counter_get search_reset)
  if [[ "$remaining" -le "$SEARCH_CRITICAL_THRESHOLD" ]]; then
    # Double-check against the live API before actually sleeping.
    refresh_rate_limit
    remaining=$(counter_get search_remaining)
    reset=$(counter_get search_reset)
    if [[ "$remaining" -le "$SEARCH_CRITICAL_THRESHOLD" && "$reset" -gt 0 ]]; then
      local now wait_secs
      now=$(date +%s)
      wait_secs=$(( reset - now + 1 ))
      # Cap waits at 2 minutes; longer implies clock skew or stale reset data.
      if [[ "$wait_secs" -gt 0 && "$wait_secs" -le 120 ]]; then
        progress_wait "$wait_secs"
        sleep "$wait_secs"
        # Quota has reset — restore the optimistic estimate.
        counter_set search_remaining "$limit"
      fi
    fi
  elif [[ "$remaining" -le "$SEARCH_WARNING_THRESHOLD" ]]; then
    log "Search rate limit getting low (${remaining}/${limit} remaining). Adding 3s delay."
    sleep 3
  fi
}
# =============================================================================
# GitHub API wrappers
# =============================================================================
# Make a GitHub API call with rate-limit tracking and retry logic.
# Args: $1 = resource type (search|core), remaining args passed to `gh api`.
# Outputs: response body on stdout.
# Returns: 0 on success, 1 on unrecoverable failure.
# Behavior: waits out rate limits (bounded), retries 5xx with exponential
# backoff, and book-keeps the file-backed quota estimates on success.
gh_api() {
  local resource="$1"; shift
  local retries=0
  local exit_code
  # Search quota is the scarce resource — throttle before spending a call.
  if [[ "$resource" == "search" ]]; then
    throttle_search
  fi
  local err_file="${CACHE_DIR}/last_headers.txt.err"
  while true; do
    local body
    # gh api outputs JSON body to stdout, errors to stderr
    body=$(gh api "$@" 2>"$err_file") && exit_code=0 || exit_code=$?
    if [[ $exit_code -eq 0 ]]; then
      case "$resource" in
        search)
          counter_inc search_calls
          counter_dec search_remaining
          log "Search calls: $(counter_get search_calls) (est. remaining: $(counter_get search_remaining))"
          ;;
        core)
          counter_inc core_calls
          counter_dec core_remaining
          ;;
      esac
      echo "$body"
      return 0
    fi
    local err_msg
    err_msg=$(cat "$err_file" 2>/dev/null || echo "unknown error")
    # Rate limited? 'rate limit' also matches 'API rate limit' and
    # 'secondary rate limit'. Use ERE: BRE '\|' alternation is a GNU grep
    # extension and is unreliable on BSD/macOS grep.
    if echo "$err_msg" | grep -qiE 'rate limit|abuse detection'; then
      # Ask the API for the authoritative reset time.
      local rate_info
      rate_info=$(gh api '/rate_limit' --jq ".resources.code_search // .resources.search // .resources.core" 2>/dev/null || true)
      if [[ -n "$rate_info" ]]; then
        local reset_at remaining
        reset_at=$(echo "$rate_info" | jq -r '.reset // 0')
        remaining=$(echo "$rate_info" | jq -r '.remaining // 0')
        counter_set search_remaining "$remaining"
        counter_set search_reset "$reset_at"
        local now wait_secs
        now=$(date +%s)
        wait_secs=$(( reset_at - now + 1 ))
        if [[ "$wait_secs" -gt 0 && "$wait_secs" -le 120 ]]; then
          progress_wait "$wait_secs"
          sleep "$wait_secs"
          counter_set search_remaining "$(counter_get search_limit)"
          continue
        fi
      fi
      # No usable reset info — fall back to a fixed 60s wait.
      progress_wait 60
      sleep 60
      continue
    fi
    # Server error (5xx) — retry with exponential backoff.
    # ('50[0-9]' already covers 502/503; the old pattern repeated them.)
    if echo "$err_msg" | grep -qE '50[0-9]'; then
      if [[ $retries -lt $MAX_RETRIES ]]; then
        retries=$((retries + 1))
        local delay=$(( RETRY_BACKOFF_BASE ** retries ))
        warn "Server error. Retrying in ${delay}s (attempt ${retries}/${MAX_RETRIES})"
        sleep "$delay"
        continue
      fi
    fi
    # Unrecoverable error
    warn "API call failed: $err_msg"
    return 1
  done
}
# Search GitHub code with pagination, accumulating every page of results.
# Args: $1 = query string
# Outputs: all result items as a single JSON array on stdout (empty array
#          if the search fails or matches nothing).
search_code() {
  local query="$1"
  local all_items="[]"
  local page=1
  local per_page=100
  log "Searching: $query"
  while true; do
    local result
    # On API failure, stop paginating and return whatever we gathered so far.
    result=$(gh_api search -X GET '/search/code' \
      -f q="$query" \
      -F per_page="$per_page" \
      -F page="$page" 2>/dev/null) || break
    local total_count items_count
    total_count=$(echo "$result" | jq -r '.total_count // 0')
    items_count=$(echo "$result" | jq -r '.items | length')
    if [[ "$items_count" -eq 0 ]]; then
      break
    fi
    # Merge this page's items into the accumulated array.
    all_items=$(echo "$all_items" "$result" | jq -s '.[0] + (.[1].items // [])')
    local accumulated
    accumulated=$(echo "$all_items" | jq 'length')
    log " Page $page: got $items_count items (${accumulated}/${total_count} total)"
    # Check if we've got everything or hit GitHub's hard 1000-result cap.
    if [[ "$accumulated" -ge "$total_count" || "$accumulated" -ge 1000 ]]; then
      if [[ "$total_count" -gt 1000 ]]; then
        warn "Search returned ${total_count} results but GitHub caps at 1000. Results may be incomplete."
      fi
      break
    fi
    page=$((page + 1))
  done
  echo "$all_items"
}
# Fetch a file's content from a repo, with an on-disk cache.
# Args: $1 = owner/repo, $2 = file path
# Outputs: decoded file content on stdout.
# Returns: 1 if the file is missing, empty, or cannot be base64-decoded.
fetch_content() {
  local repo="$1" path="$2"
  # Cache key: sanitized name plus a cksum suffix. Sanitizing alone mapped
  # both '/' and '.' to '_', so distinct files (e.g. "a/b.yml" vs "a.b/yml")
  # collided and silently served each other's cached content.
  local id="${repo}/${path}"
  local cache_key
  cache_key="$(echo "$id" | tr '/.' '__')_$(printf '%s' "$id" | cksum | tr ' ' '_')"
  local cache_file="${CACHE_DIR}/content_${cache_key}"
  if [[ -f "$cache_file" ]]; then
    counter_inc cache_hits
    cat "$cache_file"
    return 0
  fi
  local result
  result=$(gh_api core -X GET "/repos/${repo}/contents/${path}" \
    --jq '.content // empty' 2>/dev/null) || return 1
  if [[ -z "$result" ]]; then
    return 1
  fi
  local decoded
  decoded=$(echo "$result" | base64 -d 2>/dev/null) || return 1
  echo "$decoded" > "$cache_file"
  echo "$decoded"
}
# =============================================================================
# Analysis functions
# =============================================================================
# Extract all `uses:` action references from workflow YAML content.
# Handles both the list-item form (`- uses: ...`) and the mapping form
# (`uses: ...`); the previous pattern required whitespace immediately before
# `uses:` and so missed the very common `- uses:` step syntax entirely.
# Args: $1 = workflow file content
# Outputs: one reference per line (e.g. "owner/repo@v4"); local (./, ../)
#          and docker:// references are excluded.
extract_uses_refs() {
  local content="$1"
  echo "$content" | grep -E '^[[:space:]]*(-[[:space:]]+)?uses:[[:space:]]' | while IFS= read -r line; do
    local ref
    # Strip everything up to `uses:`, optional surrounding quotes, and any
    # trailing comment; xargs trims residual whitespace.
    ref=$(echo "$line" | sed -E 's/.*uses:[[:space:]]*"?([^"[:space:]#]+)"?.*/\1/' | xargs)
    # Skip local references (./path)
    if [[ "$ref" == ./* || "$ref" == ../* ]]; then
      continue
    fi
    # Skip Docker references
    if [[ "$ref" == docker://* ]]; then
      continue
    fi
    echo "$ref"
  done
}
# Classify how an action reference is pinned.
# Args: $1 = full uses reference (e.g. "actions/checkout@v4").
# Outputs: JSON {action, version, pin_type, pin_risk} where pin_type is
#   none (no @version), sha (40-hex commit), tag (v-prefixed/numeric),
#   or branch (anything else), with matching risk levels.
classify_pin() {
  local full_ref="$1"
  local action="${full_ref%%@*}"
  local ver=""
  if [[ "$full_ref" == *@* ]]; then
    ver="${full_ref#*@}"
  fi
  local ptype prisk
  if [[ -z "$ver" ]]; then
    ptype="none"; prisk="critical"
  elif [[ "$ver" =~ ^[0-9a-f]{40}$ ]]; then
    ptype="sha"; prisk="safe"
  elif [[ "$ver" =~ ^v?[0-9] ]]; then
    ptype="tag"; prisk="risky"
  else
    ptype="branch"; prisk="dangerous"
  fi
  jq -n \
    --arg action "$action" \
    --arg version "$ver" \
    --arg pin_type "$ptype" \
    --arg pin_risk "$prisk" \
    '{action: $action, version: $version, pin_type: $pin_type, pin_risk: $pin_risk}'
}
# Check whether workflow content defines a reusable workflow, i.e. declares
# the workflow_call trigger (bare key, inline under `on:`, or quoted form).
# Args: $1 = workflow content. Returns 0 if reusable, 1 otherwise.
is_reusable_workflow() {
  grep -qE '^[[:space:]]*workflow_call[[:space:]]*:?[[:space:]]*$|on:.*workflow_call|"workflow_call"' <<< "$1"
}
# =============================================================================
# Result recording
# =============================================================================
# Append one result object (a single-line JSON document) to the results file.
# Accepts the object as $1 or, if omitted, reads it from stdin.
add_result() {
  local json="${1:-$(cat)}"
  printf '%s\n' "$json" >> "$RESULTS_FILE"
  counter_inc found
}
# Visited-set helpers: one entry per line, matched with grep -x -F (whole
# line, fixed string). They prevent re-processing the same workflow or
# re-running an identical search, which also guards against recursion loops.
is_visited() {
  grep -qxF "$1" "$VISITED_WORKFLOWS_FILE" 2>/dev/null
}
mark_visited() {
  printf '%s\n' "$1" >> "$VISITED_WORKFLOWS_FILE"
}
is_search_done() {
  grep -qxF "$1" "$VISITED_SEARCHES_FILE" 2>/dev/null
}
mark_search_done() {
  printf '%s\n' "$1" >> "$VISITED_SEARCHES_FILE"
}
# =============================================================================
# Core trace logic
# =============================================================================
# Trace a single action through the org, in phases:
#   1. direct references in the org's workflow files
#   2. (--external only) external shared workflows wrapping the action
#   3. transitive references through shared/reusable workflows
# Args: $1 = action (e.g., "aquasecurity/trivy-action")
trace_action() {
  local action="$1"
  log "=== Tracing action: $action ==="
  # Phase count is 2 (direct + transitive), or 3 when --external adds a phase.
  local total_phases=2 phase=1
  [[ "$SEARCH_EXTERNAL" == true ]] && total_phases=3
  progress " [${phase}/${total_phases}] Searching $ORG for direct references..."
  find_direct_refs "$action"
  if [[ "$SEARCH_EXTERNAL" == true ]]; then
    phase=$((phase + 1))
    progress " [${phase}/${total_phases}] Searching all of GitHub for external shared workflows..."
    find_external_wrappers "$action"
  fi
  phase=$((phase + 1))
  progress " [${phase}/${total_phases}] Tracing transitive references through shared workflows..."
  # Transitive trace starts at depth 1 and is bounded by MAX_DEPTH.
  trace_shared_workflows "$action" 1
}
# Find direct references to an action in the org's workflow files.
# Searches code under .github/workflows, fetches each hit, extracts matching
# `uses:` lines, classifies their pin status, and records one result per
# matching reference.
# Args: $1 = action name
find_direct_refs() {
  local action="$1"
  local search_key="direct:${ORG}:${action}"
  # Skip if an identical search already ran (e.g. for a repeated --action).
  if is_search_done "$search_key"; then
    log " Skipping duplicate search: $search_key"
    return
  fi
  mark_search_done "$search_key"
  local items
  items=$(search_code "${action} path:.github/workflows user:${ORG} language:yaml")
  local count
  count=$(echo "$items" | jq 'length')
  progress " Found $count workflow files to inspect"
  if [[ "$count" -eq 0 ]]; then
    return
  fi
  # Process each result — fetch content and analyze.
  # NOTE: the `| while read` loop runs in a subshell; that is why shared
  # state (found counter, visited sets) lives in files, not shell variables.
  local item_idx=0
  echo "$items" | jq -c '.[]' | while IFS= read -r item; do
    local repo path
    repo=$(echo "$item" | jq -r '.repository.full_name')
    path=$(echo "$item" | jq -r '.path')
    item_idx=$((item_idx + 1))
    local workflow_id="${repo}/${path}"
    if is_visited "$workflow_id"; then
      log " Skipping already-visited: $workflow_id"
      progress_bar "$item_idx" "$count" "$(counter_get found)" "Inspecting..."
      continue
    fi
    mark_visited "$workflow_id"
    progress_bar "$item_idx" "$count" "$(counter_get found)" "Inspecting..."
    log " Fetching: ${repo}/${path}"
    local content
    content=$(fetch_content "$repo" "$path" 2>/dev/null) || {
      warn " Could not fetch ${repo}/${path} — skipping"
      continue
    }
    # Find all uses: lines that match our target action
    local refs
    refs=$(extract_uses_refs "$content" | grep -F "${action}" || true)
    if [[ -z "$refs" ]]; then
      # The search matched but no uses: line contains the action directly.
      # Could be a comment match or partial match — skip.
      log " No matching uses: reference found in $workflow_id (possible false positive)"
      continue
    fi
    # Check if this workflow is itself reusable (has a workflow_call trigger);
    # reusable hits later seed the transitive trace in trace_shared_workflows().
    local reusable=false
    if is_reusable_workflow "$content"; then
      reusable=true
    fi
    # Record each matching reference, with its pin classification attached.
    while IFS= read -r ref; do
      local pin_info
      pin_info=$(classify_pin "$ref")
      add_result "$(jq -n \
        --arg repo "$repo" \
        --arg workflow "$path" \
        --arg ref "$ref" \
        --arg action "$action" \
        --arg ref_type "direct" \
        --argjson reusable "$reusable" \
        --argjson pin "$pin_info" \
        --arg chain "$action" \
        '{
          repo: $repo,
          workflow: $workflow,
          uses_ref: $ref,
          target_action: $action,
          reference_type: $ref_type,
          is_reusable_workflow: $reusable,
          pin_type: $pin.pin_type,
          pin_value: $pin.version,
          pin_risk: $pin.pin_risk,
          chain: [$chain]
        }')"
    done <<< "$refs"
  done
}
# Search all of GitHub for external shared workflows that wrap the target
# action, then search the org for repos calling those external workflows.
# Only used with --external (slower: the first search is GitHub-wide).
# Args: $1 = action name
find_external_wrappers() {
  local action="$1"
  local search_key="external:${action}"
  if is_search_done "$search_key"; then
    return
  fi
  mark_search_done "$search_key"
  log " Searching all of GitHub for workflows wrapping: $action"
  local items
  items=$(search_code "${action} path:.github/workflows language:yaml")
  local count
  count=$(echo "$items" | jq 'length')
  log " Found $count global references"
  if [[ "$count" -eq 0 ]]; then
    return
  fi
  # Find repos outside our org that contain the action in workflow files
  local external_repos
  external_repos=$(echo "$items" | jq -r --arg org "$ORG" \
    '[.[] | select(.repository.full_name | startswith($org + "/") | not)] | unique_by(.repository.full_name) | .[].repository.full_name' 2>/dev/null || true)
  if [[ -z "$external_repos" ]]; then
    log " No external repos found"
    return
  fi
  local ext_count
  ext_count=$(echo "$external_repos" | wc -l | tr -d ' ')
  progress " Found $ext_count external repos wrapping $action"
  # For each external repo, check if it has reusable workflows and if our org calls them
  while IFS= read -r ext_repo; do
    [[ -z "$ext_repo" ]] && continue
    # Get the workflow files from this external repo that matched
    local ext_paths
    ext_paths=$(echo "$items" | jq -r --arg repo "$ext_repo" \
      '.[] | select(.repository.full_name == $repo) | .path')
    while IFS= read -r ext_path; do
      [[ -z "$ext_path" ]] && continue
      # Fetch and check if it's a reusable workflow — an external repo merely
      # using the action is irrelevant unless its workflow is callable.
      local content
      content=$(fetch_content "$ext_repo" "$ext_path" 2>/dev/null) || continue
      if ! is_reusable_workflow "$content"; then
        continue
      fi
      log " External reusable workflow found: ${ext_repo}/${ext_path}"
      # Search our org for callers of this external workflow
      local caller_ref="${ext_repo}/${ext_path}"
      find_callers_of_workflow "$caller_ref" "$action" 1 "$action"
    done <<< "$ext_paths"
  done <<< "$external_repos"
}
# Find repos that call shared workflows containing the target action.
# Seeds from direct results flagged is_reusable_workflow, then delegates to
# find_callers_of_workflow — which performs the deeper recursion itself.
# Args: $1 = action being traced
#       $2 = current depth
trace_shared_workflows() {
  local action="$1"
  local depth="$2"
  if [[ "$depth" -gt "$MAX_DEPTH" ]]; then
    log " Max depth ($MAX_DEPTH) reached — stopping recursion"
    return
  fi
  # Find all reusable workflows in our results that directly reference this action
  local reusable_workflows=""
  if [[ -s "$RESULTS_FILE" ]]; then
    reusable_workflows=$(jq -r --arg action "$action" \
      'select(.target_action == $action and .is_reusable_workflow == true and .reference_type == "direct") | "\(.repo)/\(.workflow)"' \
      "$RESULTS_FILE" 2>/dev/null | sort -u || true)
  fi
  if [[ -z "$reusable_workflows" ]]; then
    log " No reusable workflows found at depth $depth"
    return
  fi
  local rw_total rw_idx=0
  rw_total=$(echo "$reusable_workflows" | wc -l | tr -d ' ')
  progress " Found $rw_total reusable workflows to trace"
  while IFS= read -r workflow_path; do
    [[ -z "$workflow_path" ]] && continue
    rw_idx=$((rw_idx + 1))
    progress_bar "$rw_idx" "$rw_total" "$(counter_get found)" "Tracing callers..."
    find_callers_of_workflow "$workflow_path" "$action" "$depth" "$action"
  done <<< "$reusable_workflows"
}
# Find all repos in the org that call a given reusable workflow, recording
# each caller as an "indirect" result. Recurses into callers that are
# themselves reusable workflows, bounded by MAX_DEPTH.
# Args: $1 = workflow ref (e.g., "org/repo/.github/workflows/file.yml")
#       $2 = original target action
#       $3 = current depth
#       $4 = chain so far (comma-separated)
find_callers_of_workflow() {
  local workflow_ref="$1"
  local target_action="$2"
  local depth="$3"
  local chain_base="$4"
  local search_key="callers:${ORG}:${workflow_ref}"
  if is_search_done "$search_key"; then
    log " Skipping duplicate caller search: $workflow_ref"
    return
  fi
  mark_search_done "$search_key"
  # Build search query — search for the workflow reference in our org
  # Use the most specific part that's unique enough
  local search_term="$workflow_ref"
  log " Searching ${ORG} for callers of: $workflow_ref"
  local items
  items=$(search_code "${search_term} path:.github/workflows user:${ORG} language:yaml")
  local count
  count=$(echo "$items" | jq 'length')
  log " Found $count potential callers"
  if [[ "$count" -eq 0 ]]; then
    return
  fi
  echo "$items" | jq -c '.[]' | while IFS= read -r item; do
    local repo path
    repo=$(echo "$item" | jq -r '.repository.full_name')
    path=$(echo "$item" | jq -r '.path')
    # Visited key pairs the caller file with a short hash of the callee, so
    # the same file can be revisited for different callee workflows.
    # (md5sum = GNU coreutils; md5 -q -s = macOS; last resort: the raw ref.)
    local caller_id="${repo}/${path}->$(echo "$workflow_ref" | md5sum | cut -c1-8 2>/dev/null || md5 -q -s "$workflow_ref" 2>/dev/null || echo "$workflow_ref")"
    if is_visited "$caller_id"; then
      continue
    fi
    mark_visited "$caller_id"
    log " Fetching caller: ${repo}/${path}"
    local content
    content=$(fetch_content "$repo" "$path" 2>/dev/null) || {
      warn " Could not fetch ${repo}/${path} — skipping"
      continue
    }
    # Verify this workflow actually calls the target shared workflow.
    # Match on the callee's basename (e.g. "sbom" from ".../sbom.yml@main")
    # to tolerate differing versions/refs in the caller's uses: line.
    local matching_refs
    matching_refs=$(extract_uses_refs "$content" | grep -F "$(basename "${workflow_ref%%@*}" .yml)" || true)
    if [[ -z "$matching_refs" ]]; then
      log " No matching workflow_call reference found in ${repo}/${path} (false positive)"
      continue
    fi
    # Check if this caller is itself a reusable workflow
    local reusable=false
    if is_reusable_workflow "$content"; then
      reusable=true
    fi
    # Record each matching reference; chain = [via_workflow, target_action].
    while IFS= read -r ref; do
      local pin_info
      pin_info=$(classify_pin "$ref")
      local chain_array
      chain_array=$(jq -n --arg wf "$workflow_ref" --arg action "$target_action" '[$wf, $action]')
      add_result "$(jq -n \
        --arg repo "$repo" \
        --arg workflow "$path" \
        --arg ref "$ref" \
        --arg action "$target_action" \
        --arg ref_type "indirect" \
        --argjson reusable "$reusable" \
        --argjson pin "$pin_info" \
        --argjson chain "$chain_array" \
        --arg via "$workflow_ref" \
        '{
          repo: $repo,
          workflow: $workflow,
          uses_ref: $ref,
          target_action: $action,
          reference_type: $ref_type,
          is_reusable_workflow: $reusable,
          pin_type: $pin.pin_type,
          pin_value: $pin.version,
          pin_risk: $pin.pin_risk,
          chain: $chain,
          via_workflow: $via
        }')"
    done <<< "$matching_refs"
    # If this caller is also a reusable workflow, recurse
    if [[ "$reusable" == true && "$depth" -lt "$MAX_DEPTH" ]]; then
      local next_ref="${repo}/${path}"
      log " Recursing into reusable caller: $next_ref (depth $((depth + 1)))"
      find_callers_of_workflow "$next_ref" "$target_action" $((depth + 1)) "${chain_base},${workflow_ref}"
    fi
  done
}
# =============================================================================
# Leaf pin enrichment
# =============================================================================
# For indirect references, resolve the leaf action's actual pin status.
# An indirect ref like vets-api -> vsp-github-actions/sbom.yml@main -> trivy-action@0.35.0
# should report the leaf pin (tag 0.35.0) not the caller's pin (branch main).
enrich_leaf_pins() {
  [[ ! -s "$RESULTS_FILE" ]] && return
  progress "Resolving leaf pin status for indirect references..."
  local enriched_file="${CACHE_DIR}/results_leaf.jsonl"
  # Build a lookup of direct results keyed by "repo/workflow" — the same
  # shape as an indirect entry's via_workflow. These hold the actual
  # target-action pin; if a workflow has several direct hits, .[0] wins.
  local direct_lookup
  direct_lookup=$(jq -s '
    [.[] | select(.reference_type == "direct")]
    | group_by("\(.repo)/\(.workflow)")
    | map({key: "\(.[0].repo)/\(.[0].workflow)", value: .[0]})
    | from_entries
  ' "$RESULTS_FILE")
  # Enrich each indirect result with the leaf pin from its via_workflow;
  # direct results copy their own pin into the leaf_* fields.
  jq -c --argjson lookup "$direct_lookup" '
    if .reference_type == "indirect" and .via_workflow then
      ($lookup[.via_workflow] // null) as $leaf |
      if $leaf then
        . + {
          leaf_pin_type: $leaf.pin_type,
          leaf_pin_value: $leaf.pin_value,
          leaf_pin_risk: $leaf.pin_risk,
          leaf_uses_ref: $leaf.uses_ref
        }
      else . end
    else
      # Direct refs: the pin IS the leaf pin
      . + {
        leaf_pin_type: .pin_type,
        leaf_pin_value: .pin_value,
        leaf_pin_risk: .pin_risk,
        leaf_uses_ref: .uses_ref
      }
    end
  ' "$RESULTS_FILE" > "$enriched_file"
  mv "$enriched_file" "$RESULTS_FILE"
}
# =============================================================================
# Workflow run checking
# =============================================================================
# Check whether at-risk workflows (those whose resolved leaf pin is not a
# commit SHA) actually ran during the --check-runs window. Enriches
# RESULTS_FILE entries with run_count and ran_during_window fields.
check_workflow_runs() {
  [[ -z "$CHECK_RUNS_FROM" ]] && return
  progress "Checking workflow runs during ${CHECK_RUNS_FROM}..${CHECK_RUNS_TO}..."
  # Unique repo+workflow pairs whose leaf pin is not a SHA.
  local at_risk
  at_risk=$(jq -r 'select(.leaf_pin_type != "sha" and .leaf_pin_type != null) | "\(.repo)\t\(.workflow)"' "$RESULTS_FILE" | sort -u)
  if [[ -z "$at_risk" ]]; then
    progress " No at-risk workflows to check"
    return
  fi
  local total pair_idx=0 hits=0
  total=$(echo "$at_risk" | wc -l | tr -d ' ')
  # Work on a copy; swap it in at the end.
  local enriched_file="${CACHE_DIR}/results_enriched.jsonl"
  cp "$RESULTS_FILE" "$enriched_file"
  while IFS=$'\t' read -r repo workflow; do
    [[ -z "$repo" ]] && continue
    pair_idx=$((pair_idx + 1))
    progress_bar "$pair_idx" "$total" "$hits" "Checking runs..."
    local wf_name
    wf_name=$(basename "$workflow")
    # NOTE(review): uses raw `gh api` rather than the gh_api wrapper — these
    # core calls bypass retry/throttling; presumably acceptable, but confirm.
    local run_count
    run_count=$(gh api "repos/${repo}/actions/workflows/${wf_name}/runs?created=${CHECK_RUNS_FROM}..${CHECK_RUNS_TO}&per_page=1" \
      --jq '.total_count // 0' 2>/dev/null) || run_count=0
    # Coerce anything non-numeric (e.g. API error text) to 0.
    if ! [[ "$run_count" =~ ^[0-9]+$ ]]; then
      run_count=0
    fi
    counter_inc core_calls
    if [[ "$run_count" -gt 0 ]]; then
      hits=$((hits + 1))
    fi
    # Fold run data into every entry for this repo+workflow.
    # (-c keeps the file one JSON object per line, matching its .jsonl name.)
    local tmp_file="${CACHE_DIR}/results_tmp.jsonl"
    jq -c --arg repo "$repo" --arg wf "$workflow" --argjson count "${run_count:-0}" '
      if .repo == $repo and .workflow == $wf then
        . + {run_count: $count, ran_during_window: ($count > 0)}
      else . end
    ' "$enriched_file" > "$tmp_file"
    mv "$tmp_file" "$enriched_file"
  done <<< "$at_risk"
  # Default all remaining (unchecked, not at-risk) entries to zero runs.
  # The original keyed this pass on .pin_type == "sha", which (a) disagreed
  # with the .leaf_pin_type selection above and (b) clobbered freshly-fetched
  # counts on entries whose direct pin is a SHA but whose leaf pin is not.
  local tmp_file="${CACHE_DIR}/results_tmp.jsonl"
  jq -c 'if (has("run_count") | not) then . + {run_count: 0, ran_during_window: false} else . end' \
    "$enriched_file" > "$tmp_file"
  mv "$tmp_file" "$enriched_file"
  # Replace results file
  mv "$enriched_file" "$RESULTS_FILE"
  progress " $hits of $total at-risk workflows ran during the window"
}
| # ============================================================================= | |
| # Output formatting | |
| # ============================================================================= | |
# Deduplicate results by repo+workflow+uses_ref. Outputs a JSON array.
# Emits "[]" when RESULTS_FILE is empty or missing.
deduped_results() {
  if [[ -s "$RESULTS_FILE" ]]; then
    # Slurp the JSONL stream into an array and drop duplicate triples.
    jq -s 'unique_by(.repo + "|" + .workflow + "|" + .uses_ref)' "$RESULTS_FILE"
  else
    printf '[]\n'
  fi
}
# Emit the full structured JSON report to stdout: target actions, options,
# deduplicated results, summary counts, and API-usage counters.
# Globals read: TARGET_ACTIONS, ORG, MAX_DEPTH, SEARCH_EXTERNAL, plus
# RESULTS_FILE (via deduped_results) and the file-based counters.
output_json() {
  local results_array
  results_array=$(deduped_results)
  # Compute every summary count in ONE jq pass instead of spawning eight
  # separate jq subprocesses over the same array.
  local counts
  counts=$(jq -c '{
      total: length,
      direct: ([.[] | select(.reference_type == "direct")] | length),
      indirect: ([.[] | select(.reference_type == "indirect")] | length),
      sha: ([.[] | select(.pin_type == "sha")] | length),
      tag: ([.[] | select(.pin_type == "tag")] | length),
      branch: ([.[] | select(.pin_type == "branch")] | length),
      none: ([.[] | select(.pin_type == "none")] | length),
      unique_repos: ([.[].repo] | unique | length)
    }' <<<"$results_array")
  jq -n \
    --argjson actions "$(printf '%s\n' "${TARGET_ACTIONS[@]}" | jq -R . | jq -s .)" \
    --arg org "$ORG" \
    --arg scan_time "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
    --argjson depth "$MAX_DEPTH" \
    --argjson external "$SEARCH_EXTERNAL" \
    --argjson results "$results_array" \
    --argjson counts "$counts" \
    --argjson search_calls "$(counter_get search_calls)" \
    --argjson core_calls "$(counter_get core_calls)" \
    --argjson cache_hits "$(counter_get cache_hits)" \
    '{
      target_actions: $actions,
      org: $org,
      scan_time: $scan_time,
      options: {max_depth: $depth, search_external: $external},
      results: $results,
      summary: {
        total_references: $counts.total,
        unique_repos: $counts.unique_repos,
        direct: $counts.direct,
        indirect: $counts.indirect,
        pinning: {sha: $counts.sha, tag: $counts.tag, branch: $counts.branch, none: $counts.none}
      },
      api_usage: {search_calls: $search_calls, core_calls: $core_calls, cache_hits: $cache_hits}
    }'
}
# Render the human-readable report to stdout: direct references, indirect
# references (via shared workflows), pinning summary, optional run-window
# findings (when --check-runs was used), and API-usage counters.
# Globals read: RESULTS_FILE (via deduped_results), TARGET_ACTIONS, ORG,
# CHECK_RUNS_FROM, CHECK_RUNS_TO. Writes only to stdout.
output_text() {
  local results_array
  results_array=$(deduped_results)
  if [[ "$(echo "$results_array" | jq 'length')" -eq 0 ]]; then
    printf '\nNo references found.\n'
    return
  fi
  printf '\n'
  # %.0s trick: the format consumes each brace-expansion arg without printing
  # it, so the literal '=' is emitted once per item — a 70-char rule.
  printf '%.0s=' {1..70}
  printf '\n'
  for action in "${TARGET_ACTIONS[@]}"; do
    printf ' Trace: %s in %s\n' "$action" "$ORG"
  done
  printf '%.0s=' {1..70}
  printf '\n'
  # Direct references: repos whose workflows reference the action themselves.
  local direct
  direct=$(echo "$results_array" | jq -c '[.[] | select(.reference_type == "direct")]')
  local direct_count
  direct_count=$(echo "$direct" | jq 'length')
  printf '\n DIRECT REFERENCES (%d)\n' "$direct_count"
  printf '%.0s-' {1..40}
  printf '\n'
  if [[ "$direct_count" -gt 0 ]]; then
    # Group by repo, then print workflow, ref, pin status (with risk glyph),
    # reusable-workflow flag, and — if run checking ran — the run verdict.
    # jq stderr is suppressed so one malformed entry can't abort the report.
    echo "$direct" | jq -r 'group_by(.repo)[] | .[0].repo as $repo |
      "\n \($repo)",
      (.[] | " \(.workflow)",
      " \(.uses_ref)",
      " pin: \(.pin_type) (\(.pin_value)) \(if .pin_risk == "safe" then "✓" elif .pin_risk == "risky" then "⚠" else "✗" end)",
      " reusable: \(if .is_reusable_workflow then "YES (workflow_call)" else "no" end)",
      (if .ran_during_window == true then " RUNS: \(.run_count) during window (verify)" elif .ran_during_window == false then " runs: 0 (clear)" else empty end))' 2>/dev/null || true
  fi
  # Indirect references: repos that reach the action through a shared workflow.
  local indirect
  indirect=$(echo "$results_array" | jq -c '[.[] | select(.reference_type == "indirect")]')
  local indirect_count
  indirect_count=$(echo "$indirect" | jq 'length')
  printf '\n\n INDIRECT REFERENCES (%d) — via shared workflows\n' "$indirect_count"
  printf '%.0s-' {1..40}
  printf '\n'
  if [[ "$indirect_count" -gt 0 ]]; then
    # Same shape as the direct section, plus: the leaf pin (the actual action
    # pin inside the shared workflow, shown only when it differs from the
    # caller's pin), the via_workflow, and the full dependency chain.
    echo "$indirect" | jq -r 'group_by(.repo)[] | .[0].repo as $repo |
      "\n \($repo)",
      (.[] | " \(.workflow)",
      " \(.uses_ref)",
      " pin: \(.pin_type) (\(.pin_value)) \(if .pin_risk == "safe" then "✓" elif .pin_risk == "risky" then "⚠" else "✗" end)",
      (if .leaf_pin_type and .leaf_pin_type != .pin_type then " leaf: \(.leaf_pin_type) (\(.leaf_pin_value)) \(if .leaf_pin_risk == "safe" then "✓" elif .leaf_pin_risk == "risky" then "⚠" else "✗" end) ← actual action pin" else empty end),
      " via: \(.via_workflow // "unknown")",
      " chain: \(.chain | join(" → "))",
      (if .ran_during_window == true then " RUNS: \(.run_count) during window (verify)" elif .ran_during_window == false then " runs: 0 (clear)" else empty end))' 2>/dev/null || true
  fi
  # Summary — pin-type breakdown over the deduplicated result set.
  local total pin_sha pin_tag pin_branch pin_none unique_repos
  total=$(echo "$results_array" | jq 'length')
  unique_repos=$(echo "$results_array" | jq '[.[].repo] | unique | length')
  pin_sha=$(echo "$results_array" | jq '[.[] | select(.pin_type == "sha")] | length')
  pin_tag=$(echo "$results_array" | jq '[.[] | select(.pin_type == "tag")] | length')
  pin_branch=$(echo "$results_array" | jq '[.[] | select(.pin_type == "branch")] | length')
  pin_none=$(echo "$results_array" | jq '[.[] | select(.pin_type == "none")] | length')
  printf '\n\n PINNING SUMMARY\n'
  printf '%.0s-' {1..40}
  printf '\n'
  printf ' SHA-pinned: %3d (safe)\n' "$pin_sha"
  printf ' Tag-pinned: %3d (risky — tags are mutable)\n' "$pin_tag"
  printf ' Branch: %3d (dangerous)\n' "$pin_branch"
  if [[ "$pin_none" -gt 0 ]]; then
    printf ' No version: %3d (critical)\n' "$pin_none"
  fi
  printf ' ─────────────────\n'
  printf ' Total refs: %3d across %d repos\n' "$total" "$unique_repos"
  # Compromised workflows section (only when --check-runs was used)
  # Compromised: ran during window AND leaf pin is not safe
  local compromised
  compromised=$(echo "$results_array" | jq -c '[.[] | select(.ran_during_window == true and .leaf_pin_risk != "safe")]')
  local compromised_count
  compromised_count=$(echo "$compromised" | jq 'length')
  # Safe runs: ran during window but leaf pin was safe (e.g., through vsp-github-actions@0.35.0)
  local safe_runs
  safe_runs=$(echo "$results_array" | jq -c '[.[] | select(.ran_during_window == true and .leaf_pin_risk == "safe")]')
  local safe_runs_count
  safe_runs_count=$(echo "$safe_runs" | jq 'length')
  if [[ "$compromised_count" -gt 0 ]]; then
    local compromised_repos
    compromised_repos=$(echo "$compromised" | jq '[.[].repo] | unique | length')
    printf '\n\n *** POTENTIALLY COMPROMISED: RAN DURING WINDOW (%d across %d repos) ***\n' "$compromised_count" "$compromised_repos"
    printf '%.0s-' {1..40}
    printf '\n'
    echo "$compromised" | jq -r 'group_by(.repo)[] | .[0].repo as $repo |
      "\n \($repo)",
      (.[] | " \(.workflow) - \(.run_count) runs [\(.leaf_pin_type // .pin_type) \(.leaf_pin_value // .pin_value)]")' 2>/dev/null || true
    printf '\n Note: Verify via git history that the workflow used a compromised ref at\n'
    printf ' the time of execution. Branch/tag state may have changed since.\n'
  elif [[ -n "$CHECK_RUNS_FROM" ]]; then
    # --check-runs was requested but nothing at-risk actually ran.
    printf '\n\n No at-risk workflows ran during %s..%s\n' "$CHECK_RUNS_FROM" "$CHECK_RUNS_TO"
  fi
  if [[ "$safe_runs_count" -gt 0 ]]; then
    local safe_repos
    safe_repos=$(echo "$safe_runs" | jq '[.[].repo] | unique | length')
    printf '\n SAFE: Ran during window but leaf action was SHA/safe-pinned (%d across %d repos)\n' "$safe_runs_count" "$safe_repos"
    printf '%.0s-' {1..40}
    printf '\n'
    echo "$safe_runs" | jq -r 'group_by(.repo)[] | .[0].repo as $repo |
      "\n \($repo)",
      (.[] | " \(.workflow) [\(.leaf_pin_type) \(.leaf_pin_value)]")' 2>/dev/null || true
    printf '\n'
  fi
  printf '\n API USAGE\n'
  printf '%.0s-' {1..40}
  printf '\n'
  printf ' Search API calls: %d\n' "$(counter_get search_calls)"
  printf ' Core API calls: %d\n' "$(counter_get core_calls)"
  printf ' Cache hits: %d\n' "$(counter_get cache_hits)"
  printf '\n'
}
# Generate CSV report of all deduplicated results.
# Args: $1 = output file path
output_csv() {
  local outfile=$1
  local rows
  rows=$(deduped_results)
  local header='repo,workflow,uses_ref,reference_type,pin_type,pin_value,pin_risk,leaf_pin_type,leaf_pin_value,leaf_pin_risk,is_reusable,target_action,via_workflow,run_count,ran_during_window'
  {
    printf '%s\n' "$header"
    # One CSV row per result; optional fields fall back to "" via //.
    jq -r '.[] |
      [.repo, .workflow, .uses_ref, .reference_type, .pin_type, .pin_value, .pin_risk,
       (.leaf_pin_type // ""), (.leaf_pin_value // ""), (.leaf_pin_risk // ""),
       (if .is_reusable_workflow then "yes" else "no" end),
       .target_action, (.via_workflow // ""),
       (.run_count // ""), (if .ran_during_window == true then "yes" elif .ran_during_window == false then "no" else "" end)] | @csv' <<<"$rows"
  } > "$outfile"
}
| # ============================================================================= | |
| # Main | |
| # ============================================================================= | |
# Entry point: parse args, set up temp workspace and counters, trace each
# target action, enrich and (optionally) run-check results, then emit
# text/JSON/CSV reports per --format.
main() {
  parse_args "$@"
  # Hide cursor during progress display
  hide_cursor
  # Set up temp directory for cache, results, and counters
  CACHE_DIR=$(mktemp -d "${TMPDIR:-/tmp}/gh-action-trace.XXXXXX")
  COUNTER_DIR="$CACHE_DIR"
  RESULTS_FILE="${CACHE_DIR}/results.jsonl"
  VISITED_WORKFLOWS_FILE="${CACHE_DIR}/visited_workflows.txt"
  VISITED_SEARCHES_FILE="${CACHE_DIR}/visited_searches.txt"
  touch "$RESULTS_FILE" "$VISITED_WORKFLOWS_FILE" "$VISITED_SEARCHES_FILE"
  # Initialize file-based counters
  counter_init search_calls
  counter_init core_calls
  counter_init cache_hits
  counter_init found
  # Seed conservative rate-limit defaults (presumably matching GitHub's
  # search/core quotas — refresh_rate_limit below overwrites with real values).
  counter_set search_remaining 10
  counter_set search_limit 10
  counter_set search_reset 0
  counter_set core_remaining 5000
  log "Cache dir: $CACHE_DIR"
  log "Org: $ORG"
  log "Actions: ${TARGET_ACTIONS[*]}"
  log "Max depth: $MAX_DEPTH"
  # Check gh auth
  gh auth status &>/dev/null || die "Not authenticated with gh. Run: gh auth login"
  # Refresh rate limits before starting so we know our budget
  refresh_rate_limit
  # Trace each target action
  for action in "${TARGET_ACTIONS[@]}"; do
    progress "Tracing: $action in $ORG (depth=$MAX_DEPTH)"
    trace_action "$action"
  done
  # Enrich indirect refs with leaf pin status
  enrich_leaf_pins
  # Check workflow runs if --check-runs was specified
  if [[ -n "$CHECK_RUNS_FROM" ]]; then
    check_workflow_runs
  fi
  show_cursor
  progress "Done. $(counter_get search_calls) search + $(counter_get core_calls) core API calls."
  # Build report filename base from org and actions: append each action's
  # short name (part after the last '/'), then sanitize separators.
  local report_slug
  report_slug="${ORG}"
  for action in "${TARGET_ACTIONS[@]}"; do
    report_slug="${report_slug}_${action##*/}"
  done
  report_slug=$(echo "$report_slug" | tr '/' '-' | tr ' ' '-')
  local timestamp
  timestamp=$(date +%Y%m%d-%H%M%S)
  local report_base="gh-action-trace_${report_slug}_${timestamp}"
  # Generate reports based on format:
  #   text → text only (no files); json → JSON+CSV files; both → all three.
  local json_file="${report_base}.json"
  local csv_file="${report_base}.csv"
  case "$FORMAT" in
    text)
      output_text
      ;;
    json)
      # --output overrides the auto-generated JSON path; CSV path is fixed.
      if [[ -n "$OUTPUT_FILE" ]]; then
        output_json > "$OUTPUT_FILE"
      else
        output_json > "$json_file"
      fi
      output_csv "$csv_file"
      ;;
    both)
      output_text
      if [[ -n "$OUTPUT_FILE" ]]; then
        output_json > "$OUTPUT_FILE"
      else
        output_json > "$json_file"
      fi
      output_csv "$csv_file"
      ;;
  esac
  # Show report file paths (only for formats that generate files)
  if [[ "$FORMAT" != "text" ]]; then
    printf '\n REPORTS\n'
    printf '%.0s-' {1..40}
    printf '\n'
    if [[ -n "$OUTPUT_FILE" ]]; then
      printf ' JSON: %s\n' "$OUTPUT_FILE"
    else
      printf ' JSON: %s\n' "$json_file"
    fi
    printf ' CSV: %s\n' "$csv_file"
    printf '\n'
  fi
}
| main "$@" |
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
gh-action-trace
Find all direct and transitive uses of GitHub Actions across an org. Built during the Trivy supply chain attack response to solve a gap: GitHub code search finds direct action references, but not repos using actions indirectly through shared/reusable workflows.
Features
- Finds direct references to the target action(s) across the org
- Detects reusable workflows (workflow_call) wrapping the target action, then recursively finds all callers
- Optional GitHub-wide search for external shared workflows (--external)
- Traces multiple actions in one run (--action foo --action bar)
- Configurable recursion depth (--depth N)
- Workflow run-history checking for a time window (--check-runs FROM..TO)

Dependencies

- GitHub CLI (gh) — authenticated
- jq
- bash
- base64 (coreutils)

Installation
Local
Docker (no local install needed)
The script is downloaded inside the ephemeral container and never touches the host filesystem:
Usage
Output
By default (--format both), reports are auto-generated in the current directory:

- gh-action-trace_&lt;org&gt;_&lt;action&gt;_&lt;timestamp&gt;.json — full structured results
- gh-action-trace_&lt;org&gt;_&lt;action&gt;_&lt;timestamp&gt;.csv — spreadsheet-friendly

Use --format text for text-only output (no files). Use --format json for report files only (no text summary).

Text output includes:
How it works
- Direct search: uses GitHub code search to find org workflows that reference the target action directly
- External search (--external): searches all of GitHub for reusable workflows wrapping the action, then checks if the org calls them
- Recursion: follows shared-workflow callers up to --depth levels.
- Run checking (--check-runs): for each at-risk (non-SHA-pinned) result, queries the GitHub Actions API to see if the workflow ran during the specified time window. Flags potentially compromised workflows in the output. Results should be verified via git history, as branch/tag state may have changed since the incident. The tool checks current refs, not the exact ref resolved at execution time.