ernstki · February 20, 2025 12:30 · ernstki · May 22, 2024 · cmuller · Nov 21, 2024
diff --git a/gitlapi b/gitlapi
 #!/usr/bin/env bash
 ##
 ##  Query a GitLab v4 API endpoint, with pagination
 ##
 ##  Author:    Kevin Ernst <ernstki -at- mail.uc.edu>
 ##  License:   ISC or WTFPL, at your discretion
 ##  Date:      22 May 2024
 ##  Requires:  jq (https://github.com/jqlang/jq)
 ##  Homepage:  https://gist.github.com/ernstki/3707675c8a4ddb06d128154947c49e29
 ##
 # Name this script was invoked as, without any directory part; used in the
 # usage text.  Parameter expansion avoids forking `basename` and also copes
 # with a path that happens to start with a dash.
 ME=${BASH_SOURCE[0]##*/}

 # _urlencode STRING [STRING…]
 #
 # Percent-encode every argument for use in a URL query string and print the
 # result on stdout (no trailing newline).  The unreserved characters
 # A-Z a-z 0-9 - . _ ~ are copied through unchanged; every other *byte* is
 # written as %XX (upper-case hex), so multi-byte UTF-8 characters come out as
 # one %XX per octet.  Separate arguments are joined with `+`.
 #
 # The body is a subshell, so the IFS / locale changes and the helper
 # variables never leak into the caller.
 _urlencode() (
    # Author:   Chris Down (https://gist.github.com/cdown/1163649)
    #           with modifications to support multiple arguments by me
    # License:  Unknown
    #
    # LC_ALL (not just LC_COLLATE, which LC_ALL would override) keeps the
    # bracket ranges below strictly ASCII.
    LC_ALL=C
    # the `for` loop below relies on default word splitting of od's output
    IFS=$' \t\n'
    local hex
    while (( $# )); do
        # `od` dumps the argument as two-digit lower-case hex, one token per
        # byte, independent of the current character encoding
        for hex in $(printf '%s' "$1" | od -An -v -tx1); do
            case $hex in
                # 0-9, A-O, P-Z, a-o, p-z, then '-', '.', '_', '~'
                3[0-9] | 4[1-9a-f] | 5[0-9a] | 6[1-9a-f] | 7[0-9a] | 2d | 2e | 5f | 7e)
                    printf '%b' "\\x$hex"
                    ;;
                *)
                    printf '%%%02X' "$(( 16#$hex ))"
                    ;;
            esac
        done
        if (( $# > 1 )); then printf '+'; fi  # delimit separate args w/ +'s
        shift
    done
 )

 # Print the numeric value of HTTP response header $1, reading a raw header
 # block (as produced by `curl --head`) on stdin.
 #
 # Header names are compared case-insensitively, because HTTP/2 responses
 # carry lower-case names (`x-total`) where HTTP/1.1 servers usually send
 # `X-Total`.  Only the leading digits of the value are printed, which also
 # drops the trailing CR.  If the header occurs more than once (e.g. across
 # redirects) the last occurrence wins; nothing is printed if it is absent or
 # not numeric.
 _gitlab_api_header_value() {
    awk -F ': *' -v name="$1" '
        tolower($1) == tolower(name) { value = $2; sub(/[^0-9].*$/, "", value) }
        END { if (value != "") print value }
    '
 }

 # _gitlab_api [OPTION…] { /endpoint | TERM [TERM…] } [&qs_arg…]
 #
 # Query a GitLab v4 API endpoint with pagination and print the combined
 # results as one JSON array on stdout.
 #
 # Environment:
 #   GITLAB_URL    base URL of the GitLab instance (required)
 #   GITLAB_TOKEN  private token, sent as a Bearer token (required)
 #   TRACE         non-zero enables `set -x` execution tracing
 #
 # Arguments:
 #   /endpoint     API path below /api/v4 (default: /search)
 #   TERM…         bare words become a blob search; only valid with /search
 #   &name=value   extra query-string arguments, appended verbatim
 #   -c/-t/-a/-p/-pp and -h as described in the usage text below; any other
 #   option starting with a dash is handed to `curl` (except -i/--include)
 #
 # Returns: 1 on usage errors or when the page count cannot be determined;
 # otherwise the status of the last command run.  The body is a subshell, so
 # `set -u` / `set -x` and all variables stay private to one invocation.
 _gitlab_api() (
    # set TRACE=1 in the environment to enable execution tracing
    # (note that the trace then includes the Authorization header)
    if (( TRACE )); then set -x; fi
    set -u
    : "${GITLAB_URL:?Please define GITLAB_URL as the base URL for your GitLab instance}"
    : "${GITLAB_TOKEN:?Please define GITLAB_TOKEN with your private token for the GitLab API}"
    local api=$GITLAB_URL/api/v4
    local endpoint=/search
    local searchterms=()
    local curlargs=(
        --silent
        --header "Authorization: Bearer $GITLAB_TOKEN"
    )
    local perpage=20
    local queryargs= count= totals= all= pages= wantheader= p

    while (( $# )); do
        case $1 in
            -h|--help|--flags|-\?)
                echo "
  $ME - query GitLab v4 API endpoints with pagination

  usage:
    $ME [-h|--help]
    $ME [-c|--count] [-t|--totals] { /endpoint | TERM [TERM…] }
    $ME [-a|--all] [-p|--pages INT] [-pp|--per-page INT]
    ${ME//?/ } { /endpoint | TERM [TERM…] } [&qs_arg1[&qs_arg2…]]

  where:
    -h, --help       shows this help
    -c, --count      just prints the number of results and returns
    -t, --totals     prints HTTP headers for # pages, # per page, total results
    -a, --all        returns all records instead of just the first page
    -p, --pages      limit results to this many pages (default: 1)
    -pp, --per-page  specifies page size (default: $perpage)

   …and other options starting with a dash are passed through to \`curl\`

  examples:
    $ $ME search terms            # code search for 'search' and 'terms'
    $ $ME --all '\"exact phrase\"'  # search for an exact phrase, all results
    $ $ME -I '\"search phrase\"'    # see HTTP headers for the above
    $ $ME --count /projects       # count how many projects

  homepage:
    https://gist.github.com/ernstki/3707675c8a4ddb06d128154947c49e29
 "
                return
                ;;
            -c|--count)
                count=1
                ;;
            -t|--totals)
                totals=1
                ;;
            -a|--all)
                all=1
                ;;
            -p|--pages|-pp|--pp|--per-page)
                # check before consuming, so a missing or non-numeric value
                # gives a clear message instead of an "unbound variable" abort
                if (( $# < 2 )) || [[ ! $2 =~ ^[0-9]+$ ]]; then
                    echo "ERROR: The \`$1\` option requires a non-negative integer argument." >&2
                    return 1
                fi
                # 10# forces base 10, so "08" is not rejected as bad octal
                case $1 in
                    -p|--pages) pages=$(( 10#$2 )) ;;
                    *)          perpage=$(( 10#$2 )) ;;
                esac
                shift
                ;;
            -i*|--include)
                # `-i` (alone or leading a short-option cluster) intersperses
                # headers into JSON output which `jq` can't handle; matched
                # exactly so that e.g. `--insecure` still reaches curl
                echo 'Ignoring unsupported `-i` / `--include` curl option.' >&2
                ;;
            -I*|--head)
                # remember that only headers were asked for, rather than
                # searching the joined argument list for "-I" later (which
                # could also match inside the token)
                wantheader=1
                curlargs+=("$1")
                ;;
            -*)
                # FIXME: should probably _only_ accept -I / --head
                curlargs+=("$1")
                ;;
            /*)
                endpoint=$1
                ;;
            \&*)
                queryargs+=$1
                ;;
            *)
                searchterms+=("$1")
                ;;
        esac
        shift
    done

    if (( ${#searchterms[@]} > 0 )); then
        if [[ $endpoint != /search ]]; then
            echo 'ERROR: Bare search terms only accepted for `/search` endpoint.' >&2
            return 1
        fi
        # otherwise (tracing is switched off while encoding, to keep the
        # per-character loop out of the trace)
        queryargs+="&scope=blobs&search=$(set +x; _urlencode "${searchterms[@]}")"
        if (( ${TRACE:-0} )); then declare -p queryargs; fi
    fi

    queryargs+="&per_page=$perpage"

    if (( all && pages )); then
        echo 'ERROR: The `--all` and `--pages` options are mutually-exclusive.' >&2
        return 1
    fi

    if (( count )); then
        # total number of results, taken from the response headers
        curl --head "${curlargs[@]}" "$api$endpoint?$queryargs" \
          | _gitlab_api_header_value X-Total
        return

    elif (( totals )); then
        # print summary of results using HTTP response headers
        curl --head "${curlargs[@]}" "$api$endpoint?$queryargs" \
          | grep -iE '^x-(page|per-page|total|total-pages):' \
          | tr -d '\r'
        return

    elif (( all )); then
        # get the total number of pages
        pages=$(
            curl --head "${curlargs[@]}" "$api$endpoint?$queryargs" \
              | _gitlab_api_header_value X-Total-Pages
        )
        if ! [[ $pages =~ ^[[:digit:]]+$ ]]; then
            echo "ERROR: Problem fetching total pages; try TRACE=1." >&2
            return 1
        fi

    else
        if (( !pages )); then pages=1; fi
    fi

    if (( wantheader )); then
        # only need first page of results, so don't pipe through `jq`
        curl "${curlargs[@]}" "$api$endpoint?$queryargs"
    else
        # the first unwraps each results array, the second combines all results
        # back into an array
        for (( p = 1; p <= pages; p++ )); do
            if (( pages > 1 )); then echo "Fetching page ${p} of results…" >&2; fi
            curl "${curlargs[@]}" "$api$endpoint?$queryargs&page=$p" | jq '.[]'
        done | jq --slurp .
    fi
 )


 # https://stackoverflow.com/a/28776166/785213
 # `return` only succeeds inside a function or a sourced file, so probing it
 # in a throw-away subshell tells the two ways of loading this file apart.
 if (return 0 2>/dev/null); then
    sourced=1
 else
    sourced=0
 fi

 # run the query only when executed as a script; when sourced, just leave the
 # functions defined for the caller
 if (( sourced == 0 )); then
    _gitlab_api "$@"
 fi
	#!/usr/bin/env bash
	##
	## Query a GitLab v4 API endpoint, with pagination
	##
	## Author: Kevin Ernst <ernstki -at- mail.uc.edu>
	## License: ISC or WTFPL, at your discretion
	## Date: 22 May 2024
	## Requires: jq (https://github.com/jqlang/jq)
	## Homepage: https://gist.github.com/ernstki/3707675c8a4ddb06d128154947c49e29
	##
	# Name this script was invoked as, without any directory part; used in the
	# usage text.  Parameter expansion avoids forking `basename` and also copes
	# with a path that happens to start with a dash.
	ME=${BASH_SOURCE[0]##*/}

	# _urlencode STRING [STRING…]
	#
	# Percent-encode every argument for use in a URL query string and print the
	# result on stdout (no trailing newline).  The unreserved characters
	# A-Z a-z 0-9 - . _ ~ are copied through unchanged; every other *byte* is
	# written as %XX (upper-case hex), so multi-byte UTF-8 characters come out as
	# one %XX per octet.  Separate arguments are joined with `+`.
	#
	# The body is a subshell, so the IFS / locale changes and the helper
	# variables never leak into the caller.
	_urlencode() (
	    # Author: Chris Down (https://gist.github.com/cdown/1163649)
	    # with modifications to support multiple arguments by me
	    # License: Unknown
	    #
	    # LC_ALL (not just LC_COLLATE, which LC_ALL would override) keeps the
	    # bracket ranges below strictly ASCII.
	    LC_ALL=C
	    # the `for` loop below relies on default word splitting of od's output
	    IFS=$' \t\n'
	    local hex
	    while (( $# )); do
	        # `od` dumps the argument as two-digit lower-case hex, one token per
	        # byte, independent of the current character encoding
	        for hex in $(printf '%s' "$1" | od -An -v -tx1); do
	            case $hex in
	                # 0-9, A-O, P-Z, a-o, p-z, then '-', '.', '_', '~'
	                3[0-9] | 4[1-9a-f] | 5[0-9a] | 6[1-9a-f] | 7[0-9a] | 2d | 2e | 5f | 7e)
	                    printf '%b' "\\x$hex"
	                    ;;
	                *)
	                    printf '%%%02X' "$(( 16#$hex ))"
	                    ;;
	            esac
	        done
	        if (( $# > 1 )); then printf '+'; fi # delimit separate args w/ +'s
	        shift
	    done
	)

	# Print the numeric value of HTTP response header $1, reading a raw header
	# block (as produced by `curl --head`) on stdin.
	#
	# Header names are compared case-insensitively, because HTTP/2 responses
	# carry lower-case names (`x-total`) where HTTP/1.1 servers usually send
	# `X-Total`.  Only the leading digits of the value are printed, which also
	# drops the trailing CR.  If the header occurs more than once (e.g. across
	# redirects) the last occurrence wins; nothing is printed if it is absent or
	# not numeric.
	_gitlab_api_header_value() {
	    awk -F ': *' -v name="$1" '
	        tolower($1) == tolower(name) { value = $2; sub(/[^0-9].*$/, "", value) }
	        END { if (value != "") print value }
	    '
	}

	# _gitlab_api [OPTION…] { /endpoint | TERM [TERM…] } [&qs_arg…]
	#
	# Query a GitLab v4 API endpoint with pagination and print the combined
	# results as one JSON array on stdout.
	#
	# Environment:
	#   GITLAB_URL    base URL of the GitLab instance (required)
	#   GITLAB_TOKEN  private token, sent as a Bearer token (required)
	#   TRACE         non-zero enables `set -x` execution tracing
	#
	# Arguments:
	#   /endpoint     API path below /api/v4 (default: /search)
	#   TERM…         bare words become a blob search; only valid with /search
	#   &name=value   extra query-string arguments, appended verbatim
	#   -c/-t/-a/-p/-pp and -h as described in the usage text below; any other
	#   option starting with a dash is handed to `curl` (except -i/--include)
	#
	# Returns: 1 on usage errors or when the page count cannot be determined;
	# otherwise the status of the last command run.  The body is a subshell, so
	# `set -u` / `set -x` and all variables stay private to one invocation.
	_gitlab_api() (
	    # set TRACE=1 in the environment to enable execution tracing
	    # (note that the trace then includes the Authorization header)
	    if (( TRACE )); then set -x; fi
	    set -u
	    : "${GITLAB_URL:?Please define GITLAB_URL as the base URL for your GitLab instance}"
	    : "${GITLAB_TOKEN:?Please define GITLAB_TOKEN with your private token for the GitLab API}"
	    local api=$GITLAB_URL/api/v4
	    local endpoint=/search
	    local searchterms=()
	    local curlargs=(
	        --silent
	        --header "Authorization: Bearer $GITLAB_TOKEN"
	    )
	    local perpage=20
	    local queryargs= count= totals= all= pages= wantheader= p

	    while (( $# )); do
	        case $1 in
	            -h|--help|--flags|-\?)
	                # the text below is printed verbatim, hence no indentation
	                echo "
  $ME - query GitLab v4 API endpoints with pagination

  usage:
    $ME [-h|--help]
    $ME [-c|--count] [-t|--totals] { /endpoint | TERM [TERM…] }
    $ME [-a|--all] [-p|--pages INT] [-pp|--per-page INT]
    ${ME//?/ } { /endpoint | TERM [TERM…] } [&qs_arg1[&qs_arg2…]]

  where:
    -h, --help       shows this help
    -c, --count      just prints the number of results and returns
    -t, --totals     prints HTTP headers for # pages, # per page, total results
    -a, --all        returns all records instead of just the first page
    -p, --pages      limit results to this many pages (default: 1)
    -pp, --per-page  specifies page size (default: $perpage)

   …and other options starting with a dash are passed through to \`curl\`

  examples:
    $ $ME search terms            # code search for 'search' and 'terms'
    $ $ME --all '\"exact phrase\"'  # search for an exact phrase, all results
    $ $ME -I '\"search phrase\"'    # see HTTP headers for the above
    $ $ME --count /projects       # count how many projects

  homepage:
    https://gist.github.com/ernstki/3707675c8a4ddb06d128154947c49e29
 "
	                return
	                ;;
	            -c|--count)
	                count=1
	                ;;
	            -t|--totals)
	                totals=1
	                ;;
	            -a|--all)
	                all=1
	                ;;
	            -p|--pages|-pp|--pp|--per-page)
	                # check before consuming, so a missing or non-numeric value
	                # gives a clear message instead of an "unbound variable" abort
	                if (( $# < 2 )) || [[ ! $2 =~ ^[0-9]+$ ]]; then
	                    echo "ERROR: The \`$1\` option requires a non-negative integer argument." >&2
	                    return 1
	                fi
	                # 10# forces base 10, so "08" is not rejected as bad octal
	                case $1 in
	                    -p|--pages) pages=$(( 10#$2 )) ;;
	                    *)          perpage=$(( 10#$2 )) ;;
	                esac
	                shift
	                ;;
	            -i*|--include)
	                # `-i` (alone or leading a short-option cluster) intersperses
	                # headers into JSON output which `jq` can't handle; matched
	                # exactly so that e.g. `--insecure` still reaches curl
	                echo 'Ignoring unsupported `-i` / `--include` curl option.' >&2
	                ;;
	            -I*|--head)
	                # remember that only headers were asked for, rather than
	                # searching the joined argument list for "-I" later (which
	                # could also match inside the token)
	                wantheader=1
	                curlargs+=("$1")
	                ;;
	            -*)
	                # FIXME: should probably _only_ accept -I / --head
	                curlargs+=("$1")
	                ;;
	            /*)
	                endpoint=$1
	                ;;
	            \&*)
	                queryargs+=$1
	                ;;
	            *)
	                searchterms+=("$1")
	                ;;
	        esac
	        shift
	    done

	    if (( ${#searchterms[@]} > 0 )); then
	        if [[ $endpoint != /search ]]; then
	            echo 'ERROR: Bare search terms only accepted for `/search` endpoint.' >&2
	            return 1
	        fi
	        # otherwise (tracing is switched off while encoding, to keep the
	        # per-character loop out of the trace)
	        queryargs+="&scope=blobs&search=$(set +x; _urlencode "${searchterms[@]}")"
	        if (( ${TRACE:-0} )); then declare -p queryargs; fi
	    fi

	    queryargs+="&per_page=$perpage"

	    if (( all && pages )); then
	        echo 'ERROR: The `--all` and `--pages` options are mutually-exclusive.' >&2
	        return 1
	    fi

	    if (( count )); then
	        # total number of results, taken from the response headers
	        curl --head "${curlargs[@]}" "$api$endpoint?$queryargs" \
	          | _gitlab_api_header_value X-Total
	        return

	    elif (( totals )); then
	        # print summary of results using HTTP response headers
	        curl --head "${curlargs[@]}" "$api$endpoint?$queryargs" \
	          | grep -iE '^x-(page|per-page|total|total-pages):' \
	          | tr -d '\r'
	        return

	    elif (( all )); then
	        # get the total number of pages
	        pages=$(
	            curl --head "${curlargs[@]}" "$api$endpoint?$queryargs" \
	              | _gitlab_api_header_value X-Total-Pages
	        )
	        if ! [[ $pages =~ ^[[:digit:]]+$ ]]; then
	            echo "ERROR: Problem fetching total pages; try TRACE=1." >&2
	            return 1
	        fi

	    else
	        if (( !pages )); then pages=1; fi
	    fi

	    if (( wantheader )); then
	        # only need first page of results, so don't pipe through `jq`
	        curl "${curlargs[@]}" "$api$endpoint?$queryargs"
	    else
	        # the first unwraps each results array, the second combines all results
	        # back into an array
	        for (( p = 1; p <= pages; p++ )); do
	            if (( pages > 1 )); then echo "Fetching page ${p} of results…" >&2; fi
	            curl "${curlargs[@]}" "$api$endpoint?$queryargs&page=$p" | jq '.[]'
	        done | jq --slurp .
	    fi
	)


	# https://stackoverflow.com/a/28776166/785213
	# `return` only succeeds inside a function or a sourced file, so probing it
	# in a throw-away subshell tells the two ways of loading this file apart.
	if (return 0 2>/dev/null); then
	    sourced=1
	else
	    sourced=0
	fi

	# run the query only when executed as a script; when sourced, just leave the
	# functions defined for the caller
	if (( !sourced )); then
	    _gitlab_api "$@"
	fi