Get all active Wikimedia repos
# .env: placeholder API tokens sourced by the scripts below (replace with real values)
GITHUB_TOKEN=abc123
GITLAB_TOKEN=abc123
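Both counting scripts below read these tokens with source .env, so the file should hold real personal access tokens rather than the abc123 placeholders. A minimal hardening sketch; the chmod mode and .gitignore step are suggestions, not part of the original gist:

# keep the token file readable only by its owner and out of version control
chmod 600 .env
echo '.env' >> .gitignore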
#!/usr/bin/env bash
################################################################################
# Author: [email protected]
# License: Apache 2 <https://opensource.org/licenses/Apache-2.0>
################################################################################
# strip Gerrit's ")]}'" XSSI-protection prefix, then count projects in the ACTIVE state
curl -s 'https://gerrit.wikimedia.org/r/projects/?all' | sed 1d | \
  jq -c '.[] | select(.state=="ACTIVE")' | wc -l
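The same total can be produced entirely inside jq instead of piping one-object-per-line output to wc -l; a minimal equivalent sketch against the same Gerrit endpoint:

# count ACTIVE Gerrit projects entirely within jq
curl -s 'https://gerrit.wikimedia.org/r/projects/?all' | sed 1d | \
  jq '[ .[] | select(.state=="ACTIVE") ] | length'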
#!/usr/bin/env bash
# based upon https://gist.github.com/mbohun/b161521b2440b9f08b59
DEBUG=false
GITHUB_API_BASE="https://api.github.com"
GITHUB_API_REST="/orgs/wikimedia/repos"
GITHUB_API_HEADER_ACCEPT="Accept: application/vnd.github.v3+json"
# set GITHUB_TOKEN
source .env
github_api_data=""
function rest_call {
  curl -s "$1" -H "${GITHUB_API_HEADER_ACCEPT}" -H "Authorization: token $GITHUB_TOKEN"
}
# single-page results (no pagination) have no "link:" header, so the grep result is empty
last_page=$(curl -s -I "${GITHUB_API_BASE}${GITHUB_API_REST}" -H "${GITHUB_API_HEADER_ACCEPT}" \
  -H "Authorization: token $GITHUB_TOKEN" | grep '^link:' | sed -e 's/^link:.*page=//g' -e 's/>.*$//g')
# does this result use pagination? (or is abbreviated debug mode enabled)
if [ -z "$last_page" ] || [ "$DEBUG" = true ]; then
  # no - this result has only one page; sed '1d;$d' drops the wrapping "[" and "]" lines
  github_api_data+=$(rest_call "${GITHUB_API_BASE}${GITHUB_API_REST}" | sed '1d;$d')
elif [ "$DEBUG" = false ]; then
  # yes - this result spans multiple pages; fetch each one and join the stripped arrays with commas
  for p in $(seq 1 "$last_page"); do
    github_api_data+=$(rest_call "${GITHUB_API_BASE}${GITHUB_API_REST}?page=$p" | sed '1d;$d')
    if [ "$p" != "$last_page" ]; then
      github_api_data+=","
    fi
  done
fi
# filter mirrored repositories based upon common description strings
deduped_repo_urls=$(echo "[ $github_api_data ]" | jq '[ .[] | select(.archived==false and .disabled==false and (.description | values | test("Mirror.+of|Mirorr.+of|Mirror.+from|Github.+mirror|is.+a.+mirror"; "i") | not)) ] | .[] | .html_url')
# check Gerrit to make sure an obvious mirror did not slip through the filter
total_repos=0
for url in $deduped_repo_urls
do
  str_url=$(echo "${url#*\/wikimedia\/}" | tr -d '"')
  gerrit_api_response=$(curl -s "https://gerrit.wikimedia.org/r/projects/?query=$str_url" | sed 1d | jq '.[] | select(.state=="ACTIVE")' | xargs)
  # no duplicate result found on Gerrit, so count this GitHub repo
  if [ "$gerrit_api_response" == "" ]; then
    if [ "$DEBUG" = true ]; then
      echo "$str_url"
    fi
    total_repos=$((total_repos+1))
  fi
done
echo "$total_repos"
#!/usr/bin/env bash
# based upon https://gist.github.com/mbohun/b161521b2440b9f08b59
DEBUG=false
GITLAB_API_BASE="https://gitlab.wikimedia.org"
GITLAB_API_REST="/api/v4/projects/?simple=yes&private=true&archived=false"
# set GITLAB_TOKEN
source .env
gitlab_api_data=""
function rest_call {
  curl -s "$1" -H "PRIVATE-TOKEN: $GITLAB_TOKEN"
}
# single-page results (no pagination) have no "x-total-pages:" header, so the grep result is empty
last_page=$(curl -s -I "${GITLAB_API_BASE}${GITLAB_API_REST}" \
  -H "PRIVATE-TOKEN: $GITLAB_TOKEN" | grep "^x-total-pages:" | tr -d -c 0-9)
# does this result use pagination? (or is abbreviated debug mode enabled)
if [ -z "$last_page" ] || [ "$DEBUG" = true ]; then
  # no - this result has only one page; the sed expression strips the leading "[" and trailing "]"
  gitlab_api_data+=$(rest_call "${GITLAB_API_BASE}${GITLAB_API_REST}" | sed 's/^.\(.*\).$/\1/')
elif [ "$DEBUG" = false ]; then
  # yes - this result spans multiple pages; fetch each one and join the stripped arrays with commas
  for p in $(seq 1 "$last_page"); do
    gitlab_api_data+=$(rest_call "${GITLAB_API_BASE}${GITLAB_API_REST}&page=$p" | sed 's/^.\(.*\).$/\1/')
    if [ "$p" != "$last_page" ]; then
      gitlab_api_data+=","
    fi
  done
fi
# count only projects under the "repos/" namespace (active and non-archived per the query string above)
all_active_repos_count=$(echo "[ $gitlab_api_data ]" | jq '[ .[] | select(.path_with_namespace | startswith("repos/")) ] | length')
echo "$all_active_repos_count"