Last active
November 26, 2024 19:24
-
-
Save sbassett29/1d9c1cf6009e9414a414d3ddbe50b360 to your computer and use it in GitHub Desktop.
Get all active Wikimedia repos
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(see scripts) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
GITHUB_TOKEN=abc123 | |
GITLAB_TOKEN=abc123 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash | |
################################################################################ | |
# Author: [email protected] | |
# License: Apache 2 <https://opensource.org/licenses/Apache-2.0> | |
################################################################################ | |
# Print the number of Gerrit projects whose state is ACTIVE.
#  - The URL is single-quoted so the shell can never treat the "?" in it as a
#    glob pattern.
#  - `sed 1d` removes the first line of the response so that only the JSON
#    body reaches jq.
#  - jq counts the matching projects itself, so the output is a bare number.
curl -s 'https://gerrit.wikimedia.org/r/projects/?all' \
  | sed 1d \
  | jq '[ .[] | select(.state == "ACTIVE") ] | length'
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash | |
# based upon https://gist.github.com/mbohun/b161521b2440b9f08b59 | |
# DEBUG=true restricts the run to the first page of results and prints the
# name of every repository that is counted.
DEBUG=false
GITHUB_API_BASE="https://api.github.com"
GITHUB_API_REST="/orgs/wikimedia/repos"
GITHUB_API_HEADER_ACCEPT="Accept: application/vnd.github.v3+json"

# Load GITHUB_TOKEN from the .env file in the current directory.
# The explicit "./" matters: `source` with a slash-less name searches $PATH
# before the current directory, so a bare ".env" could pick up another file.
if [[ -f ./.env ]]; then
  # shellcheck source=/dev/null
  source ./.env
fi
if [[ -z "${GITHUB_TOKEN:-}" ]]; then
  printf '%s\n' \
    'warning: GITHUB_TOKEN is empty or unset; GitHub API requests are likely to be rejected' >&2
fi

# Accumulates the fetched repository objects as comma-separated JSON.
github_api_data=""
#######################################
# Request a GitHub API URL with the script's Accept and Authorization headers.
# Globals:   GITHUB_API_HEADER_ACCEPT (read), GITHUB_TOKEN (read)
# Arguments: $1 - full request URL
# Outputs:   whatever curl prints for the response (curl runs with -s)
#######################################
function rest_call {
  # "$1" is quoted so a URL containing "?", "*" or whitespace is passed to
  # curl as exactly one argument, without word splitting or globbing.
  curl -s "$1" -H "${GITHUB_API_HEADER_ACCEPT}" -H "Authorization: token ${GITHUB_TOKEN}"
}
#######################################
# Extract the number of the last result page from HTTP response headers.
# Reads the raw headers on stdin and prints the "page" value of the link
# marked rel="last". Prints nothing when no such link is present, which is
# the case for single-page results (they carry no Link header).
# The header name is matched case-insensitively because HTTP header names
# are case-insensitive and curl prints them as the server sent them.
#######################################
github_last_page_from_headers() {
  sed -n 's/^[Ll][Ii][Nn][Kk]:.*[?&]page=\([0-9][0-9]*\)[^>]*>; rel="last".*/\1/p'
}

# HEAD request only: the headers are enough to learn how many pages exist.
last_page=$(curl -s -I "${GITHUB_API_BASE}${GITHUB_API_REST}" \
  -H "${GITHUB_API_HEADER_ACCEPT}" \
  -H "Authorization: token ${GITHUB_TOKEN}" \
  | github_last_page_from_headers)
# Fetch the repository list into github_api_data as comma-separated JSON
# objects. `sed '1d;$d'` deletes the first and the last line of every
# response, i.e. it assumes the enclosing "[" and "]" of the JSON array sit
# on lines of their own (NOTE(review): confirm the API keeps pretty-printing).
#
# All expansions are quoted and tested with [[ ]] so an empty DEBUG or
# last_page value cannot turn a test into a syntax error.
if [[ -z "$last_page" || "$DEBUG" == true ]]; then
  # Single-page result, or abbreviated debug mode: fetch the first page only.
  github_api_data+=$(rest_call "${GITHUB_API_BASE}${GITHUB_API_REST}" | sed '1d;$d')
elif [[ "$DEBUG" == false ]]; then
  # Multi-page result: fetch every page and join the pages with commas.
  for p in $(seq 1 "$last_page"); do
    github_api_data+=$(rest_call "${GITHUB_API_BASE}${GITHUB_API_REST}?page=${p}" | sed '1d;$d')
    if [[ "$p" != "$last_page" ]]; then
      github_api_data+=","
    fi
  done
fi
# Keep the html_url of every repository that is neither archived nor disabled
# and whose description does not match one of the common "mirror" phrases
# (case-insensitive; "Mirorr" presumably catches a misspelt description).
#
# A null description is treated as an empty string. Previously
# `.description | values` produced no value at all for null, which made the
# whole select() condition empty and silently dropped every repository that
# has no description.
deduped_repo_urls=$(printf '[ %s ]\n' "$github_api_data" \
  | jq '.[]
        | select(.archived == false
                 and .disabled == false
                 and ((.description // "")
                      | test("Mirror.+of|Mirorr.+of|Mirror.+from|Github.+mirror|is.+a.+mirror"; "i")
                      | not))
        | .html_url')
# Cross-check the remaining GitHub repositories against Gerrit: a repository
# is counted only when the Gerrit project query returns no ACTIVE project for
# its name. Prints the final count.
total_repos=0
while IFS= read -r url; do
  # An empty list still yields one empty line from the here-string.
  [[ -n "$url" ]] || continue

  # Reduce the URL to the bare repository name: drop everything up to and
  # including "/wikimedia/", then the JSON double quotes emitted by jq.
  str_url=${url#*/wikimedia/}
  str_url=${str_url//\"/}

  # `sed 1d` removes the first response line so only JSON reaches jq.
  # jq -c prints nothing at all when no project is ACTIVE, so the result can
  # be tested for emptiness directly (no xargs, which fails on quote
  # characters inside the JSON).
  gerrit_api_response=$(curl -s "https://gerrit.wikimedia.org/r/projects/?query=${str_url}" \
    | sed 1d \
    | jq -c '.[] | select(.state == "ACTIVE")')

  # No active Gerrit project found: not a duplicate, count it.
  if [[ -z "$gerrit_api_response" ]]; then
    if [[ "$DEBUG" == true ]]; then
      echo "$str_url"
    fi
    total_repos=$((total_repos + 1))
  fi
done <<< "$deduped_repo_urls"
echo "$total_repos"
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash | |
# based upon https://gist.github.com/mbohun/b161521b2440b9f08b59 | |
# DEBUG=true restricts the run to the first page of results.
DEBUG=false
GITLAB_API_BASE="https://gitlab.wikimedia.org"
GITLAB_API_REST="/api/v4/projects/?simple=yes&private=true&archived=false"

# Load GITLAB_TOKEN from the .env file in the current directory.
# The explicit "./" matters: `source` with a slash-less name searches $PATH
# before the current directory, so a bare ".env" could pick up another file.
if [[ -f ./.env ]]; then
  # shellcheck source=/dev/null
  source ./.env
fi
if [[ -z "${GITLAB_TOKEN:-}" ]]; then
  printf '%s\n' \
    'warning: GITLAB_TOKEN is empty or unset; GitLab API requests will not be authenticated' >&2
fi

# Accumulates the fetched project objects as comma-separated JSON.
gitlab_api_data=""
#######################################
# Request a GitLab API URL with the script's PRIVATE-TOKEN header.
# Globals:   GITLAB_TOKEN (read)
# Arguments: $1 - full request URL
# Outputs:   whatever curl prints for the response (curl runs with -s)
#######################################
function rest_call {
  # "$1" is quoted so a URL containing "?", "*" or whitespace is passed to
  # curl as exactly one argument, without word splitting or globbing.
  curl -s "$1" -H "PRIVATE-TOKEN: ${GITLAB_TOKEN}"
}
#######################################
# Extract the total page count from HTTP response headers.
# Reads the raw headers on stdin and prints the digits of the
# "x-total-pages" header; prints nothing when that header is absent.
# The header name is matched case-insensitively because HTTP header names
# are case-insensitive and curl prints them as the server sent them.
#######################################
gitlab_total_pages_from_headers() {
  grep -i '^x-total-pages:' | tr -d -c 0-9
}

# HEAD request only: the headers are enough to learn how many pages exist.
# An empty last_page makes the code below fetch a single page.
last_page=$(curl -s -I "${GITLAB_API_BASE}${GITLAB_API_REST}" \
  -H "PRIVATE-TOKEN: ${GITLAB_TOKEN}" \
  | gitlab_total_pages_from_headers)
# Fetch the project list into gitlab_api_data as comma-separated JSON
# objects. The sed expression removes the first and the last character of
# each response line, i.e. the enclosing "[" and "]" of the JSON array
# (NOTE(review): this assumes each response arrives as a single line).
#
# All expansions are quoted and tested with [[ ]] so an empty DEBUG or
# last_page value cannot turn a test into a syntax error.
if [[ -z "$last_page" || "$DEBUG" == true ]]; then
  # Single-page result, or abbreviated debug mode: fetch the first page only.
  gitlab_api_data+=$(rest_call "${GITLAB_API_BASE}${GITLAB_API_REST}" | sed 's/^.\(.*\).$/\1/')
elif [[ "$DEBUG" == false ]]; then
  # Multi-page result: fetch every page and join the pages with commas.
  for p in $(seq 1 "$last_page"); do
    gitlab_api_data+=$(rest_call "${GITLAB_API_BASE}${GITLAB_API_REST}&page=${p}" | sed 's/^.\(.*\).$/\1/')
    if [[ "$p" != "$last_page" ]]; then
      gitlab_api_data+=","
    fi
  done
fi
# Wrap the collected objects in a JSON array and let jq count those whose
# path_with_namespace starts with "repos/"; then print that count.
all_active_repos_count=$(
  jq '[ .[] | select(.path_with_namespace | startswith("repos/")) ] | length' \
    <<< "[ ${gitlab_api_data} ]"
)
printf '%s\n' "$all_active_repos_count"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment