Created
August 19, 2019 22:43
-
-
Save lucaswerkmeister/0bc668691eacd4827811feead1f41f11 to your computer and use it in GitHub Desktop.
script to download all files in a Wikimedia Commons category
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Download every file in a Wikimedia Commons category.
#
# Usage: <this script> CATEGORY
#   CATEGORY  category name; "Category:" is prepended by the script.
#
# External tools used: curl, jq, df, awk, youtube-dl.
#
# Steps:
#   1. Page through the MediaWiki API, collecting each file's title and size.
#   2. Compare the summed size with the free space reported by df for the
#      current directory; abort if it cannot fit, warn if it exceeds 10%.
#   3. Hand each file's Special:FilePath URL to youtube-dl.

# Base query: file members of the category, together with their sizes.
declare -A original_params=(
  [action]=query
  [generator]=categorymembers
  [gcmtitle]="Category:${1:?category not specified}"
  [gcmtype]=file
  [gcmlimit]=max
  [prop]=imageinfo
  [iiprop]=size
  [format]=json
  [formatversion]=2
)
declare -A continue_params=() # entries of the previous response's "continue" object
declare -a titles=()          # titles of all files found so far
declare -i size=0             # sum of the reported file sizes, in bytes

first_iteration=1
# Keep requesting until a response carries no "continue" object.
while ((first_iteration)) || ((${#continue_params[@]})); do
  first_iteration=0

  # --data-urlencode instead of -d: plain -d sends the value verbatim, so a
  # category name or continuation token containing "&", "+", "%" or "=" would
  # corrupt the request body.
  declare -a params=()
  for key in "${!original_params[@]}"; do
    params+=('--data-urlencode' "$key=${original_params[$key]}")
  done
  for key in "${!continue_params[@]}"; do
    params+=('--data-urlencode' "$key=${continue_params[$key]}")
  done

  # --fail turns HTTP errors into a non-zero exit status; -S keeps curl's own
  # error message visible despite -s.
  if ! output=$(curl -sS --fail https://commons.wikimedia.org/w/api.php "${params[@]}"); then
    printf >&2 'Request to the Wikimedia Commons API failed.\n'
    exit 1
  fi

  # Stop on unparsable responses and on API-level errors instead of silently
  # treating them as "no more files".
  if ! api_error=$(jq -r '.error | select(.) | (.info // .code // "unknown error" | tostring)' <<<"$output"); then
    printf >&2 'Could not parse the API response.\n'
    exit 1
  fi
  if [[ -n $api_error ]]; then
    printf >&2 'API error: %s\n' "$api_error"
    exit 1
  fi

  # Replace the continuation parameters with those of this response; the
  # array stays empty (ending the loop) when there is no "continue" object.
  declare -A continue_params=()
  while IFS=$'\t' read -r key value; do
    continue_params[$key]=$value
  done < <(jq -r '.continue | select(.) | to_entries | .[] | (.key + "\t" + (.value | tostring))' <<<"$output")

  # One "title<TAB>size" line per page. "// []" tolerates a response without
  # query.pages, "// 0" a page without imageinfo.
  while IFS=$'\t' read -r title file_size; do
    titles+=("$title")
    # Only plain digits may reach the arithmetic context.
    [[ $file_size =~ ^[0-9]+$ ]] || file_size=0
    ((size += file_size))
  done < <(jq -r '.query.pages // [] | .[] | (.title + "\t" + (.imageinfo[0].size // 0 | tostring))' <<<"$output")
done

# Fourth column of the data line of "df -P" is the available space; -B1
# makes df report it in bytes.
free=$(df -PB1 . | awk 'NR == 2 {print $4}')
if [[ ! $free =~ ^[0-9]+$ ]]; then
  printf >&2 'Could not determine the free space in the current directory.\n'
  exit 1
fi
if ((size > free)); then
  printf >&2 'Need %d free bytes but only detected %d!\n' "$size" "$free"
  exit 1
elif ((size > free / 10)); then
  printf >&2 'This download will consume more than 10%% of the remaining free space on disk (%d out of %d bytes).\n' "$size" "$free"
fi

for title_index in "${!titles[@]}"; do
  title=${titles[$title_index]}
  printf >&2 '== %d/%d: %s ==\n' "$((title_index + 1))" "${#titles[@]}" "$title"
  # Percent-encode the characters that would otherwise be read as URL syntax
  # ("%" first, so the escapes added afterwards are not encoded again).
  url_title=${title//\%/%25}
  url_title=${url_title//\?/%3F}
  url_title=${url_title//\#/%23}
  youtube-dl https://commons.wikimedia.org/wiki/Special:FilePath/"$url_title"
done
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment