Last active
November 10, 2016 13:48
-
-
Save db48x/a1a8847916ab149abbfce25517944bdc to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Emit one TSV row (md5, size, collection, url) for every non-derivative file | |
| # listed in an item metadata object that has .metadata and .files. | |
| # Keep the identifier around for building download URLs. | |
| .metadata.identifier as $i | | |
| # Internet Archive metadata gives a single-valued field as a plain string and a | |
| # multi-valued one as an array; indexing a string with [0] is a jq error, so | |
| # normalise both shapes (and null) to one collection value here. | |
| (.metadata.collection | if type == "array" then .[0] else . end) as $c | | |
| # Filter out any items that do not have files metadata. | |
| select(.files != null) | | |
| # Stream the file entries one by one. | |
| .files[] | | |
| # Keep only files whose source is not "derivative". | |
| select(.source != "derivative") | | |
| # Entries with size=null (i.e. files.xml) are reported with size 0; | |
| # every other size is converted to a number. | |
| [.md5, (.size // 0 | tonumber), $c, "https://archive.org/download/\($i)/\(.name)"] | | |
| @tsv |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/bin/sh | |
| # mkSHARD: turn a list file into a bare git repository named SHARD<N>.git. | |
| # usage: mkSHARD collection-list N | |
| # Stop at the first failing command and trace each command as it runs. | |
| set -ex | |
| src_list=$1 | |
| shard_no=$2 | |
| # Both arguments are mandatory; print the usage line and fail otherwise. | |
| [ -n "$src_list" ] && [ -n "$shard_no" ] || { | |
|   echo "usage: mkSHARD collection-list N" >&2 | |
|   exit 1 | |
| } | |
| # Every artefact of this run is named after the shard number. | |
| shard="SHARD$shard_no" | |
| # Restrict word splitting to newlines for the rest of the script. | |
| IFS=' | |
| ' | |
| # Work on a private copy of the list and report how many lines it has. | |
| cp "$src_list" "${shard}.list" | |
| wc -l "${shard}.list" | |
| # Create a scratch repository with git-annex enabled and enter it. | |
| git init "$shard" | |
| cd "$shard" | |
| git annex init | |
| # importlist is a sibling script one directory up; it receives the copied list. | |
| ../importlist "../${shard}.list" | |
| git commit --quiet -a -m "creating $shard" | |
| # Mark this scratch repository as dead in git-annex, then repack and print a summary. | |
| git annex dead . | |
| git gc --aggressive | |
| git annex info . | |
| cd .. | |
| # Keep only a bare clone of the scratch repository and delete the working copy. | |
| git clone --bare "$shard" "${shard}.git" | |
| rm -rf "$shard" |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/bin/sh | |
| # List the item identifiers of a collection with ia-mine, split that list into | |
| # chunks, and run ia-mine once per chunk. | |
| # usage: <this script> collection | |
| # Writes <collection>-ids.txt, <collection>-meta-NN.txt (identifier chunks) and | |
| # <collection>-files-NN.txt (ia-mine output for each chunk). | |
| # Needs ia-mine and GNU split (-n l/N, -d, --additional-suffix). | |
| set -e | |
| set -x | |
| collection=${1} | |
| # Without a collection name every file name below would be malformed. | |
| if [ -z "${collection}" ]; then | |
|   echo "usage: $0 collection" >&2 | |
|   exit 1 | |
| fi | |
| itemfile=${collection}-ids.txt | |
| ia-mine --secure -c -s "collection:${collection}" --itemlist >"${itemfile}" | |
| # Reading from stdin makes wc print only the count, so no file name has to be cut off. | |
| lines=$(wc -l <"${itemfile}") | |
| # Aim for roughly 100000 identifiers per chunk. split rejects a chunk count of 0, | |
| # so a collection smaller than that still gets exactly one chunk. | |
| chunks=$((lines / 100000)) | |
| if [ "${chunks}" -lt 1 ]; then | |
|   chunks=1 | |
| fi | |
| split -n "l/${chunks}" -d --additional-suffix=.txt "${itemfile}" "${collection}-meta-" | |
| # split names its output <collection>-meta-NN.txt, so that is the pattern to loop over. | |
| for f in "${collection}"-meta-*.txt; do | |
|   # An unmatched glob stays a literal pattern; skip it instead of mining a missing file. | |
|   [ -e "${f}" ] || continue | |
|   # Derive <collection>-files-NN.txt with POSIX prefix stripping; /bin/sh is not | |
|   # guaranteed to support the bash-only ${var/pattern/replacement} form. | |
|   suffix=${f#"${collection}-meta-"} | |
|   ia-mine --secure -c "${f}" >"${collection}-files-${suffix}" | |
| done |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment