Created
May 2, 2019 17:39
-
-
Save jweisman/7baf40cdfdeb02b11abfac28243929f0 to your computer and use it in GitHub Desktop.
Automating fulltext extraction in the Alma Digital Repository
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
#
# Upload one file to the Alma S3 bucket and attach it to an existing digital
# representation.
#
# Usage: <this-script> file-name representation-id institution [file-label]
#
#   file-name          local file to upload
#   representation-id  Alma representation that receives the file
#   institution        institution code; used as the top-level S3 folder
#   file-label         optional label stored with the representation file
#
# Environment:
#   ALMA_APIKEY  Alma API key sent in the Authorization header (required).
#   The aws CLI picks up its credentials through its own usual mechanisms.
#
# Exit status: 0 on success, 1 on a usage error or when any step fails.
#
# bash (not plain sh) is required because $RANDOM is a bash feature.

set -u
set -o pipefail   # a failing curl must fail the whole "curl | xmllint" pipeline

readonly AWS_BUCKET="na-st01.ext.exlibrisgroup.com"
readonly ALMA_API="https://api-na.hosted.exlibrisgroup.com/almaws/v1"

# Print a message on stderr and abort.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Escape the characters that are special inside XML text content.
xml_escape() {
  printf '%s' "$1" | sed -e 's/&/\&amp;/g' -e 's/</\&lt;/g' -e 's/>/\&gt;/g'
}

# file-name, representation-id and institution are all used below, so all
# three are mandatory; only the label is optional.
if [ $# -lt 3 ]; then
  printf 'Usage: %s file-name representation-id institution [file-label]\n' \
    "${0##*/}" >&2
  exit 1
fi
: "${ALMA_APIKEY:?ALMA_APIKEY must be set}"

file=$1
rep_id=$2
institution=$3
label=${4:-}

[ -f "$file" ] || die "No such file: $file"

filename=$(basename -- "$file")
# Random sub-folder (1..32767) so uploads of equally named files do not collide.
folder=$((1 + RANDOM % 32767))
s3_key="$institution/upload/$folder/$filename"

echo "Uploading $file"
aws s3 cp --quiet "$file" "s3://$AWS_BUCKET/$s3_key" \
  || die "Upload of $file failed"

# Look up the bib record (MMS ID) that owns the representation.
bib=$(curl -s --fail \
    -H "Authorization: apikey $ALMA_APIKEY" \
    -H "Accept: application/xml" \
    "$ALMA_API/bibs?representation_id=$rep_id&view=brief" \
  | xmllint --xpath "string(/bibs/bib/mms_id)" -) \
  || die "Could not retrieve the bib for representation $rep_id"
[ -n "$bib" ] || die "No bib found for representation $rep_id"

echo "Adding file to representation"
payload="<representation_file><label>$(xml_escape "$label")</label><path>$(xml_escape "$s3_key")</path></representation_file>"
curl -s --fail -o /dev/null -X POST \
  -H "Authorization: apikey $ALMA_APIKEY" \
  -H "Content-type: application/xml" \
  -H "Accept: application/json" \
  --data "$payload" \
  "$ALMA_API/bibs/$bib/representations/$rep_id/files" \
  || die "Could not add $filename to representation $rep_id"
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
#
# Create a digital representation on a bib record, upload every file of a
# directory into it, run the fulltext-extraction job on those files through a
# temporary itemized set, and finally open the representation in the viewer.
#
# Usage: <this-script> mms_id directory
#
# Requires curl and jq, and expects ./add_file.sh, ./run_job.sh and ./job.json
# in the current working directory. job.json is rewritten in place with the id
# of the temporary set.
#
# Environment:
#   ALMA_APIKEY  Alma API key sent in the Authorization header (required).
#
# Exit status: 0 on success; non-zero on a usage error, when an API call
# fails, or when the job does not complete successfully.
#
# bash (not plain sh) is required: the script uses arrays.

set -u
set -o pipefail   # make "curl | jq" report curl's failure, not just jq's status

readonly JOB_ID=M50157
readonly INSTITUTION='TR_INTEGRATION_INST'
readonly ALMA_API="https://api-na.hosted.exlibrisgroup.com/almaws/v1"

# Print a message on stderr and abort.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Delete the temporary set, if one exists. Installed as an EXIT trap so the
# set is also removed when a later step aborts the script.
set_id=
delete_set() {
  [ -n "$set_id" ] || return 0
  echo "Deleting set $set_id"
  curl -s -X DELETE -H "Authorization: apikey $ALMA_APIKEY" \
    "$ALMA_API/conf/sets/$set_id"
  set_id=
}
trap delete_set EXIT

if [ $# -lt 2 ]; then
  printf 'Usage: %s mms_id directory\n' "${0##*/}" >&2
  exit 1
fi
: "${ALMA_APIKEY:?ALMA_APIKEY must be set}"

mms_id=$1
dir=$2

[ -d "$dir" ] || die "Not a directory: $dir"
files=("$dir"/*)
# Without nullglob an unmatched glob stays literal, so probe the first entry.
[ -e "${files[0]}" ] || die "No files found in $dir"

# Representation label: name of the first file without its extension and
# without trailing digits (e.g. "book001.tif" -> "book").
label=$(basename -- "${files[0]}")
label=$(printf '%s\n' "${label%.*}" | sed 's/[0-9]*$//')

echo "Adding representation to mms_id $mms_id"
# Build the request with jq so a label containing quotes stays valid JSON.
rep_body=$(jq -n -c --arg label "$label" \
  '{library: {value: "MAIN"}, is_remote: "false", label: $label, usage_type: {value: "PRESERVATION_MASTER"}}')
rep_id=$(curl -s --fail -X POST \
    -H "Authorization: apikey $ALMA_APIKEY" \
    -H "Content-type: application/json" \
    -H "Accept: application/json" \
    --data "$rep_body" \
    "$ALMA_API/bibs/$mms_id/representations" \
  | jq -r '.id') \
  || die "Could not create a representation on mms_id $mms_id"
if [ -z "$rep_id" ] || [ "$rep_id" = "null" ]; then
  die "The API returned no representation id"
fi

echo "Adding files to representation $rep_id"
i=1
for file in "${files[@]}"; do
  [ -f "$file" ] || continue   # skip sub-directories and other non-files
  ./add_file.sh "$file" "$rep_id" "$INSTITUTION" "Page $i" \
    || die "Adding $file failed; representation $rep_id is left incomplete"
  i=$((i + 1))
done

echo "Creating new set"
set_id=$(curl -s --fail -X POST \
    -H "Authorization: apikey $ALMA_APIKEY" \
    -H "Content-type: application/json" \
    -H "Accept: application/json" \
    --data '{"name": "Digital files for fulltext","type": {"value": "ITEMIZED"},"content": {"value": "FILE"}}' \
    "$ALMA_API/conf/sets" \
  | jq -r '.id')
res=$?
if [ "$res" -ne 0 ]; then
  set_id=
  echo "HTTP request failed with return code $res" >&2
  exit "$res"
fi
if [ -z "$set_id" ] || [ "$set_id" = "null" ]; then
  set_id=
  die "The API returned no set id"
fi

echo "Adding files to set"
# Turn the representation's file list into the set-members payload.
members=$(curl -s --fail \
    -H "Authorization: apikey $ALMA_APIKEY" \
    -H "Accept: application/json" \
    "$ALMA_API/bibs/$mms_id/representations/$rep_id/files" \
  | jq -c '{members:{member: (.representation_file | map({id:.pid}))}}') \
  || die "Could not list the files of representation $rep_id"
curl -s --fail -o /dev/null -X POST \
  -H "Authorization: apikey $ALMA_APIKEY" \
  -H "Content-type: application/json" \
  -H "Accept: application/json" \
  --data "$members" \
  "$ALMA_API/conf/sets/$set_id?op=replace_members" \
  || die "Could not add the files to set $set_id"

echo "Running job"
# Point the job parameters at the new set; pass the id with --arg instead of
# splicing it into the jq program text.
if jq -c --arg set_id "$set_id" \
    '.parameter |= map(if .name.value == "set_id" then (.value = $set_id) else . end)' \
    job.json > job.tmp; then
  mv job.tmp job.json
else
  rm -f job.tmp
  die "Could not update job.json"
fi

job_rc=0
./run_job.sh "$JOB_ID" job.json || job_rc=$?

delete_set

if [ "$job_rc" -ne 0 ]; then
  echo "Fulltext extraction job did not complete successfully (exit $job_rc)" >&2
fi

viewer_url="https://na01.alma.exlibrisgroup.com/view/BookReaderViewer/$INSTITUTION/$rep_id"
echo "Opening new representation"
# "open" is the macOS launcher; fall back to xdg-open, or just print the URL.
if command -v open > /dev/null 2>&1; then
  open "$viewer_url"
elif command -v xdg-open > /dev/null 2>&1; then
  xdg-open "$viewer_url"
else
  echo "$viewer_url"
fi

exit "$job_rc"
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "parameter": [ | |
| { | |
| "name": { | |
| "value": "task_ExtractFulltext_overwriteProvided" | |
| }, | |
| "value": "false" | |
| }, | |
| { | |
| "name": { | |
| "value": "set_id" | |
| }, | |
| "value": "5617966300000561" | |
| }, | |
| { | |
| "name": { | |
| "value": "job_name" | |
| }, | |
| "value": "Extract Fulltext - via API - Digital files for full text extraction" | |
| } | |
| ] | |
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
#
# Submit an Alma job and poll its job instance until it reaches a final state.
#
# Usage: <this-script> job-id parameters-file
#
#   job-id           id of the Alma job to run
#   parameters-file  JSON file posted as the job's parameters
#
# Environment:
#   ALMA_APIKEY  Alma API key sent in the Authorization header (required).
#
# Side effect: the most recent job-instance response is left in
# ./job_instance.xml.
#
# Exit status: 0 when the job ends with COMPLETED_SUCCESS; non-zero on a usage
# error, a failed HTTP request, or any other final job status.

set -u
set -o pipefail   # make "curl | jq" report curl's failure, not just jq's status

readonly ALMA_API="https://api-na.hosted.exlibrisgroup.com/almaws/v1"
readonly POLL_INTERVAL=3     # seconds between status checks
readonly MAX_POLL_ERRORS=5   # consecutive failed polls tolerated before giving up

# Print a message on stderr and abort.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

if [ $# -lt 2 ]; then
  printf 'Usage: %s job-id parameters-file\n' "${0##*/}" >&2
  exit 1
fi
: "${ALMA_APIKEY:?ALMA_APIKEY must be set}"

job_id=$1
params_file=$2
[ -r "$params_file" ] || die "Cannot read parameters file: $params_file"

echo "Submitting the job"
job_instance_url=$(curl -s --fail -X POST \
    -H "Authorization: apikey $ALMA_APIKEY" \
    -H "Content-type: application/json" \
    -H "Accept: application/json" \
    --data "@$params_file" \
    "$ALMA_API/conf/jobs/$job_id?op=run" \
  | jq --raw-output '.additional_info.link')
res=$?
if [ "$res" -ne 0 ]; then
  echo "HTTP request failed with return code $res" >&2
  exit "$res"
fi
if [ -z "$job_instance_url" ] || [ "$job_instance_url" = "null" ]; then
  die "The API response contained no job instance link"
fi

echo "Checking the job status at $job_instance_url"
job_status=
poll_errors=0
while :; do
  sleep "$POLL_INTERVAL"

  # string(...) yields an empty string for a missing node, whereas text()
  # makes xmllint fail with "XPath set is empty".
  if curl -s --fail \
      -H "Authorization: apikey $ALMA_APIKEY" \
      -H "Accept: application/xml" \
      "$job_instance_url" > job_instance.xml \
    && job_progress=$(xmllint --xpath 'string(/job_instance/progress)' job_instance.xml) \
    && job_status=$(xmllint --xpath 'string(/job_instance/status)' job_instance.xml); then
    poll_errors=0
  else
    # Tolerate transient failures, but never poll forever on a dead endpoint.
    poll_errors=$((poll_errors + 1))
    if [ "$poll_errors" -ge "$MAX_POLL_ERRORS" ]; then
      die "Giving up after $poll_errors consecutive failed status checks"
    fi
    continue
  fi

  echo "Job progress: $job_progress; Job status: $job_status"

  case $job_status in
    COMPLETED*)
      break
      ;;
    # NOTE(review): status names assumed from the Alma job-instance status
    # list (FAILED, SKIPPED, USER_ABORTED, SYSTEM_ABORTED,
    # MANUAL_HANDLING_REQUIRED) - confirm against the API documentation.
    # None of them matches COMPLETED*, so without this arm the loop would
    # never end for such a job.
    FAILED* | SKIPPED* | *ABORTED* | MANUAL_HANDLING_REQUIRED*)
      break
      ;;
  esac
done

if [ "$job_status" = "COMPLETED_SUCCESS" ]; then
  exit 0
else
  echo "Job not completed successfully." >&2
  exit 1
fi
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Expects the following environment variables:
ALMA_APIKEY, AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY
Run with 2 parameters: the MMS_ID and the directory that contains the files, as follows: