Skip to content

Instantly share code, notes, and snippets.

@ehrenfeu
Created October 16, 2015 10:48
Show Gist options
  • Save ehrenfeu/c53caebe893345e104d8 to your computer and use it in GitHub Desktop.
Save ehrenfeu/c53caebe893345e104d8 to your computer and use it in GitHub Desktop.
"I, Librarian" PDF file exporter
#!/bin/bash
#
# Settings for the PDF export.

# Root of the library tree; the "file" column of each database record is
# resolved relative to this directory when the PDF is copied.
LIB=/storage/www/librarian/library
# SQLite database file, located below the library root.
DB=$LIB/database/library.sq3
# Directory that receives the renamed PDF copies.
TGT=/storage/pdf_export
# Only records with START_ID < id < STOP_ID are exported (both bounds are
# exclusive in the query).
START_ID=4500
STOP_ID=99999999
# Longest allowed generated name, measured before ".pdf" is appended.
MAX_LEN=150
# Preflight: verify that sqlite3 can open the database and query the
# "library" table before any export work starts. The query result and
# sqlite3's own messages are discarded; only the exit status is used.
if ! sqlite3 "${DB}" 'SELECT MAX(id) FROM library;' > /dev/null 2>&1; then
  # Report on stderr so the error is not mixed into the progress output.
  printf '%s\n' "ERROR accessing database: ${DB}" >&2
  exit 100
fi
#######################################
# Build the export name (without the ".pdf" suffix) for one record.
# Arguments:
#   $1 - authors field; only the text before the first comma is used and
#        its last blank-separated word (the first-name initials) is dropped
#   $2 - year
#   $3 - journal
#   $4 - title
# Outputs:
#   "<author> <year> <journal> <title>" on stdout with the characters
#   , : / " ? removed, runs of whitespace collapsed to one blank, leading
#   and trailing blanks trimmed, and the long forms of "PNAS" and
#   "C elegans" abbreviated.
#######################################
make_name_stem() {
  local author=$1 year=$2 journal=$3 title=$4

  # All values are passed quoted through printf, so characters such as
  # "*" or "?" are never subject to pathname expansion.
  author=$(printf '%s\n' "${author}" |
    sed -e 's/,.*//' \
        -e 's/[[:space:]][[:space:]]*/ /g' \
        -e 's/^ //' -e 's/ $//' \
        -e 's/ [^ ]*$//')

  # Whitespace is collapsed AFTER the special characters are removed, so
  # that e.g. "Foo : bar" does not leave a double blank behind.
  printf '%s\n' "${author} ${year} ${journal} ${title}" |
    sed -e 's|[,:/"?]||g' \
        -e 's/[[:space:]][[:space:]]*/ /g' \
        -e 's/^ //' -e 's/ $//' \
        -e 's/Proc Natl Acad Sci U S A/PNAS/' \
        -e 's/Caenorhabditis elegans/C elegans/'
}

#######################################
# Shorten a string to at most a given number of characters and drop the
# (possibly partial) last word that remains after the cut.
# Arguments:
#   $1 - text to shorten
#   $2 - maximum length
# Outputs:
#   the text unchanged when it fits, otherwise the shortened text
#######################################
truncate_at_word() {
  local text=$1 max_len=$2

  if (( ${#text} > max_len )); then
    text=${text:0:max_len}
    # Remove everything from the last blank on (word fragment at the end).
    text=${text% *}
  fi
  printf '%s\n' "${text}"
}

# Query all required fields from the DB and loop over the results.
# sqlite3 prints one record per line with the fields separated by "|".
# IFS is set for the read command only, so no global IFS save/restore is
# needed; -r keeps backslashes intact, and any "|" occurring inside the
# title stays part of the last field.
sqlite3 "${DB}" \
  "SELECT file, authors, year, journal, title
   FROM library
   WHERE id > '${START_ID}'
   AND id < '${STOP_ID}';" |
  while IFS='|' read -r orig author year journal title; do
    if [[ -z "${journal}" ]]; then
      printf '%s\n' "JOURNAL-field of '${title}' is empty, skipping PDF"
      continue
    fi

    stem=$(make_name_stem "${author}" "${year}" "${journal}" "${title}")

    # If the name exceeds the maximum length, cut it and remove the word
    # fragment left at the end.
    name_len=${#stem}
    if (( name_len > MAX_LEN )); then
      echo "filename-length: ${name_len}, cutting:"
      stem=$(truncate_at_word "${stem}" "${MAX_LEN}")
    fi

    # Append the suffix, then remove double full stops (e.g. when the
    # title already ends with a ".").
    filename="${stem}.pdf"
    filename=${filename//../.}

    printf 'processing %s -> %s ' "${orig}" "${filename}"
    if [[ -f "${TGT}/${filename}" ]]; then
      # Already exported earlier: just terminate the progress line.
      echo
    elif cp -- "${LIB}/${orig}" "${TGT}/${filename}"; then
      echo "[copied]"
    else
      # cp has already reported the reason on stderr; terminate the
      # progress line with a visible marker instead of leaving it open.
      echo "[FAILED]"
    fi
  done
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment