ehrenfeu · October 16, 2015 10:48
diff --git a/i-librarian_pdf_export.sh b/i-librarian_pdf_export.sh
 #!/bin/bash
 #
 # Copy the PDF files referenced in the library database to ${TGT}, naming
 # each copy "<first author> <year> <journal> <title>.pdf".
 #
 # Only records with START_ID < id < STOP_ID are considered. Records with an
 # empty journal field are skipped, and files that already exist in ${TGT}
 # are left alone. Names longer than MAX_LEN are cut at the last blank that
 # fits within that limit.
 #
 # Exit status: 100 if the database cannot be queried.

 LIB="/storage/www/librarian/library"
 DB="${LIB}/database/library.sq3"
 TGT="/storage/pdf_export"

 START_ID="4500"
 STOP_ID="99999999"

 MAX_LEN="150"

 # check if connecting and querying the DB works:
 sqlite3 "${DB}" 'SELECT MAX(id) FROM library;' > /dev/null 2>&1 || {
     echo "ERROR accessing database: ${DB}" >&2
     exit 100
 }

 # query all required fields from the DB and loop over the results. The
 # columns of each record arrive on one line separated by '|', so "read"
 # splits them directly: "-r" keeps backslashes literal, the last variable
 # receives the rest of the line (a '|' inside a title is kept), and since no
 # unquoted expansion is involved a '*' or '?' in a field is never expanded
 # as a filename pattern.
 sqlite3 "${DB}" \
     "SELECT file, authors, year, journal, title
       FROM library
       WHERE id > '${START_ID}'
       AND id < '${STOP_ID}';" |
     while IFS='|' read -r ORIG AUTHOR YEAR JOURNAL TITLE ; do
         if [[ -z "${JOURNAL}" ]] ; then
             echo "JOURNAL-field of '$TITLE' is empty, skipping PDF"
             continue
         fi
         # reduce the author list to the first author's surname: squeeze
         # whitespace, cut away everything after the first comma, then remove
         # everything after the last blank (to remove the first name initial)
         AUTHOR="$(printf '%s\n' "${AUTHOR}" |
             sed 's/[[:space:]][[:space:]]*/ /g ; s/^ // ; s/ $// ;
                 s/,.*// ; s/ [^ ]*$//')"
         FILENAME="${AUTHOR} ${YEAR} ${JOURNAL} ${TITLE}"
         # remove special chars
         # squeeze every run of whitespace into a single blank and trim both
         # ends (a single pass of 's/  / /g' leaves a double blank behind
         # whenever three or more blanks meet)
         # replace "PNAS"
         # replace "C elegans"
         FILENAME="$(printf '%s\n' "${FILENAME}" |
             sed 's/[,:/"?]//g ;
                 s/[[:space:]][[:space:]]*/ /g ; s/^ // ; s/ $// ;
                 s/Proc Natl Acad Sci U S A/PNAS/ ;
                 s/Caenorhabditis elegans/C elegans/')"
         # if the name exceeds the maximum length, cut it and drop the
         # word-fragment left at the end. The length is measured on the very
         # string that is used afterwards; bash counts characters according
         # to the current locale, so the cut cannot fall inside a multibyte
         # character when a UTF-8 locale is active.
         NAME_LEN="${#FILENAME}"
         if (( NAME_LEN > MAX_LEN )) ; then
             echo "filename-length: $NAME_LEN, cutting:"
             # cut filename to specified length
             FILENAME="${FILENAME:0:MAX_LEN}"
             # remove the last blank and everything after it
             FILENAME="${FILENAME% *}"
         fi
         # append the extension and remove double full stops:
         FILENAME="${FILENAME}.pdf"
         FILENAME="${FILENAME//../.}"
         printf 'processing %s -> %s ' "${ORIG}" "${FILENAME}"
         if [[ -f "${TGT}/${FILENAME}" ]] ; then
             # already exported earlier, just finish the progress line
             echo
         elif cp "${LIB}/${ORIG}" "${TGT}/${FILENAME}" ; then
             echo "[copied]"
         else
             # cp has reported the reason on stderr; terminate the line
             echo "[FAILED]"
         fi
     done
	#!/bin/bash
	#
	# Copy the PDF files referenced in the library database to ${TGT}, naming
	# each copy "<first author> <year> <journal> <title>.pdf".
	#
	# Only records with START_ID < id < STOP_ID are considered. Records with an
	# empty journal field are skipped, and files that already exist in ${TGT}
	# are left alone. Names longer than MAX_LEN are cut at the last blank that
	# fits within that limit.
	#
	# Exit status: 100 if the database cannot be queried.

	LIB="/storage/www/librarian/library"
	DB="${LIB}/database/library.sq3"
	TGT="/storage/pdf_export"

	START_ID="4500"
	STOP_ID="99999999"

	MAX_LEN="150"

	# check if connecting and querying the DB works:
	sqlite3 "${DB}" 'SELECT MAX(id) FROM library;' > /dev/null 2>&1 || {
		echo "ERROR accessing database: ${DB}" >&2
		exit 100
	}

	# query all required fields from the DB and loop over the results. The
	# columns of each record arrive on one line separated by '|', so "read"
	# splits them directly: "-r" keeps backslashes literal, the last variable
	# receives the rest of the line (a '|' inside a title is kept), and since no
	# unquoted expansion is involved a '*' or '?' in a field is never expanded
	# as a filename pattern.
	sqlite3 "${DB}" \
		"SELECT file, authors, year, journal, title
		FROM library
		WHERE id > '${START_ID}'
		AND id < '${STOP_ID}';" |
		while IFS='|' read -r ORIG AUTHOR YEAR JOURNAL TITLE ; do
			if [[ -z "${JOURNAL}" ]] ; then
				echo "JOURNAL-field of '$TITLE' is empty, skipping PDF"
				continue
			fi
			# reduce the author list to the first author's surname: squeeze
			# whitespace, cut away everything after the first comma, then remove
			# everything after the last blank (to remove the first name initial)
			AUTHOR="$(printf '%s\n' "${AUTHOR}" |
				sed 's/[[:space:]][[:space:]]*/ /g ; s/^ // ; s/ $// ;
					s/,.*// ; s/ [^ ]*$//')"
			FILENAME="${AUTHOR} ${YEAR} ${JOURNAL} ${TITLE}"
			# remove special chars
			# squeeze every run of whitespace into a single blank and trim both
			# ends (a single pass of 's/  / /g' leaves a double blank behind
			# whenever three or more blanks meet)
			# replace "PNAS"
			# replace "C elegans"
			FILENAME="$(printf '%s\n' "${FILENAME}" |
				sed 's/[,:/"?]//g ;
					s/[[:space:]][[:space:]]*/ /g ; s/^ // ; s/ $// ;
					s/Proc Natl Acad Sci U S A/PNAS/ ;
					s/Caenorhabditis elegans/C elegans/')"
			# if the name exceeds the maximum length, cut it and drop the
			# word-fragment left at the end. The length is measured on the very
			# string that is used afterwards; bash counts characters according
			# to the current locale, so the cut cannot fall inside a multibyte
			# character when a UTF-8 locale is active.
			NAME_LEN="${#FILENAME}"
			if (( NAME_LEN > MAX_LEN )) ; then
				echo "filename-length: $NAME_LEN, cutting:"
				# cut filename to specified length
				FILENAME="${FILENAME:0:MAX_LEN}"
				# remove the last blank and everything after it
				FILENAME="${FILENAME% *}"
			fi
			# append the extension and remove double full stops:
			FILENAME="${FILENAME}.pdf"
			FILENAME="${FILENAME//../.}"
			printf 'processing %s -> %s ' "${ORIG}" "${FILENAME}"
			if [[ -f "${TGT}/${FILENAME}" ]] ; then
				# already exported earlier, just finish the progress line
				echo
			elif cp "${LIB}/${ORIG}" "${TGT}/${FILENAME}" ; then
				echo "[copied]"
			else
				# cp has reported the reason on stderr; terminate the line
				echo "[FAILED]"
			fi
		done