Created
November 30, 2016 17:25
-
-
Save jgraham909/65d63d99155837a5e2d6df433da6a5c6 to your computer and use it in GitHub Desktop.
Given a URL to a PDF, convert it to text.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Download the PDF at the given URL and print its text content to stdout.
#
# Usage:    <script> URL
# Requires: curl and gs (Ghostscript) on PATH.
# Output:   extracted text on stdout; all diagnostics on stderr.
# Exit:     0 on success; 1 on bad usage, a missing tool, or a failed
#           download or conversion.

set -u

# Ghostscript options: non-interactive batch run (-dNOPAUSE -dBATCH), quiet
# (-q), text-extraction device, output written to stdout.
# -dSAFER is requested explicitly because the PDF comes from the network;
# recent Ghostscript releases enable it by default, older ones do not.
GSOPTIONS=(-dSAFER -dNOPAUSE -dBATCH -sDEVICE=txtwrite -sOutputFile=%stdout -q)
readonly GSOPTIONS

# curl options: stay silent but still report errors, follow redirects, and
# treat HTTP error statuses (4xx/5xx) as failures so an HTML error page is
# never handed to Ghostscript as if it were a PDF.
CURLOPTIONS=(--silent --show-error --fail --location)
readonly CURLOPTIONS

# Name of the downloaded file inside the private work directory.
readonly WORKFILE=pdftotext.pdf

# Work directory; set by main once mktemp succeeds. Deliberately not named
# TMPDIR, which is the environment variable mktemp and other tools consult.
workdir=''

# Print a message to stderr and exit with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Remove the work directory. Installed as the EXIT trap only after the
# directory has been created, so $workdir is never empty here.
cleanup() {
  rm -rf -- "$workdir"
}

# Validate arguments, fetch the PDF, and convert it to text on stdout.
# Arguments: $1 - URL of the PDF to convert
main() {
  local tool pdf

  # Check we have the appropriate number of parameters.
  if [[ "$#" -ne 1 ]]; then
    printf 'Usage: %s URL\n' "${0##*/}" >&2
    die "Invalid number of parameters."
  fi

  for tool in curl gs; do
    command -v "$tool" >/dev/null 2>&1 || die "Required tool not found: $tool"
  done

  # Create the work directory and clean it up on every exit path.
  workdir=$(mktemp -d) || die "Could not create a temporary directory."
  trap cleanup EXIT
  pdf="$workdir/$WORKFILE"

  # Get the requested file. The URL is passed via --url so that an argument
  # beginning with '-' cannot be interpreted as a curl option.
  curl "${CURLOPTIONS[@]}" --output "$pdf" --url "$1" \
    || die "Download failed: $1"

  # Convert to text.
  gs "${GSOPTIONS[@]}" "$pdf" || die "Ghostscript conversion failed."
}

main "$@"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment