Last active
November 25, 2020 20:05
-
-
Save darcyparker/016930578862a407e0f0 to your computer and use it in GitHub Desktop.
Search PDFs from command line
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash | |
#Search pdfs: a wrapper for pdfgrep and pdftotext | |
#http://github.com/darcyparker | |
_usage(){ | |
local _filename=${0##*/} | |
cat << END_USAGE | |
Usage: $_filename | |
Search current directory: | |
$_filename <search pattern> | |
Search a specific directory: | |
$_filename <directory> <search pattern> | |
END_USAGE | |
exit 2 | |
} | |
_requirements(){ | |
cat << END_REQUIREMENTS | |
Requirements: | |
This requires pdfgrep or (pdftotext and grep) | |
pdfgrep is preferred because it reports page numbers accurately | |
Tip: On linux "apt-get install pdfgrep" | |
Tip: On linux "pdftotext" is a part of poppler-utils or xpdf-utils | |
: "apt-get install poppler-utils" | |
: "apt-get install xpdf-utils" | |
END_REQUIREMENTS | |
exit 1 | |
} | |
_searchWithPdfToText(){ | |
find "$1" -name "*.pdf" -type f -print0 | xargs -I{} -0 /usr/bin/env bash -c "pdftotext '{}' - | grep --with-filename --label='{}' --color \"$2\"" | |
exit $? | |
} | |
_searchWithPdfgrep(){ | |
#find "$1" -name "*.pdf" -type f -print0 | xargs -I{} -0 pdfgrep -H -n "$2" '{}' | |
pdfgrep -H -n -r "$2" "$1" | |
exit $? | |
} | |
_search(){ | |
#First try pdfgrep | |
[ -x "$(command -v pdfgrep)" ] && _searchWithPdfgrep "$@" | |
#if there is no pdfgrep, use pdftotext | |
[ -x "$(command -v pdftotext)" ] && [ -x "$(command -v grep)" ] && _searchWithPdfToText "$@" | |
#if pdfgrep or pdftotext and grep are not available, then show requirements | |
_requirements | |
} | |
_main(){ | |
[ $# -eq 0 ] || [ $# -gt 2 ] && _usage "$0" $# | |
[ $# -eq 1 ] && local _searchdir=$PWD && local _searchpattern=$1 | |
[ $# -eq 2 ] && local _searchdir=$1 && local _searchpattern=$2 | |
_search "$_searchdir" "$_searchpattern" | |
} | |
_main "$@" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment