Created
July 30, 2023 16:55
-
-
Save mark-e-deyoung/322861fb0586a9a935383cfc2ab40ea7 to your computer and use it in GitHub Desktop.
Bash script to convert all PDFs in the current directory to text using pdftotext
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Convert every PDF file in the current directory to plain text.
#
# For each file whose name ends in .pdf (any letter case) and whose content
# is detected as application/pdf, pdftotext is run on it; pdftotext writes
# its output next to the input, with the extension replaced by .txt.
#
# Usage:   run from the directory that holds the PDF files; no arguments.
# Exit status:
#   0  every matching PDF was converted (or there was nothing to convert)
#   1  pdftotext is not installed, or at least one conversion failed

# Abort early, with a failure status, when the converter is not available.
if ! command -v pdftotext &> /dev/null; then
  echo "pdftotext could not be found. Please install it and run the script again." >&2
  exit 1
fi

# nocaseglob: *.pdf also matches .PDF, .Pdf, ... so a single pattern is
#             enough (adding *.PDF as well would visit every file twice).
# nullglob:   when nothing matches, the loop is skipped instead of running
#             once on the literal string "*.pdf".
shopt -s nocaseglob nullglob

failures=0

for file in *.pdf; do
  # The "./" prefix keeps names that start with "-" from being parsed as
  # command-line options by file(1) and pdftotext.
  mime_type=$(file --mime-type -b "./${file}")

  # Trust the detected content type rather than the extension alone.
  if [[ "${mime_type}" == "application/pdf" ]]; then
    # Only report success when pdftotext actually succeeded.
    if pdftotext "./${file}"; then
      echo "Converted $file to text."
    else
      echo "Failed to convert $file." >&2
      failures=$((failures + 1))
    fi
  else
    echo "$file is not a PDF."
  fi
done

# Restore default globbing in case this script was sourced.
shopt -u nocaseglob nullglob

if ((failures > 0)); then
  echo "Conversion finished with ${failures} failure(s)." >&2
  exit 1
fi

echo "Conversion complete."
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment