Created
July 31, 2023 11:48
-
-
Save Studentenfutter/5de77097610aa1e0751eb9286f3b2137 to your computer and use it in GitHub Desktop.
Extract text from copy protected PDFs with qpdf
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#######################################
# Decrypt a PDF with qpdf, writing the result next to the original as
# "<name>_decrypted.pdf" (a trailing ".pdf" on the input is stripped first).
# The input file itself is left untouched.
# Arguments: $1 - path to the PDF file to decrypt
# Outputs:   whatever qpdf prints; a usage message on stderr if $1 is missing
# Returns:   qpdf's exit status (per the qpdf manual: 0 = success,
#            2 = errors, 3 = warnings but the file was processed);
#            2 if no input path was given
#######################################
decrypt_and_rename() {
  if [[ -z "${1:-}" ]]; then
    printf 'usage: decrypt_and_rename <file.pdf>\n' >&2
    return 2
  fi

  # 'local' keeps these from clobbering same-named variables in the caller.
  local input_file="$1"
  local output_file="${input_file%.pdf}_decrypted.pdf"

  # Last command in the function, so its status becomes the return value.
  qpdf --decrypt "$input_file" "$output_file"
}
# Prompt the user to enter the folder path.
# -r keeps backslashes in the typed path literal instead of treating them
# as escape characters.
read -r -p "Enter the folder path to search for PDF files: " search_folder

# Check if the folder exists
if [[ ! -d "$search_folder" ]]; then
  echo "Error: The specified folder does not exist." >&2
  exit 1
fi

# Every step below shells out to qpdf; fail early with a clear message
# rather than reporting each file as "not encrypted".
if ! command -v qpdf >/dev/null 2>&1; then
  echo "Error: qpdf is not installed or not in PATH." >&2
  exit 1
fi

# Number of files that could not be inspected or decrypted.
failed=0

# Use 'find' to locate all PDF files in subdirectories.
# NUL-delimited output plus 'IFS= read -r -d ""' keeps paths containing
# spaces, leading/trailing whitespace or newlines intact. Feeding the loop
# through process substitution (not a pipe) keeps it in the current shell,
# so the 'failed' counter is still visible after the loop.
while IFS= read -r -d '' pdf_file; do
  echo "Processing: $pdf_file"

  # Check if the PDF file has restrictions for text extraction
  encryption_output=$(qpdf --show-encryption "$pdf_file" 2>&1)
  status=$?
  # qpdf exits 3 when it only emitted warnings; anything else non-zero
  # means the file could not be read (invalid PDF, password required, ...).
  if (( status != 0 && status != 3 )); then
    echo "Error: could not read encryption info for: $pdf_file" >&2
    printf '%s\n' "$encryption_output" >&2
    failed=$((failed + 1))
    continue
  fi

  if [[ "$encryption_output" == *"extract for any purpose: not allowed"* ]]; then
    # Decrypt and rename the PDF file; only report success if qpdf
    # actually produced the output (exit 0, or 3 = warnings only).
    decrypt_and_rename "$pdf_file"
    status=$?
    if (( status == 0 || status == 3 )); then
      echo "Decrypted and renamed to: ${pdf_file%.pdf}_decrypted.pdf"
    else
      echo "Error: failed to decrypt: $pdf_file" >&2
      failed=$((failed + 1))
    fi
  else
    echo "Already decrypted or not encrypted for extraction: $pdf_file"
  fi
done < <(find "$search_folder" -type f -name '*.pdf' -print0)

if (( failed > 0 )); then
  echo "Finished, but $failed PDF file(s) could not be processed." >&2
  exit 1
fi

echo "All PDF files decrypted and renamed."
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment