Created
August 22, 2011 02:36
-
-
Save glamrock/1161540 to your computer and use it in GitHub Desktop.
Interesting script from @pmocek for retitling certain PDFs: it sets each PDF's Title metadata from a matching .txt file.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# See discussion at <http://twitter.theinfo.org/95208900873302016>
#
# Search a directory for files with names ending in ".pdf".
# For each of those:
#   If a file with similar name, ending in ".txt" instead of ".pdf" exists:
#     If the text file contains a line beginning with "T1":
#       Read the rest of the first such line to find a title
#       Write the PDF's metadata to a temporary file
#       If the metadata already includes a title:
#         Replace title with the one found in the text file
#       Otherwise:
#         Add title found in text file
#       Update the PDF with values from the temp file, leaving original
#       renamed like "*.pdf.orig"
#
# A PDF is only renamed/replaced after pdftk has successfully written the
# updated copy. On any failure the PDF is left in place, the temporary files
# are removed, and a message is printed to stderr.

search_dir="/tmp"

#######################################
# Print the title found in a text file.
# Arguments: $1 - path to the text file
# Outputs:   the text following "T1 <spaces>- <spaces>" on the first line
#            that begins with "T1"; nothing if there is no such line
#######################################
extract_title() {
  local title
  # Only the first T1 line is used: a multi-line title would break the
  # line-oriented metadata file handed to pdftk.
  title="$(grep -- '^T1' "$1" | head -n 1 \
    | sed -e 's/^T1 \{1,\}- \{1,\}\(.*\)/\1/')"
  # Drop a trailing carriage return so CRLF-terminated text files do not
  # leak "\r" into the title.
  title="${title%$'\r'}"
  printf '%s\n' "$title"
}

#######################################
# Copy a pdftk metadata dump, setting the Title record to a new value.
# Every line is copied verbatim except the InfoValue line that follows an
# "InfoKey: Title" line. If no Title record exists, one is appended.
# Arguments: $1 - metadata dump to read
#            $2 - file to write the modified metadata to
#            $3 - new title
# Returns:   non-zero if the output file cannot be written
#######################################
write_title_metadata() {
  local old_metadata="$1" new_metadata="$2" new_title="$3"
  local line in_title=0 replaced=0 has_markers=0

  {
    while IFS= read -r line || [[ -n "$line" ]]; do
      case "$line" in
        "InfoBegin")
          has_markers=1
          ;;
        "InfoKey: Title")
          in_title=1
          ;;
        "InfoKey:"*)
          in_title=0
          ;;
        "InfoValue:"*)
          if (( in_title )); then
            line="InfoValue: $new_title"
            in_title=0
            replaced=1
          fi
          ;;
      esac
      printf '%s\n' "$line"
    done < "$old_metadata"

    if (( ! replaced )); then
      # Mirror the dump's own record format: only emit an InfoBegin marker
      # if the dump itself uses them.
      # NOTE(review): newer pdftk releases appear to start each Info record
      # with "InfoBegin" -- confirm against the installed version.
      if (( has_markers )); then
        printf 'InfoBegin\n'
      fi
      printf 'InfoKey: Title\n'
      printf 'InfoValue: %s\n' "$new_title"
    fi
  } > "$new_metadata"
}

#######################################
# Set the Title metadata of one PDF, keeping the original as "<pdf>.orig".
# Arguments: $1 - path to the PDF
#            $2 - new title
# Returns:   0 on success; non-zero if any step failed, in which case the
#            PDF is left under its original name
#######################################
retitle_pdf() {
  local pdf_file="$1" new_title="$2"
  local old_metadata new_metadata status=0

  old_metadata="$(mktemp)" || return 1
  new_metadata="$(mktemp)" || {
    rm -f -- "$old_metadata"
    return 1
  }

  if pdftk "$pdf_file" dump_data output "$old_metadata" \
    && write_title_metadata "$old_metadata" "$new_metadata" "$new_title" \
    && pdftk "$pdf_file" update_info "$new_metadata" output "${pdf_file}.new"
  then
    if ! mv -- "$pdf_file" "${pdf_file}.orig"; then
      rm -f -- "${pdf_file}.new"
      status=1
    elif ! mv -- "${pdf_file}.new" "$pdf_file"; then
      # Put the original back rather than leaving the PDF name missing.
      mv -- "${pdf_file}.orig" "$pdf_file"
      status=1
    fi
  else
    # Do not leave a partial output file behind.
    rm -f -- "${pdf_file}.new"
    status=1
  fi

  rm -f -- "$old_metadata" "$new_metadata"
  return "$status"
}

#######################################
# Retitle every PDF under $search_dir that has a usable companion .txt file.
# Globals:   search_dir (read)
# Outputs:   diagnostics on stderr for PDFs that are skipped or fail
#######################################
main() {
  local pdf_file text_file new_title

  # The file list is NUL-delimited and read on fd 3 so that unusual file
  # names survive intact and commands in the loop body cannot consume the
  # list from stdin.
  while IFS= read -r -d '' -u 3 pdf_file; do
    text_file="${pdf_file%.pdf}.txt"
    if [[ ! -f "$text_file" ]]; then
      echo "No corresponding metadata file for $pdf_file" >&2
      continue
    fi

    new_title="$(extract_title "$text_file")"
    if [[ -z "$new_title" ]]; then
      echo "No T1 line in $text_file" >&2
      continue
    fi

    if ! retitle_pdf "$pdf_file" "$new_title"; then
      echo "Could not update title of $pdf_file" >&2
    fi
  done 3< <(find "$search_dir" -type f -name '*.pdf' -print0)
}

main "$@"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment