Created
February 21, 2015 17:52
-
-
Save mik01aj/fb7ed2af4088d5937c9e to your computer and use it in GitHub Desktop.
A script to filter out repeated partial slides in PDF presentations (especially lecture slides made in LaTeX Beamer) and generate a printer-friendly version.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Author: Mikołaj Dądela <[email protected]>
#
# Argument handling.
#   $1  input PDF (required, must be readable)
#   $2  output PDF name, or - if it is all digits - the threshold
#   $3  threshold (only used when it is all digits)
# Sets: inputPdf, threshold, basename, outputPdf.

inputPdf="$1"
threshold="10"

if [[ -z "$inputPdf" ]]; then
  {
    echo "Usage: $0 input.pdf [output.pdf] [<threshold>]"
    echo ""
    echo "Default output filename for foo.pdf is foo_s.pdf."
    echo ""
    echo "Threshold is the minimal file size (in KB) of difference PNG. After"
    echo "running this script have a look at diff files to decide what value"
    echo "would be ok for you. The default is $threshold."
  } >&2
  exit 1
fi

if [[ ! -r "$inputPdf" ]]; then
  echo "Unable to open $inputPdf." >&2
  exit 1
fi

# Strip a trailing .pdf / .PDF so derived names are built from the stem.
basename="${inputPdf%.pdf}"
basename="${basename%.PDF}"
outputPdf="${basename}_s.pdf"

# The second argument is overloaded: digits mean "threshold", anything
# else is the output file name. The regex is matched against the whole
# string (not line by line), and 10# forces base 10 so that a value such
# as "010" is not read as octal by later arithmetic.
if [[ -n "$2" ]]; then
  if [[ "$2" =~ ^[0-9]+$ ]]; then
    threshold=$((10#$2))
  else
    outputPdf="$2"
  fi
fi

if [[ -n "$3" ]]; then
  if [[ "$3" =~ ^[0-9]+$ ]]; then
    threshold=$((10#$3))
  else
    # Previously ignored without a word; keep going but say so.
    echo "Ignoring non-numeric threshold '$3'; using $threshold." >&2
  fi
fi
# ----------------------------------------------------------------------
# Ask pdftk for the page count. Only the first line that starts with
# "NumberOfPages" is used, and the result is validated because every
# later phase does arithmetic on it.
numPages=$(pdftk "$inputPdf" dump_data | awk '/^NumberOfPages/ { print $2; exit }')
if ! [[ "$numPages" =~ ^[0-9]+$ ]]; then
  echo "Unable to determine the number of pages in $inputPdf (is pdftk installed?)." >&2
  exit 1
fi
echo "$inputPdf contains $numPages pages."
# 1st phase: thumbnails

# thumb_batches TOTAL [SIZE]
# Prints one "start end" line (0-based, inclusive page indices) for each
# batch of at most SIZE pages (default 10) needed to cover TOTAL pages.
# Prints nothing when TOTAL is 0 or empty, and never emits a batch whose
# start lies past the last page (the old seq-based loop did so whenever
# TOTAL was a multiple of 10).
thumb_batches() {
  local total=${1:-0} size=${2:-10}
  local first last
  for ((first = 0; first < total; first += size)); do
    last=$((first + size - 1))
    if ((last >= total)); then
      last=$((total - 1))
    fi
    echo "$first $last"
  done
}

thumbsDir="${basename}_thumbs"
if [ ! -f "$thumbsDir/p-0.png" ]; then
  mkdir -p "$thumbsDir"
  # The batch list is read on fd 3 so that convert (and any delegate it
  # starts) cannot consume it from stdin; process substitution keeps the
  # loop in this shell so "|| exit" really terminates the script.
  while read -r start end <&3; do
    printf '\rGenerating thumbnails... %s-%s' "$start" "$end"
    # -scene plus an explicit %d pattern numbers the output files by page
    # index (p-<start>.png ...), independent of how many pages are in the
    # batch.
    convert -resize 200x200 "${inputPdf}[${start}-${end}]" \
      -scene "$start" "$thumbsDir/p-%d.png" || exit
  done 3< <(thumb_batches "$numPages")
  echo -e "\rGenerating thumbnails... done. (saved to $thumbsDir/p-*.png)"
else
  echo "Using found $thumbsDir/p-*.png files."
fi
# 2nd phase: diffs
# For every pair of consecutive thumbnails p-i / p-(i+1) write diff-i.png.
# The whole phase is skipped when diff-0.png already exists.
if [ ! -f "$thumbsDir/diff-0.png" ]; then
  i=0
  # Stop at the first page that has no successor thumbnail.
  while [ -f "$thumbsDir/p-$((i + 1)).png" ]; do
    printf '\rComparing pages... %s ' "$i"
    # This blending mode catches only things that disappear on the next
    # slide - anything that appears or doesn't change comes out as white
    # on the diff image.
    convert "$thumbsDir/p-$((i + 1)).png" -negate \
      "$thumbsDir/p-$i.png" \
      -compose Plus -composite \
      "$thumbsDir/diff-$i.png" || exit
    i=$((i + 1))
  done
  echo -e "\rComparing pages... done. (saved to $thumbsDir/diff-*.png)"
else
  echo "Using found $thumbsDir/diff-*.png files."
fi
# 3rd phase: selection

# select_pages DIR THRESHOLD_KB
# Prints the space-separated, 1-based page numbers to keep.
#   - Page 1 is always kept.
#   - Page i+1 is kept when DIR/diff-i.png (i >= 1) is larger than
#     THRESHOLD_KB * 1024 bytes.
#   - The page after the last diff (the final page) is always kept, but
#     only if at least one diff exists: with no diff files there is just
#     one page, and asking for page 2 would be an error.
select_pages() {
  local dir=$1 threshold_kb=${2:-0}
  local min_bytes=$((threshold_kb * 1024))
  local keep="1" i=1 bytes
  while [ -f "$dir/diff-$i.png" ]; do
    # wc -c is portable (stat -c is GNU-only); the arithmetic expansion
    # strips the padding some wc implementations emit.
    bytes=$(($(wc -c <"$dir/diff-$i.png")))
    if ((bytes > min_bytes)); then
      keep="$keep $((i + 1))"
    fi
    i=$((i + 1))
  done
  if ((i > 1)) || [ -f "$dir/diff-0.png" ]; then
    keep="$keep $((i + 1))"
  fi
  echo "$keep"
}

echo -n "Selecting pages (with size threshold = ${threshold}KB)... "
pages=$(select_pages "$thumbsDir" "$threshold")
echo "$pages done."
# 4th phase: output
# $pages is a space-separated list of page numbers; split it into an
# array so each number reaches pdftk as its own argument without relying
# on an unquoted expansion (which would also be subject to globbing).
echo -n "Writing $outputPdf... "
read -r -a pageList <<<"$pages"
pdftk "$inputPdf" cat "${pageList[@]}" output "$outputPdf" || exit
echo "done."
echo "Finished. You can now remove $thumbsDir/."
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment