Created
December 24, 2011 20:05
-
-
Save precious/1518232 to your computer and use it in GitHub Desktop.
This script trims white margins out of PDF document and packs it to DjVu
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
# vim: set fileencoding=utf-8 ts=2 sw=2 expandtab: | |
# PUBLIC DOMAIN | |
# This script trims white margins out of PDF document and packs it to DjVu. | |
# Author Sergei Astanin http://sovety.blogspot.com/ | |
# Modified by Vsevolod K. https://github.com/precious | |
# This script uses COLUMNS as its own setting (number of text columns).
# Bash documents that, with the checkwinsize option set, it updates LINES and
# COLUMNS after external commands; the option is enabled by default in recent
# releases. Turn it off so the setting below is never overwritten with the
# terminal width.
shopt -u checkwinsize

### Settings
DPI=150     # rasterization resolution (passed to pdftoppm -r and -dpi)
MARGIN=10   # width of the white border added around every trimmed page
MODE=gray   # mono or gray or color
COLUMNS=1   # 1 keeps pages whole; other values enable multi-column slicing
### End of settings

# Snapshot of the built-in defaults; the help text prints these even after
# command-line options have changed DPI and MODE.
dpidefault=$DPI
modedefault=$MODE

prog=$0       # script path, used for the name shown in the help text
type=djvu     # output document type: djvu or pdf
quality=60    # image quality used when producing a PDF
CROP=""       # raw --crop specification, e.g. "b40r10"
resizeopt=""  # extra mogrify arguments for --resize (empty = no resize)
#######################################
# Print the command-line help text.
# Globals (read): prog, modedefault, dpidefault, COLUMNS, MARGIN
# Arguments:      none
# Outputs:        help text on stdout
#######################################
function usage() {
  # Keep helper variables local so calling usage never changes script state.
  local me monodefault="" graydefault="" colordefault=""
  me=$(basename "$prog")

  # Tag the compression mode that is the built-in default. Any value other
  # than mono/gray (including an empty one) marks color, as before.
  case "$modedefault" in
    mono) monodefault="[default]" ;;
    gray) graydefault="[default]" ;;
    *) colordefault="[default]" ;;
  esac

  cat << END
Usage: $me [options] document.pdf
Options:
-f <int> the first page to process [default: 1]
-l <int> the last page to process
-d <int> resolution in DPI [default: $dpidefault]
-q|--quality <int> quality of images for generating pdf [default: 60]
-c|--columns <int> multi-column mode [default: $COLUMNS]
-p|--crop <tXbXlXrX> crop image's top, bottom, left and/or right edges
example: $me --crop b40r10 file.pdf
-m|--margin <int> borders width [default: $MARGIN]
-t|--type <str> type of result document - pdf/djvu [default: djvu]
-r|--resize <geomet> resize pages; geometry is WIDTH or xHEIGHT or WIDTHxHEIGHT
or as presented at
http://www.imagemagick.org/script/command-line-processing.php#geometry
--mono bitonal compression (black and white only) $monodefault
--gray DjVuPhoto compression (shades of gray images) $graydefault
--color DjVuPhoto compression (color images) $colordefault
-h|--help print this message
END
}
#######################################
# Parse command-line options into the script's global settings.
# Globals (written): frompage, topage, DPI, MODE, COLUMNS, CROP, type,
#                    quality, resize, MARGIN, positional_args
# Arguments:         the script's command-line arguments
# Returns:           non-zero if getopt rejects the command line
#######################################
parse_options() {
  local opts
  # Every long option that takes a value needs its own trailing ':' and the
  # names must be comma-separated, otherwise getopt does not recognise them.
  opts=$(getopt -n "${0##*/}" \
    -l "help,mono,gray,grey,color,colour,columns:,crop:,margin:,type:,resize:,quality:" \
    -o "hf:l:d:c:p:m:t:r:q:" -- "$@") || return 1
  # getopt emits a shell-quoted argument list; eval is the documented way to
  # load it back into the positional parameters.
  eval set -- "$opts"

  while [ $# -gt 0 ] ; do
    case "$1" in
      -h|--help) usage ; exit 0 ;;
      -f) frompage=$2 ; shift 2 ;;
      -l) topage=$2 ; shift 2 ;;
      -d) DPI=$2 ; shift 2 ;;
      --mono) MODE=mono ; shift ;;
      --color|--colour) MODE=color ; shift ;;
      --gray|--grey) MODE=gray ; shift ;;
      --columns|-c) COLUMNS=$2 ; shift 2 ;;
      --crop|-p) CROP=$2 ; shift 2 ;;
      --type|-t) type=$2 ; shift 2 ;;
      --quality|-q) quality=$2 ; shift 2 ;;
      --resize|-r) resize=$2 ; shift 2 ;;
      -m|--margin) MARGIN=$2 ; shift 2 ;;
      --) shift ; break ;;   # end of options; the rest are file names
      *) break ;;
    esac
  done
  # A function cannot change its caller's positional parameters, so hand the
  # remaining arguments back through a global array.
  positional_args=("$@")
}

# On a getopt error (unknown option, missing value) show the help and stop
# instead of continuing with an unparsed command line.
parse_options "$@" || { usage >&2 ; exit 1 ; }
set -- "${positional_args[@]}"
# Exactly one positional argument (the input PDF) must be left after option
# parsing. Otherwise report the problem on stderr, show the help and stop.
if [ $# -ne 1 ] ; then
  printf "Filename is required.\n" >&2
  usage >&2
  exit 1
fi
#######################################
# Print the absolute form of a path without touching the file system.
# A path that already starts with "/" is returned unchanged; anything else is
# taken relative to the current working directory.
# Arguments: $1 - path as given on the command line
# Outputs:   absolute path on stdout
#######################################
absolute_path() {
  case "$1" in
    /*) printf '%s\n' "$1" ;;
    *) printf '%s\n' "$PWD/$1" ;;
  esac
}

# Absolute paths to the input PDF and to the output file (without extension);
# they must stay valid after the script changes into the temporary directory.
pdf=$(absolute_path "$1")
filename="${pdf%.pdf}"

# Scratch directory for the per-page images and compressed pages.
tmpdir=$(mktemp -d /tmp/pagesXXXXX) || {
  printf 'Cannot create temporary directory.\n' >&2
  exit 1
}

# First page defaults to 1 when -f was not given.
frompage=${frompage:-1}

# Last page defaults to the page count reported by pdfinfo when -l was not
# given; the C locale keeps the "Pages:" label predictable.
if [ -z "$topage" ] ; then
  topage=$(LC_ALL=C pdfinfo "$pdf" | awk '/^Pages:/ {print $NF;}')
fi
#######################################
# Derive rasterizer and compressor settings from a compression mode.
# Globals (written):
#   monoopt  - extra option for pdftoppm
#   pmext    - extension of the images pdftoppm is expected to produce
#   compress - DjVu encoder command line (split on spaces when it is run)
# Arguments: $1 - mono, gray or color; any other value is treated as color
#######################################
configure_mode() {
  case "$1" in
    mono) # use bitonal compression
      monoopt="-mono"
      pmext="pbm"
      compress="cjb2"
      ;;
    gray) # use grayscale compression
      monoopt=""
      pmext="ppm"
      compress="c44 -crcbnone"
      ;;
    *) # use DjVuPhoto compression (suitable for color and grayscale images)
      monoopt=""
      pmext="ppm"
      compress="c44"
      ;;
  esac
}

configure_mode "$MODE"
#######################################
# Translate a crop specification into ImageMagick -crop arguments.
# The specification is a string such as "t5b40l3r10": a side letter
# (t, b, l, r) followed by a number of pixels to cut from that side.
# Arguments: $1 - crop specification
# Outputs:   the -crop arguments on stdout
# Returns:   1 if the specification names no side followed by a number
#######################################
build_crop_options() {
  local spec=$1 side opts=""
  # Locals only: nothing is exported into the environment of child processes.
  local t="" b="" l="" r=""

  for side in t b l r ; do
    if [[ "$spec" =~ ${side}([0-9]+) ]] ; then
      printf -v "$side" '%s' "${BASH_REMATCH[1]}"
    fi
  done

  # Left/top are removed with a positive offset, right/bottom with a negative
  # one; a side that was not given counts as 0.
  if [[ -n "$l" || -n "$t" ]] ; then
    opts="-crop +${l:-0}+${t:-0}"
  fi
  if [[ -n "$r" || -n "$b" ]] ; then
    opts="${opts:+$opts }-crop -${r:-0}-${b:-0}"
  fi

  [[ -n "$opts" ]] || return 1
  printf '%s\n' "$opts"
}

# Start from a known-empty value so nothing leaks in from the environment.
cropopt=""
if [[ -n "$CROP" ]] ; then
  if ! cropopt=$(build_crop_options "$CROP") ; then
    echo "invalid crop arguments" >&2
    usage >&2
    exit 1
  fi
fi

# Resize option for mogrify; the geometry string is passed through as given.
if [[ -n "$resize" ]] ; then
  resizeopt="-resize $resize"
fi
# Page pipeline (runs inside $tmpdir):
#   rasterize one page:   pdftoppm -r $DPI
#   optional fixed crop:  mogrify $cropopt +repage
#   cut margins:          mogrify ... -trim +repage
#   add narrow margin:    ... -bordercolor white -border $MARGIN
#   compress:             $compress (DjVu) or JPEG via convert (PDF)
# Progress is shown as page numbers; blank pages are reported as skipped.
#
# $monoopt, $cropopt, $resizeopt and $compress are expanded unquoted on
# purpose: each holds zero or more space-separated arguments.

#######################################
# Cut one trimmed page image into COLUMNS x n overlapping tiles and compress
# every tile to its own DjVu file named <page>_<column>_<slice>.djvu.
# NOTE(review): tiles are always written as DjVu, whatever $type is, so
# multi-column mode combined with --type pdf leaves nothing for the PDF merge.
# Globals (read): COLUMNS, MARGIN, DPI, pmext, compress
# Arguments:      $1 - page image, $2 - page number
# Returns:        non-zero as soon as any step fails
#######################################
split_page() {
  local image=$1 page=$2
  local pagew pageh w h n overlap dh c j tile tile_djvu
  local -a spinner=("/" "-" "\\" "|")

  if ((COLUMNS < 1)) ; then
    printf '\nInvalid number of columns: %s\n' "$COLUMNS" >&2
    return 1
  fi
  pagew=$(identify -format "%w" "$image") || return
  pageh=$(identify -format "%h" "$image") || return
  w=$((pagew / COLUMNS))                        # crop width
  h=$(((2 * MARGIN + w) * 4 / 3 - 2 * MARGIN))  # crop height, aspect 4:3
  if ((w <= 0 || h <= 0)) ; then
    printf '\nCannot split page %s into %s columns.\n' "$page" "$COLUMNS" >&2
    return 1
  fi
  n=$((pageh / h + 1))                          # number of vertical slices
  # Spread the surplus height evenly as overlap between neighbouring slices;
  # a single slice has no neighbour (and would divide by zero).
  if ((n > 1)) ; then
    overlap=$(((h * n - pageh) / (n - 1)))
  else
    overlap=0
  fi
  dh=$((h - overlap))

  for ((c = 0; c < COLUMNS; c++)) ; do          # for every column
    for ((j = 0; j < n; j++)) ; do              # for every partial image
      printf -v tile '%06d_%d_%02d.%s' "$page" "$c" "$j" "$pmext"
      printf -v tile_djvu '%06d_%d_%02d.djvu' "$page" "$c" "$j"
      convert "$image" -crop "${w}x${h}+$((c * w))+$((j * dh))" \
        -bordercolor white -border "$MARGIN" "$tile" || return
      # shellcheck disable=SC2086
      $compress -dpi "$DPI" "$tile" "$tile_djvu" || return
      rm -- "$tile" || return
      printf '\b%s' "${spinner[$(((c * n + j) % 4))]}"
    done
  done
}

#######################################
# Rasterize, trim and compress every page from $frompage to $topage.
# Must be called with the temporary directory as working directory.
# Globals (read): type, frompage, topage, DPI, monoopt, pdf, pmext, cropopt,
#                 resizeopt, COLUMNS, MARGIN, quality, compress
# Returns:        non-zero as soon as any step fails, so a broken page
#                 aborts the run instead of being retried forever
#######################################
convert_pages() {
  local i last colors fname
  local -a rendered

  case "$type" in
    pdf | djvu) ;;
    *)
      printf 'Unsupported document type: %s\n' "$type" >&2
      return 1
      ;;
  esac
  if ! [[ "$frompage" =~ ^[0-9]+$ && "$topage" =~ ^[0-9]+$ ]] ; then
    printf 'Invalid page range: %s-%s\n' "$frompage" "$topage" >&2
    return 1
  fi

  # 10# forces base 10 so values such as "08" are not read as octal.
  last=$((10#$topage))
  for ((i = 10#$frompage; i <= last; i++)) ; do # for every page
    printf '%s' "$i"
    # shellcheck disable=SC2086
    pdftoppm -r "$DPI" $monoopt -f "$i" -l "$i" "$pdf" page || return

    # Rendered pages are deleted at the end of every iteration, so the glob
    # matches only the page rendered just now.
    rendered=(page-*."$pmext")
    fname=${rendered[0]}
    [[ -e "$fname" ]] || return 1

    if [[ -n "$cropopt" ]] ; then
      # shellcheck disable=SC2086
      mogrify -quiet $cropopt +repage "$fname" || return
    fi
    # shellcheck disable=SC2086
    mogrify -quiet -trim +repage $resizeopt "$fname" || return

    # %k is the number of unique colors; one color means an empty page.
    colors=$(identify -format "%k" "$fname") || return
    if ((colors > 1)) ; then
      if ((COLUMNS != 1)) ; then # cut page in COLUMNS x n pieces
        split_page "$fname" "$i" || return
      else                       # keep page as is
        if [[ "$MARGIN" != "0" ]] ; then
          mogrify -bordercolor white -border "$MARGIN" "$fname" || return
        fi
        case "$type" in
          pdf)
            convert -quality "$quality" "$fname" "$(printf '%06d.jpg' "$i")" \
              || return
            ;;
          djvu)
            # shellcheck disable=SC2086
            $compress -dpi "$DPI" "$fname" "$(printf '%06d.djvu' "$i")" \
              || return
            ;;
        esac
      fi
      printf ' '
    else
      printf '%s' " skipped [empty page]"
    fi
    rm -- page-*."$pmext" || return
  done
}

#######################################
# Merge the per-page files in the current directory into the final document.
# Zero-padded file names keep the glob in page order.
# Globals (read): type, filename
#######################################
merge_pages() {
  printf '\nmerging...\n'
  case "$type" in
    djvu) djvm -c "$filename.djvu" *.djvu ;;
    pdf) convert *.jpg "${filename}_.pdf" ;;
  esac
}

# Work inside a subshell so the caller's working directory is untouched.
# The temporary directory is removed only after a fully successful run; on
# failure it is kept for inspection.
if ( cd "$tmpdir" && convert_pages && merge_pages ) ; then
  rm -rf -- "${tmpdir:?}"
else
  printf "Failure\nTemporary directory left: %s\n" "$tmpdir" >&2
  exit 2
fi
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment