Skip to content

Instantly share code, notes, and snippets.

@tdtds
Created December 5, 2010 03:00
Show Gist options
  • Save tdtds/728718 to your computer and use it in GitHub Desktop.
Save tdtds/728718 to your computer and use it in GitHub Desktop.
#!/bin/sh
#
# Copyright (c) 2010 Shinnosuke Suzuki <[email protected]>
# You can distribute this file under the GPL.
#
# Modified by TADA Tadashi <[email protected]>
#
# Command-line handling and work-directory setup.
#
# Usage: pdf2kindle PDFfile [top bottom left right]
#   PDFfile                source PDF; must exist
#   top bottom left right  optional integer crop amounts; each one is added to
#                          a base margin of 10 (a missing value counts as 0)
FILE=$1
if [ -z "$FILE" ]; then
  echo "usage: pdf2kindle PDFfile [top bottom left right]" >&2
  exit 1
elif [ ! -e "$FILE" ]; then
  echo "$FILE is not found." >&2
  exit 1
fi
basename "$FILE" .pdf

# Reject anything that is not a plain (optionally negative) integer before it
# reaches arithmetic expansion.
for crop_arg in "${2:-0}" "${3:-0}" "${4:-0}" "${5:-0}"; do
  case $crop_arg in
    *[!0-9-]* | ?*-* | -)
      echo "crop values must be integers: $crop_arg" >&2
      exit 1
      ;;
  esac
done
TOP=$((${2:-0} + 10))
BOTTOM=$((${3:-0} + 10))
LEFT=$((${4:-0} + 10))
RIGHT=$((${5:-0} + 10))

# Number of image-correction jobs to run in parallel.
# Lower it if the load gets too heavy.
PARA=2

# Work directory: ./<PDF name without .pdf>, holding one subdirectory per stage.
# NOTE(review): TMPDIR is also the conventional temp-directory environment
# variable; if it is exported in the caller's environment, child processes will
# see this value too. The name is kept because the functions below read it.
TMPDIR=./$(basename "$FILE" .pdf)
mkdir -p "$TMPDIR/pgm" "$TMPDIR/png" "$TMPDIR/pdf" "$TMPDIR/pgm_err" || {
  echo "cannot create work directory $TMPDIR" >&2
  exit 1
}

# Record the effective crop margins; entries are appended on every run.
{
  echo "TOP=$TOP"
  echo "BOTTOM=$BOTTOM"
  echo "LEFT=$LEFT"
  echo "RIGHT=$RIGHT"
} >> "$TMPDIR/crop.txt"
# Render every page of the source PDF to a grayscale image.
#
# Globals read: FILE   - path of the source PDF
#               TMPDIR - work directory; images go to $TMPDIR/pgm/pdf2mobi*
# Returns: the exit status of pdftoppm.
pdf2pgm() {
  # printf instead of "echo -n": the latter is not portable under /bin/sh.
  printf '%s' "extracting image files from source PDF.."
  pdftoppm -gray "$FILE" "$TMPDIR/pgm/pdf2mobi"
  pdf2pgm_status=$?
  # Always terminate the progress line, even when extraction failed.
  echo '.'
  return "$pdf2pgm_status"
}
# Crop and auto-trim every extracted page image into a PNG.
#
# Globals read: TMPDIR                   - work directory
#               PARA                     - number of convert jobs per batch
#               TOP, BOTTOM, LEFT, RIGHT - amounts chopped from each edge
# Input:  $TMPDIR/pgm/*.pgm
# Output: $TMPDIR/png/<page>.png, convert's stderr in
#         $TMPDIR/convert_error.log, and a copy of every page mentioned in
#         that log (PGM plus PNG, when present) in $TMPDIR/pgm_err.
pgm2png() {
  printf '%s' "cropping image files"
  # Start every run with an empty log.
  : > "$TMPDIR/convert_error.log"

  # The loop runs in the current shell (no "ls | while" pipeline) so that the
  # final "wait" really covers the jobs of the last, partially filled batch.
  pgm2png_running=0
  for pgm2png_page in "$TMPDIR"/pgm/*.pgm; do
    # An unmatched glob stays literal; skip it.
    [ -e "$pgm2png_page" ] || continue
    printf '.'
    # Chop top/left, flip+flop so that a second chop removes bottom/right,
    # flip+flop back, then auto-trim.
    # TODO: this is slow; would like to move it to OpenCV somehow.
    convert \
      "$pgm2png_page" \
      -level '0%,100%' \
      -chop "${LEFT}x${TOP}" -flip -flop \
      -chop "${RIGHT}x${BOTTOM}" -flip -flop \
      -fuzz 50% -trim \
      -quality 0 \
      "$TMPDIR/png/$(basename "$pgm2png_page" .pgm).png" &
    # Limit concurrency to $PARA jobs at a time.
    pgm2png_running=$((pgm2png_running + 1))
    if [ "$pgm2png_running" -eq "$PARA" ]; then
      pgm2png_running=0
      wait
    fi
  done 2>> "$TMPDIR/convert_error.log"
  wait
  echo '.'

  pgm2png_errdir=$TMPDIR/pgm_err
  printf '%s' "getting error information as $pgm2png_errdir..."
  # convert quotes file names as `name' or 'name'; split on both quote
  # characters so every quoted name lands on a line of its own.
  tr "\`'" '\n\n' < "$TMPDIR/convert_error.log" \
    | grep -E '\.pgm$' \
    | while IFS= read -r pgm2png_bad; do
        pgm2png_bad_png="$TMPDIR/png/$(basename "$pgm2png_bad" .pgm).png"
        for pgm2png_copy in "$pgm2png_bad" "$pgm2png_bad_png"; do
          if [ -e "$pgm2png_copy" ]; then
            cp "$pgm2png_copy" "$pgm2png_errdir/"
          fi
        done
      done

  if [ -n "$(find "$pgm2png_errdir" -type f)" ]; then
    echo "-> if error files not empty, try to decrease -fuzz vlue."
    find "$pgm2png_errdir"
  else
    echo "-> no error."
  fi
}
# Save the source PDF's metadata to $TMPDIR/pdf/metadata.txt.
#
# Globals read: FILE   - path of the source PDF
#               TMPDIR - work directory
# Does nothing when the metadata file already exists.
# Returns: 0 on success or when skipped, 1 when pdftk fails.
dumpmetadata() {
  [ -e "$TMPDIR/pdf/metadata.txt" ] && return 0
  echo "extracting PDF metadata..."
  if ! pdftk "$FILE" dump_data output "$TMPDIR/pdf/metadata.txt"; then
    # Do not leave a partial file behind: it would make every later run skip
    # the extraction.
    rm -f "$TMPDIR/pdf/metadata.txt"
    echo "failed to extract metadata from $FILE" >&2
    return 1
  fi
}
# Convert one batch of PNG pages into a numbered intermediate PDF.
#   $1    batch number; names the file $TMPDIR/pdf/part-NNNNN.pdf
#   $2... PNG files of the batch, in page order
_png2pdf_flush() {
  _png2pdf_part=$(printf '%s/pdf/part-%05d.pdf' "$TMPDIR" "$1")
  shift
  printf '.'
  convert "$@" "$_png2pdf_part"
}

# Assemble the cropped PNG pages into "<name>.out.pdf", placed in the parent
# of the work directory.
#
# Globals read: FILE   - source PDF path; only its base name is used
#               TMPDIR - work directory
# Pages are read from $TMPDIR/png/pdf2mobi-*.png in glob order, which is page
# order as long as the page numbers are zero-padded to a common width. They
# are converted in batches of 100 and the batches are joined with pdftk.
# Matching on the whole prefix, rather than on three-digit page numbers,
# keeps documents of any page count working.
# Returns: 0 on success, 1 when a step fails or there are no pages.
png2pdf() {
  printf '%s' "generationg destination PDF"
  png2pdf_name=$(basename "$FILE" .pdf)
  png2pdf_tmp=$TMPDIR/$png2pdf_name.out.tmp.pdf
  png2pdf_out=$TMPDIR/$png2pdf_name.out.pdf

  # Drop leftovers of an earlier run so they cannot end up in the result.
  rm -f "$TMPDIR"/pdf/part-*.pdf "$png2pdf_tmp"

  # The function's own positional parameters collect the current batch.
  set --
  png2pdf_parts=0
  for png2pdf_page in "$TMPDIR"/png/pdf2mobi-*.png; do
    # An unmatched glob stays literal; skip it.
    [ -e "$png2pdf_page" ] || continue
    set -- "$@" "$png2pdf_page"
    if [ "$#" -eq 100 ]; then
      _png2pdf_flush "$png2pdf_parts" "$@" || return 1
      png2pdf_parts=$((png2pdf_parts + 1))
      set --
    fi
  done
  if [ "$#" -gt 0 ]; then
    _png2pdf_flush "$png2pdf_parts" "$@" || return 1
    png2pdf_parts=$((png2pdf_parts + 1))
  fi
  if [ "$png2pdf_parts" -eq 0 ]; then
    echo >&2
    echo "no PNG pages found in $TMPDIR/png" >&2
    return 1
  fi

  pdftk "$TMPDIR"/pdf/part-*.pdf cat output "$png2pdf_tmp" || return 1
  if [ -e "$TMPDIR/pdf/metadata.txt" ]; then
    pdftk "$png2pdf_tmp" \
      update_info "$TMPDIR/pdf/metadata.txt" \
      output "$png2pdf_out" || return 1
  else
    # No saved metadata: ship the joined PDF as it is.
    cp "$png2pdf_tmp" "$png2pdf_out" || return 1
  fi
  mv "$png2pdf_out" "$TMPDIR/../" || return 1
  echo '.'
  ls -l "$TMPDIR/../$png2pdf_name.out.pdf"
}
# Run the conversion stages in order: save the metadata, render the pages,
# crop them, then rebuild the PDF. Stop at the first stage that reports
# failure instead of feeding incomplete data to the next one.
for stage in dumpmetadata pdf2pgm pgm2png png2pdf; do
  "$stage" || {
    echo "pdf2kindle: $stage failed" >&2
    exit 1
  }
done
@tdtds
Copy link
Author

tdtds commented Oct 6, 2011

このスクリプトは古くなっており、すでにメンテされていません。
最新のKindlize手法はRakefileによって実現されています。以下を参照して下さい。

自炊PDFをKindle3向けにトリミングするRakefile
https://gist.github.com/749471

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment