-
-
Save c93614/280ee3e1bdef13ef530d79207eafdc0c to your computer and use it in GitHub Desktop.
Remove multiple text watermarks from a PDF file. Requires xxd and qpdf to work correctly.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Remove multiple text watermarks from a PDF file. Requires xxd and qpdf to work correctly.
#
# Usage:
#
#   remove-pdf-watermark.sh "Your Input File.pdf" "Your Output File.pdf" [WATERMARK1] [WATERMARK2] [WATERMARK3] [...]
#
# For example:
#
#   remove-pdf-watermark.sh "Your Input File.pdf" "Your Output File.pdf" "Watermark 1" "Watermark 2"
#
# This is a more general (fewer dependencies, more functionality, but slower) version of
# https://gist.github.com/elfsternberg/a96883018d783cbbad7b454ecd0a7ffe
# ---------------------------------------------------------------------------
# Script body.
#
# Arguments:
#   $1     input PDF
#   $2     output PDF
#   $3...  watermark strings; every occurrence is overwritten with spaces
#
# Approach: qpdf rewrites the PDF with uncompressed streams, xxd turns that
# file into hex text, sed replaces each watermark's hex bytes with the same
# number of 0x20 bytes, xxd -r converts the text back to binary and qpdf
# recompresses the result into the output file.
#
# Exit status: 0 on success, 1 on a processing error, 2 on bad usage.
# ---------------------------------------------------------------------------

# A failing xxd on the left-hand side of a pipe must not go unnoticed.
set -o pipefail

# Print an error message to stderr and abort.
die() {
  printf 'Error: %s\n' "$*" >&2
  exit 1
}

# Run qpdf with the given arguments. qpdf exits with 3 when it wrote its
# output but emitted warnings, so both 0 and 3 count as success here.
run_qpdf() {
  local status=0
  qpdf "$@" || status=$?
  [[ $status -eq 0 || $status -eq 3 ]]
}

# Print how many times the byte-separated hex string $1 occurs in file $2.
count_occurrences() {
  local hits
  # grep -o prints one line per match; the hex dump itself is a single long
  # line, so "grep -c" (which counts matching lines) would be wrong here.
  hits=$(LC_ALL=C grep -o -F -- "$1" "$2" | wc -l)
  # Arithmetic expansion strips the padding some wc implementations add.
  printf '%d\n' "$((0 + hits))"
}

if [[ $# -lt 2 ]]; then
  printf 'Usage: %s INPUT.pdf OUTPUT.pdf [WATERMARK]...\n' "${0##*/}" >&2
  exit 2
fi

for TOOL in qpdf xxd; do
  command -v "$TOOL" >/dev/null 2>&1 || die "required tool not found: $TOOL"
done

INPUT_FILENAME=$1
OUTPUT_FILENAME=$2
[[ -f "$INPUT_FILENAME" ]] || die "input file not found: $INPUT_FILENAME"

echo "Processing: $INPUT_FILENAME"

# Temporary files are removed on every exit path, including failures.
TMP_FILES=()
cleanup() {
  if [[ ${#TMP_FILES[@]} -gt 0 ]]; then
    rm -f -- "${TMP_FILES[@]}"
  fi
}
trap cleanup EXIT

# An explicit template with X's works with both GNU and BSD mktemp
# ("mktemp -t name" without X's is rejected by GNU mktemp).
TMP_BASE=${TMPDIR:-/tmp}
UNCOMPRESSED=$(mktemp "$TMP_BASE/uncompressed.XXXXXX") \
  || die "cannot create temporary file"
TMP_FILES+=("$UNCOMPRESSED")
UNCOMPRESSED_HEX=$(mktemp "$TMP_BASE/uncompressed-hex.XXXXXX") \
  || die "cannot create temporary file"
TMP_FILES+=("$UNCOMPRESSED_HEX")
HEX_SCRATCH=$(mktemp "$TMP_BASE/uncompressed-hex-new.XXXXXX") \
  || die "cannot create temporary file"
TMP_FILES+=("$HEX_SCRATCH")
UNMARKED_PRE=$(mktemp "$TMP_BASE/unmarked-pre.XXXXXX") \
  || die "cannot create temporary file"
TMP_FILES+=("$UNMARKED_PRE")

run_qpdf --stream-data=uncompress --decode-level=all \
  "$INPUT_FILENAME" "$UNCOMPRESSED" \
  || die "qpdf could not decompress: $INPUT_FILENAME"
echo " - Decompressing to: $UNCOMPRESSED"

# Insert a space after every byte ("41 42 43 "). Patterns built the same way
# can then only match on byte boundaries; a plain hex stream would also match
# at odd nibble offsets and corrupt unrelated bytes. "xxd -r -p" ignores the
# extra whitespace when converting back.
xxd -ps -c 0 "$UNCOMPRESSED" | sed 's/../& /g' > "$UNCOMPRESSED_HEX" \
  || die "hex dump failed"
echo " - Hex dumping to: $UNCOMPRESSED_HEX"
# The binary copy is no longer needed; free the disk space early.
rm -f -- "$UNCOMPRESSED"

for WATERMARK in "${@:3}"; do
  # An empty pattern would make sed reuse a non-existent previous regex.
  [[ -n "$WATERMARK" ]] || continue

  echo " - Processing Watermark: \"$WATERMARK\""

  # printf with a quoted argument keeps the watermark byte-for-byte intact
  # (no word splitting, globbing or echo option parsing). The trailing tr
  # joins the lines that xxd versions without "-c 0" support would emit.
  WATERMARK_HEX=$(printf '%s' "$WATERMARK" | xxd -p -c 0 | sed 's/../& /g' | tr -d '\n') \
    || die "cannot hex-encode watermark: $WATERMARK"

  # Each byte occupies three characters ("hh "). Counting bytes rather than
  # characters keeps the file size unchanged for multi-byte watermarks.
  WATERMARKLEN=$(( ${#WATERMARK_HEX} / 3 ))
  printf -v BLANKS '%*s' "$WATERMARKLEN" ''
  BLANKS_HEX=${BLANKS// /20 }

  NUM_OCCURENCES=$(count_occurrences "$WATERMARK_HEX" "$UNCOMPRESSED_HEX")
  echo " * Number of occurences: $NUM_OCCURENCES"

  if [[ $NUM_OCCURENCES -gt 0 ]]; then
    echo " * Replacing with $WATERMARKLEN blanks..."
    # Write to a scratch file and move it into place: "sed -i" takes a
    # different argument form on GNU and BSD sed and leaves a stray backup
    # file behind on the latter.
    LC_ALL=C sed -e "s/$WATERMARK_HEX/$BLANKS_HEX/g" "$UNCOMPRESSED_HEX" > "$HEX_SCRATCH" \
      || die "replacement failed for watermark: $WATERMARK"
    mv -f -- "$HEX_SCRATCH" "$UNCOMPRESSED_HEX" \
      || die "cannot update hex dump"
    echo " * Replacement done."
    NUM_OCCURENCES=$(count_occurrences "$WATERMARK_HEX" "$UNCOMPRESSED_HEX")
    echo " * Final number of occurences: $NUM_OCCURENCES"
  fi
done

xxd -r -p -c 0 "$UNCOMPRESSED_HEX" "$UNMARKED_PRE" \
  || die "cannot convert hex dump back to binary"
rm -f -- "$UNCOMPRESSED_HEX"
echo " - Reverting from hex dump to: $UNMARKED_PRE"

run_qpdf --stream-data=compress "$UNMARKED_PRE" "$OUTPUT_FILENAME" \
  || die "qpdf could not write: $OUTPUT_FILENAME"
rm -f -- "$UNMARKED_PRE"
echo " - Output written to: $OUTPUT_FILENAME"
echo
echo
# NO WARRANTY
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment