Created
October 30, 2023 08:37
-
-
Save afrendeiro/b378da44b1ec6def3d253c5a962b6909 to your computer and use it in GitHub Desktop.
Rasterize PDF files
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Rasterize a PDF: split it into single pages, render every page to a
# bitmap-backed PDF with ImageMagick, and join the pages back together.
#
# Usage:    <script> <input_file.pdf>
# Output:   <dir of input>/<input name without .pdf>.rasterized.pdf
# Requires: pdftk, ImageMagick `convert`, GNU parallel, GNU find/mktemp.

# Abort on the first failing command, on unset variables, and when any
# stage of a pipeline fails.
set -euo pipefail

# Check if input file is provided
if [ -z "${1:-}" ]; then
  echo "Usage: $0 <input_file.pdf>" >&2
  exit 1
fi

# Get the input file name and its directory
input_file="$1"
if [ ! -f "$input_file" ]; then
  echo "Error: input file not found: $input_file" >&2
  exit 1
fi
input_dir=$(dirname -- "$input_file")
base_name=$(basename -- "$input_file")
name_no_ext="${base_name%.pdf}"

# Create a private scratch directory under $HOME (not /tmp) and make sure
# it is removed on every exit path, including failures triggered by set -e.
tmp_dir=$(mktemp -d -p "$HOME")
trap 'rm -rf -- "$tmp_dir"' EXIT

# Burst the original PDF into individual pages. The page files use a fixed
# prefix instead of the input's name, so characters such as '%', '[' or '*'
# in the input name cannot disturb pdftk's printf-style template or the
# patterns used to collect the pages below.
pdftk "$input_file" burst output "${tmp_dir}/page_%04d.pdf"

# Serial alternative to the parallel step below:
# for page in "${tmp_dir}"/page_*.pdf; do
#   convert -density 100 "$page" -quality 90 "${tmp_dir}/raster_$(basename -- "$page")"
# done

# Rasterize all pages in parallel. tmp_dir is exported because the
# single-quoted command string is expanded by the shell that parallel
# spawns for each job, not by this script. File names are passed
# NUL-delimited so unusual characters in the path survive intact.
export tmp_dir
find "$tmp_dir" -maxdepth 1 -type f -name 'page_*.pdf' -print0 |
  parallel -0 --eta 'convert -density 300 {} -quality 90 "${tmp_dir}"/raster_{/}'

# Combine the rasterized pages into one PDF. The zero-padded page numbers
# make the glob expand in page order.
output_file="${input_dir}/${name_no_ext}.rasterized.pdf"
pdftk "${tmp_dir}"/raster_page_*.pdf cat output "$output_file"

# The scratch directory (individual pages and any metadata pdftk wrote
# there) is removed by the EXIT trap installed above.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment