Last active
April 5, 2024 16:57
Extract vector images from funky PDF files
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
# Most vector images can be extracted as EPS from the command line using pdftocairo + sed. | |
# https://superuser.com/questions/302045/how-to-extract-vectors-from-a-pdf-file/884445 | |
# However, there was one file (created by LaTeX) that failed to extract because pdftocairo | |
# was rasterizing it into a bitmap, so I created a script that uses xml_grep to extract the | |
# image as an SVG file instead. | |
# b9 2019 | |
if [[ -z "$1" ]]; then | |
echo "Usage: $0 <filename.pdf> [pagenumber]" | |
exit 1 | |
fi | |
file="$1" | |
page="$2" | |
if [ "$page" ]; then | |
pageopt="-f $page -l $page" | |
fi | |
pdftocairo "$file" $pageopt -svg temp.svg | |
for surface in $(grep -o '<g id="surface[^"]*"' temp.svg | grep -o '".*"' | tr -d '"'); do | |
if [[ "$surface" == "surface1" ]]; then | |
#echo "Skipping surface1 which is entire page" | |
continue | |
fi | |
output="${file%.*}-$surface.svg" | |
echo "Extracting: $output" | |
( | |
echo "<svg>" | |
xml_grep --nowrap --pretty indented '//defs' temp.svg | |
xml_grep --nowrap --pretty indented 'g[@id="'"$surface"'"]' temp.svg | |
echo "</svg>" | |
) > $output | |
# Make it a valid SVG file with a proper bounding box | |
# You may need to `apt install librsvg2-bin` for this to work. | |
rsvg-convert $output -f svg -o temp.svg && mv temp.svg $output | |
done | |
if [[ -z "$output" ]]; then | |
echo "No additional surfaces found in this PDF." | |
fi | |
rm -f temp.svg |
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment