Created
July 3, 2012 12:15
-
-
Save iHiD/3039396 to your computer and use it in GitHub Desktop.
PDF to SVG components
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Splits each page of a PDF into separate components:
#   * a thumbnail PNG,
#   * a gzipped SVG containing only the text layer (<id>_text.svgz),
#   * a flattened JPG containing only the images/graphics layer,
#   * an XML file of word bounding boxes (from pdftotext -bbox).
#
# Expects to run inside a Rails environment (uses Rails.root) with Nokogiri
# loaded, and with pdf2svg, Inkscape, gzip, ImageMagick's `convert` and
# pdftotext available on the machine.

# Set up a couple of paths
lib_directory = "#{Rails.root}/lib/svgs/"
assets_directory = "#{Rails.root}/app/assets/images/pages/"
pdf_path = "/Users/iHiD/Downloads/SightSound.pdf"
inkscape = "/Applications/Inkscape.app/Contents/Resources/bin/inkscape"

# Matches a <symbol> definition that wraps a single <path>.
symbol_pattern = /<symbol overflow="visible" id="[^"]*">\s*<path[^>]*>\s*<\/symbol>\s*/
# Matches a <use> element that references a glyph symbol.
glyph_use_pattern = /<use xlink:href="#glyph[^\/]*\/>\s*/

# Runs an external command without going through a shell, so paths containing
# spaces or shell metacharacters are passed through verbatim. Raises if the
# command cannot be started or exits with a non-zero status, rather than
# letting later steps fail on missing output.
run = lambda do |*command|
  raise "Command failed: #{command.join(' ')}" unless system(*command)
end

(1..18).each do |id|
  # Setup image specific paths
  original_filename = "#{lib_directory}#{id}.svg"
  text_filename = "#{assets_directory}#{id}_text.svg"
  images_filename = "#{assets_directory}#{id}_images.svg"
  images_png_filename = "#{assets_directory}#{id}_images.png"
  images_jpg_filename = "#{assets_directory}#{id}_images.jpg"
  # assets_directory already ends in "/", so no extra separator is needed.
  thumb_filename = "#{assets_directory}thumbs/#{id}.png"
  words_filename = "#{Rails.root}/db/seed_data/words_#{id}.xml"

  # Convert PDF to SVG using any third party library.
  # There are loads of free ones.
  # Poppler is a good place to start. Here I'm using pdf2svg,
  # passing the page number as its third argument.
  run.call("pdf2svg", pdf_path, original_filename, id.to_s)

  # Extract a 110px-wide thumbnail on a white background from the SVG
  run.call(inkscape, "-e", thumb_filename, "-w", "110", "-b", "white", "-z", original_filename)

  # Split an SVG into an image and a cut-down SVG. Nokogiri is just an XML parser.
  svg = File.read(original_filename)
  text_svg = Nokogiri::XML.parse(svg)

  # Remove all images, graphics and paths from the text part...
  text_svg.css("image").remove
  text_svg.css("g").each do |node|
    node.remove if node.key?("clip-path")
  end
  text_svg.css("path").each do |node|
    style = node["style"]
    node.remove if style && style.include?("fill-opacity:1")
  end

  # ...and turn this into a compressed svgz
  File.write(text_filename, text_svg.to_xml)
  # The uncompressed file is kept; gzip -c writes the compressed copy to stdout,
  # which is redirected to the .svgz file without involving a shell.
  gzipped = system("gzip", "-c", text_filename, out: "#{text_filename}z")
  raise "Command failed: gzip -c #{text_filename}" unless gzipped

  # Strip the text out of the images part...
  # NOTE: gsub must be called WITHOUT a replacement string here; when both a
  # replacement and a block are given Ruby ignores the block, which would
  # delete every matching symbol instead of only the glyph ones.
  images_svg = svg.gsub(symbol_pattern) do |match|
    match.include?("glyph") ? "" : match
  end
  images_svg = images_svg.gsub(glyph_use_pattern, '')
  File.write(images_filename, images_svg)

  # ...and convert it to a clean image (convert is part of ImageMagick)
  run.call(inkscape, "-e", images_png_filename, "-z", images_filename)
  run.call("convert", images_png_filename, "-background", "white", "-flatten", images_jpg_filename)

  # Remove tmp files
  File.delete(images_filename)
  File.delete(images_png_filename)

  # Get an XML file of the words, again using any one of lots of free libraries.
  run.call("pdftotext", "-bbox", "-f", id.to_s, "-l", id.to_s, pdf_path, words_filename)
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment