-
-
Save jeromerobert/3996eca3acd12e4c3d40 to your computer and use it in GitHub Desktop.
#! /usr/bin/env python | |
""" | |
Pandoc filter to convert svg files to pdf as suggested at: | |
https://github.com/jgm/pandoc/issues/265#issuecomment-27317316 | |
""" | |
__author__ = "Jerome Robert" | |
import mimetypes | |
import subprocess | |
import os | |
import sys | |
from pandocfilters import toJSONFilter, Str, Para, Image | |
# Maps a pandoc output format name to a pair:
# (inkscape export flag, file extension of the converted image).
# Formats that are not listed here are left untouched by the filter.
fmt_to_option = {
    "latex": ("--export-pdf", "pdf"),
    "beamer": ("--export-pdf", "pdf"),
    # use PNG because EMF and WMF break transparency
    "docx": ("--export-png", "png"),
    # because of IE
    "html": ("--export-png", "png"),
}
def svg_to_any(key, value, fmt, meta):
    """Replace an SVG image by a version converted with inkscape.

    Used as the action passed to ``toJSONFilter``.

    key   -- type name of the visited element; only 'Image' is handled.
    value -- element payload: ``[alt, [src, title]]`` before pandoc 1.16,
             ``[attrs, alt, [src, title]]`` afterwards.
    fmt   -- output format name, looked up in ``fmt_to_option``.
    meta  -- document metadata (unused).

    Returns a new ``Image`` pointing at the converted file, or None (element
    left unchanged) when the element is not an SVG image, the output format
    has no entry in ``fmt_to_option``, or the SVG file cannot be read.
    """
    if key != 'Image':
        return None
    if len(value) == 2:
        # before pandoc 1.16
        alt, [src, title] = value
        attrs = None
    else:
        attrs, alt, [src, title] = value
    mimet, _ = mimetypes.guess_type(src)
    option = fmt_to_option.get(fmt)
    if mimet != 'image/svg+xml' or not option:
        return None
    base_name, _ = os.path.splitext(src)
    eps_name = base_name + "." + option[1]
    try:
        src_mtime = os.path.getmtime(src)
    except OSError:
        # e.g. a URL or a wrong path: keep the original image instead of
        # aborting the whole filter with a traceback.
        sys.stderr.write("Cannot read %s, image left unchanged\n" % src)
        return None
    try:
        mtime = os.path.getmtime(eps_name)
    except OSError:
        # No converted file yet: force a conversion.
        mtime = -1
    if mtime < src_mtime:
        cmd_line = ['inkscape', option[0], eps_name, src]
        sys.stderr.write("Running %s\n" % " ".join(cmd_line))
        # inkscape's stdout is redirected to stderr so that it cannot mix
        # with whatever this process writes on its own stdout.
        status = subprocess.call(cmd_line, stdout=sys.stderr.fileno())
        if status != 0:
            sys.stderr.write("inkscape exited with status %d\n" % status)
    if attrs is not None:
        return Image(attrs, alt, [eps_name, title])
    return Image(alt, [eps_name, title])
if __name__ == "__main__":
    # Script entry point: hand svg_to_any to the pandocfilters driver.
    toJSONFilter(svg_to_any)
Since no-one else commented so far: have you tried to update your Pandoc to the current version?
You can do so by `cabal`-installing it into the local user path `$HOME/.cabal/`, without sacrificing the Pandoc instance installed by your system's package manager -- at least on Linux and Mac OS X:
cabal update
cabal install cabal-install
export PATH=${HOME}/.cabal/bin:${PATH}
cabal install pandoc pandoc-citeproc
This should give you Pandoc 1.15.0.6 as ${HOME}/.cabal/bin/pandoc
.
For some reason it stopped working for me after upgrading pandoc from a pre-1.16 to the current 1.16.0.2 version of Pandoc.
So far: at the line `alt, [src, title] = value` it fails to unpack the tuple. When I assign the values via the array-index syntax, like `value[1]`, it runs again and, as far as I can see, for all images. It fails afterwards, though, with the error "When expecting a product of 3 values, encountered an Array of 2 elements instead".
I had to tweak it to make it work with an up-to-date ArchLinux system. Here is my working version (needs pandocfilters
installed via Pip as AUR version is out of date).
@jdittrich I had the same problem as you had, maybe this version is working for you?
#! /usr/bin/env python
"""
Pandoc filter to convert svg files to pdf as suggested at:
https://github.com/jgm/pandoc/issues/265#issuecomment-27317316
"""
__author__ = "Jerome Robert"
import mimetypes
import subprocess
import os
import sys
from pandocfilters import toJSONFilter, Image
# TODO add emf export if fmt=="docx" ?
# Output format name -> (inkscape export flag, converted file extension).
fmt_to_option = dict(
    latex=("--export-pdf", "pdf"),
    beamer=("--export-pdf", "pdf"),
    # because of IE
    html=("--export-png", "png"),
)
def svg_to_any(key, value, fmt, meta):
    """Replace an SVG image by a version converted with inkscape.

    Used as the action passed to ``toJSONFilter``.

    key   -- type name of the visited element; only 'Image' is handled.
    value -- element payload; the last two items are ``alt`` and
             ``[src, title]``, optionally preceded by the image attributes.
    fmt   -- output format name, looked up in ``fmt_to_option``.
    meta  -- document metadata (unused).

    Returns a three-argument ``Image`` pointing at the converted file, or
    None (element left unchanged) when the element is not an SVG image, the
    output format has no entry in ``fmt_to_option``, or the SVG file cannot
    be read.
    """
    if key != 'Image':
        return None
    # Keep the attributes when they are present instead of discarding them;
    # fall back to empty attributes for the two-item form.
    attrs = value[0] if len(value) == 3 else ['', [], []]
    alt, [src, title] = value[-2], value[-1]
    mimet, _ = mimetypes.guess_type(src)
    option = fmt_to_option.get(fmt)
    if mimet != 'image/svg+xml' or not option:
        return None
    base_name, _ = os.path.splitext(src)
    eps_name = base_name + "." + option[1]
    try:
        src_mtime = os.path.getmtime(src)
    except OSError:
        # e.g. a URL or a wrong path: keep the original image instead of
        # aborting the whole filter with a traceback.
        sys.stderr.write("Cannot read %s, image left unchanged\n" % src)
        return None
    try:
        mtime = os.path.getmtime(eps_name)
    except OSError:
        # No converted file yet: force a conversion.
        mtime = -1
    if mtime < src_mtime:
        cmd_line = ['inkscape', option[0], eps_name, src]
        sys.stderr.write("Running %s\n" % " ".join(cmd_line))
        # inkscape's stdout is redirected to stderr so that it cannot mix
        # with whatever this process writes on its own stdout.
        status = subprocess.call(cmd_line, stdout=sys.stderr.fileno())
        if status != 0:
            sys.stderr.write("inkscape exited with status %d\n" % status)
    return Image(attrs, alt, [eps_name, title])
if __name__ == "__main__":
    # Script entry point: hand svg_to_any to the pandocfilters driver.
    toJSONFilter(svg_to_any)
A recent pandoc update added an Attr
field to images, so there's an extra argument being passed around which was messing things up.
@Phyks' version works nicely for now, though.
@swt30 Indeed, I just dirty-fixed it so that it worked, but did not handle the Attr
field. Here is a fully working version, tested with extra attributes (will not work for Pandoc < 1.16 though):
#! /usr/bin/env python
"""
Pandoc filter to convert svg files to pdf as suggested at:
https://github.com/jgm/pandoc/issues/265#issuecomment-27317316
"""
__author__ = "Jerome Robert"
import mimetypes
import subprocess
import os
import sys
from pandocfilters import toJSONFilter, Image
# TODO add emf export if fmt=="docx" ?
# Output format name -> (inkscape export flag, converted file extension).
fmt_to_option = dict(
    latex=("--export-pdf", "pdf"),
    beamer=("--export-pdf", "pdf"),
    # because of IE
    html=("--export-png", "png"),
)
def svg_to_any(key, value, fmt, meta):
    """Replace an SVG image by a version converted with inkscape.

    Used as the action passed to ``toJSONFilter``.

    key   -- type name of the visited element; only 'Image' is handled.
    value -- element payload, unpacked as ``[attrs, alt, [src, title]]``
             (the three-item form; a two-item payload raises ValueError).
    fmt   -- output format name, looked up in ``fmt_to_option``.
    meta  -- document metadata (unused).

    Returns a new ``Image`` pointing at the converted file, or None (element
    left unchanged) when the element is not an SVG image, the output format
    has no entry in ``fmt_to_option``, or the SVG file cannot be read.
    """
    if key != 'Image':
        return None
    attrs, alt, [src, title] = value
    mimet, _ = mimetypes.guess_type(src)
    option = fmt_to_option.get(fmt)
    if mimet != 'image/svg+xml' or not option:
        return None
    base_name, _ = os.path.splitext(src)
    eps_name = base_name + "." + option[1]
    try:
        src_mtime = os.path.getmtime(src)
    except OSError:
        # e.g. a URL or a wrong path: keep the original image instead of
        # aborting the whole filter with a traceback.
        sys.stderr.write("Cannot read %s, image left unchanged\n" % src)
        return None
    try:
        mtime = os.path.getmtime(eps_name)
    except OSError:
        # No converted file yet: force a conversion.
        mtime = -1
    if mtime < src_mtime:
        cmd_line = ['inkscape', option[0], eps_name, src]
        sys.stderr.write("Running %s\n" % " ".join(cmd_line))
        # inkscape's stdout is redirected to stderr so that it cannot mix
        # with whatever this process writes on its own stdout.
        status = subprocess.call(cmd_line, stdout=sys.stderr.fileno())
        if status != 0:
            sys.stderr.write("inkscape exited with status %d\n" % status)
    return Image(attrs, alt, [eps_name, title])
if __name__ == "__main__":
    # Script entry point: hand svg_to_any to the pandocfilters driver.
    toJSONFilter(svg_to_any)
I had a problem with whitespace in the filename. The easiest way for me was to remove it. Also, when using this script in a command chain, nothing would happen, so I added the option to convert as a default action if the output format is unknown.
#! /usr/bin/env python
"""
Pandoc filter to convert svg files to pdf as suggested at:
https://github.com/jgm/pandoc/issues/265#issuecomment-27317316
"""
__author__ = "Jerome Robert"
import mimetypes
import subprocess
import os
import sys
from pandocfilters import toJSONFilter, Image
# TODO add emf export if fmt=="docx" ?
# Output format name -> (inkscape export flag, converted file extension).
fmt_to_option = dict(
    latex=("--export-pdf", "pdf"),
    beamer=("--export-pdf", "pdf"),
    # because of IE
    html=("--export-png", "png"),
)
def svg_to_any(key, value, fmt, meta):
    """Replace an SVG image by a version converted with inkscape.

    Used as the action passed to ``toJSONFilter``.

    key   -- type name of the visited element; only 'Image' is handled.
    value -- element payload, unpacked as ``[attrs, alt, [src, title]]``.
    fmt   -- output format name, looked up in ``fmt_to_option``; formats
             without an entry fall back to PDF export.
    meta  -- document metadata (unused).

    Returns a new ``Image`` pointing at the converted file, or None (element
    left unchanged) when the element is not an SVG image or the SVG file
    cannot be read.
    """
    if key != 'Image':
        return None
    attrs, alt, [src, title] = value
    mimet, _ = mimetypes.guess_type(src)
    # Convert to PDF by default when the output format is unknown.
    option = fmt_to_option.get(fmt, ("--export-pdf", "pdf"))
    if mimet != 'image/svg+xml' or not option:
        return None
    base_name, _ = os.path.splitext(src)
    eps_name = base_name + "." + option[1]
    # "%20" is dropped from the output name and turned back into a space in
    # the source name (presumably src arrives with spaces encoded as %20).
    eps_name = eps_name.replace("%20", "")
    src = src.replace("%20", " ")
    try:
        src_mtime = os.path.getmtime(src)
    except OSError:
        # e.g. a URL or a wrong path: keep the original image instead of
        # aborting the whole filter with a traceback.
        sys.stderr.write("Cannot read %s, image left unchanged\n" % src)
        return None
    try:
        mtime = os.path.getmtime(eps_name)
    except OSError:
        # No converted file yet: force a conversion.
        mtime = -1
    if mtime < src_mtime:
        cmd_line = ['inkscape', option[0], eps_name, src]
        sys.stderr.write("Running %s\n" % " ".join(cmd_line))
        # inkscape's stdout is redirected to stderr so that it cannot mix
        # with whatever this process writes on its own stdout.
        status = subprocess.call(cmd_line, stdout=sys.stderr.fileno())
        if status != 0:
            sys.stderr.write("inkscape exited with status %d\n" % status)
    return Image(attrs, alt, [eps_name.replace("%20", " "), title])
if __name__ == "__main__":
    # Script entry point: hand svg_to_any to the pandocfilters driver.
    toJSONFilter(svg_to_any)
Thanks for doing this. Saved me today.
One thing: it's obvious, perhaps, but it bit me: you need to have inkscape available via the command line.
I used homebrew to install it. (I also already have the GUI version installed, so maybe a simple linking of that binary into my path would have worked. Hmmmm…)
Also, I've got this on my local system via the desktop app. Would someone mind updating the gist to this latest version that seems to work?
Does it not work with on-line images? I'm getting a file not found error for something.
Any thoughts on getting this to handle on-line images as well?
Hi, on my updated Arch linux system the filter gives the following error:
Traceback (most recent call last):
File "../pandoc-svgmk2.py", line 43, in <module>
toJSONFilter(svg_to_any)
File "/usr/lib/python3.5/site-packages/pandocfilters.py", line 46, in toJSONFilter
toJSONFilters([action])
File "/usr/lib/python3.5/site-packages/pandocfilters.py", line 76, in toJSONFilters
altered = reduce(lambda x, action: walk(x, action, format, doc[0]['unMeta']), actions, doc)
File "/usr/lib/python3.5/site-packages/pandocfilters.py", line 76, in <lambda>
altered = reduce(lambda x, action: walk(x, action, format, doc[0]['unMeta']), actions, doc)
KeyError: 0
pandoc: Error running filter ../pandoc-svgmk2.py
Filter returned error status 1
Did some of you encounter the same error or has some idea on how to fix it?
I am using RedX2501 version of pandoc-svg.py with pandoc 1.18.
Yes, simply rewrite the PKGBUILD from AUR to use version 1.4.1 of the pandocfilters package - see https://pypi.python.org/pypi/pandocfilters for the download link to replace it.
What about pdf_tex, using the option --export-latex? This generates two files: a PDF and a TeX file. The TeX file has the extension .pdf_tex and needs to be included in the document with \input{}. This TeX file imports the PDF file and places the text on top of the image, which allows the text in the image to be formatted consistently with the LaTeX document.
handle online image
#! /usr/bin/env python
"""
Pandoc filter to convert svg files to pdf as suggested at:
https://github.com/jgm/pandoc/issues/265#issuecomment-27317316
"""
__author__ = "Jerome Robert"
import mimetypes
import subprocess
import os
import sys
import urllib
import re
from pandocfilters import toJSONFilter, Str, Para, Image
# Output format name -> (inkscape export flag, converted file extension).
fmt_to_option = dict(
    latex=("--export-pdf", "pdf"),
    beamer=("--export-pdf", "pdf"),
    # use PNG because EMF and WMF break transparency
    docx=("--export-png", "png"),
    # because of IE
    html=("--export-png", "png"),
)
def svg_to_any(key, value, fmt, meta):
    """Replace a local or online SVG image by a version converted with inkscape.

    Used as the action passed to ``toJSONFilter``.

    key   -- type name of the visited element; only 'Image' is handled.
    value -- element payload: ``[alt, [src, title]]`` before pandoc 1.16,
             ``[attrs, alt, [src, title]]`` afterwards.
    fmt   -- output format name, looked up in ``fmt_to_option``.
    meta  -- document metadata (unused).

    An http(s) source is first downloaded into the current directory under
    a sanitized file name, then converted like a local file.

    Returns a new ``Image`` pointing at the converted file, or None (element
    left unchanged) when the element is not an SVG image, the output format
    has no entry in ``fmt_to_option``, or the SVG file cannot be read.
    """
    if key != 'Image':
        return None
    if len(value) == 2:
        # before pandoc 1.16
        alt, [src, title] = value
        attrs = None
    else:
        attrs, alt, [src, title] = value
    is_remote = re.match(r'https?://', src) is not None
    if is_remote:
        # Strip query string, fragment and trailing slash so that the file
        # extension is visible to mimetypes.
        srcm = re.sub(r'\?.+', '', src)
        srcm = re.sub(r'#.+', '', srcm)
        srcm = re.sub(r'/$', '', srcm)
    else:
        srcm = src
    mimet, _ = mimetypes.guess_type(srcm)
    option = fmt_to_option.get(fmt)
    if mimet != 'image/svg+xml' or not option:
        return None
    if is_remote:
        # These helpers live in different modules on Python 2 and Python 3.
        try:
            from urllib.parse import unquote
            from urllib.request import urlretrieve
        except ImportError:
            from urllib import unquote, urlretrieve
        bsnm = unquote(os.path.basename(srcm))
        # Keep only ASCII letters, digits and dots in the local file name.
        bsnm = re.sub(r'[^a-zA-Z0-9.]', '', bsnm)
        src, _headers = urlretrieve(src, bsnm)
        base_name, _ = os.path.splitext(bsnm)
    else:
        # Local file: derive the output name from the source path
        # (previously base_name was left undefined on this path).
        base_name, _ = os.path.splitext(src)
    eps_name = base_name + "." + option[1]
    try:
        src_mtime = os.path.getmtime(src)
    except OSError:
        # Wrong or unreadable path: keep the original image instead of
        # aborting the whole filter with a traceback.
        sys.stderr.write("Cannot read %s, image left unchanged\n" % src)
        return None
    try:
        mtime = os.path.getmtime(eps_name)
    except OSError:
        # No converted file yet: force a conversion.
        mtime = -1
    if mtime < src_mtime:
        cmd_line = ['inkscape', option[0], eps_name, src]
        sys.stderr.write("Running %s\n" % " ".join(cmd_line))
        # inkscape's stdout is redirected to stderr so that it cannot mix
        # with whatever this process writes on its own stdout.
        status = subprocess.call(cmd_line, stdout=sys.stderr.fileno())
        if status != 0:
            sys.stderr.write("inkscape exited with status %d\n" % status)
    if attrs is not None:
        return Image(attrs, alt, [eps_name, title])
    return Image(alt, [eps_name, title])
if __name__ == "__main__":
    # Script entry point: hand svg_to_any to the pandocfilters driver.
    toJSONFilter(svg_to_any)
An adjustment to @juji's reply: replace lines 47-51
if re.match('https?\://',src):
bsnm = urllib.unquote(os.path.basename(srcm).encode('utf8'))
bsnm = re.sub('[^a-zA-Z0-9\.]','',bsnm)
src,h = urllib.urlretrieve(src,bsnm)
base_name,_ = os.path.splitext(bsnm)
with
if re.match('https?\://',src):
bsnm = urllib.unquote(os.path.basename(srcm).encode('utf8'))
bsnm = re.sub('[^a-zA-Z0-9\.]','',bsnm)
src,h = urllib.urlretrieve(src,bsnm)
base_name,_ = os.path.splitext(bsnm)
else:
base_name, _ = os.path.splitext(src)
if re.match('https?\://',src):
bsnm = urllib.unquote(os.path.basename(srcm).encode('utf8'))
bsnm = re.sub('[^a-zA-Z0-9\.]','',bsnm)
src,h = urllib.urlretrieve(src,bsnm)
base_name,_ = os.path.splitext(bsnm)
eps_name = base_name + "." + option[1]
else:
base_name, _ = os.path.splitext(src)
eps_name = os.path.realpath(base_name + "." + option[1])
src = os.path.realpath(src)
One further adjustment: Inkscape crashes on macOS with `** (inkscape-bin:91102): WARNING **: Can't open file: image.svg (doesn't exist)`, caused by a shell wrapper around inkscape that changes the working directory. Using os.path.realpath() fixes that.
For the new Inkscape 1.0 on Catalina --export-filename
has superseded --export-pdf
# Output format name -> (inkscape export flag, converted file extension).
# Inkscape 1.0 uses --export-filename in place of both --export-pdf and
# --export-png.
fmt_to_option = {
    "latex": ("--export-filename", "pdf"),
    "beamer": ("--export-filename", "pdf"),
    # use PNG because EMF and WMF break transparency
    "docx": ("--export-filename", "png"),
    # because of IE
    "html": ("--export-filename", "png"),
}
--export-filename
should now be used in place of both --export-pdf
and --export-png
.
# Output format name -> (inkscape export flag, converted file extension).
fmt_to_option = {
    "latex": ("--export-filename", "pdf"),
    "beamer": ("--export-filename", "pdf"),
    # use PNG because EMF and WMF break transparency
    "docx": ("--export-filename", "png"),
    # because of IE
    "html": ("--export-filename", "png"),
}
Hi. Thanks for the filter.
Unfortunately my pandoc goes into 100% loading loop with it. Have you any ideas what can be the reason? This is the command:
pandoc --version: