Last active
May 18, 2016 20:38
-
-
Save bpj/f53b80cf993d40555c11421d12b78d01 to your computer and use it in GitHub Desktop.
Pandoc filter which keeps/removes divs/spans which have some class
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
""" | |
Pandoc filter which removes divs and/or spans according to whether they | |
have certain classes which are listed in metadata or not. There are four | |
relevant metadata entries: | |
- incl_div | |
- excl_div | |
- incl_span | |
- excl_span | |
They are used on the command line like this: | |
:: | |
$ pandoc -F pandoc-incl-excl.py -M incl_span=foo -M incl_span=bar | |
This will cause all spans which do *not* have any of the classes ``foo`` | |
or ``bar`` to be removed: | |
:: | |
This is <span class="foo">seen</span><span class="quux">unseen</span>. | |
becomes ``This is seen.`` | |
You can alternatively invert the logic: | |
:: | |
$ pandoc -F pandoc-incl-excl.py -M excl_span=foo -M excl_span=bar | |
This will cause spans which *have* one of the classes ``foo`` or ``bar`` | |
to be removed while all *other* spans are left in place: | |
:: | |
This is <span class="foo">unseen</span><span class="quux">seen</span>. | |
Note that ``incl_div`` overrides ``excl_div`` and ``incl_span`` | |
overrides ``excl_span``; i.e. if any ``incl_div`` has been defined all | |
``excl_div`` will be ignored, and similarly for spans, but separately | |
for divs and spans, so that you can include only certain divs and | |
exclude certain spans in the same run, or vice versa. | |
Note that any unlisted classes will simply be ignored, so you | |
can't inhibit inclusion or exclusion by giving the same div or | |
span more than one class, but multiple classes can be used to | |
cause different intersecting groups of divs or spans to be | |
included or excluded on different runs. | |
Lastly, the inclusions or exclusions can be given as lists in a metadata | |
block: | |
:: | |
--- | |
incl_div: | |
- foo | |
- bar | |
... | |
This may actually be useful as you can specify files with different | |
metadata blocks as input files to pandoc: | |
:: | |
$ pandoc -F pandoc-incl-excl.py custom-meta.yaml document.md | |
""" | |
import pandocfilters as pf | |
# from pprint import PrettyPrinter | |
# import sys | |
# pp = PrettyPrinter(indent=4, stream=sys.stderr) | |
def is_elem(thing, *names):
    """Tell whether *thing* looks like a pandoc JSON element.

    An element is a dict holding both a ``'t'`` (type) and a ``'c'``
    (contents) key.  When one or more *names* are given, the value stored
    under ``'t'`` must additionally be one of them.

    Returns ``True`` or ``False``.
    """
    looks_tagged = isinstance(thing, dict) and 't' in thing
    if not looks_tagged:
        return False
    # With no names given, any element type is acceptable.
    type_ok = not names or thing['t'] in names
    return type_ok and 'c' in thing
def plain_list(thing):
    """Return the items of *thing* as a list of plain strings.

    Each item is flattened to text with ``pf.stringify``.  A real list is
    returned (not a lazy ``map`` object) so that the result can be tested
    for emptiness and searched with ``in`` any number of times, on both
    Python 2 and Python 3.
    """
    return [pf.stringify([item]) for item in thing]
# Becomes True once the class lists have been read from the metadata, so
# that the metadata is only inspected on the first call of filter_func.
saw_meta = False

# Class lists taken from the ``incl_*``/``excl_*`` metadata entries, keyed
# first by element kind and then by mode.
data = {
    'div': {
        'incl': [],
        'excl': [],
    },
    'span': {
        'incl': [],
        'excl': [],
    },
}

# Builds the message for a metadata entry of an unsupported type.
no_str_or_list = 'Metadata {0} must be MetaString or MetaList'.format


def _load_class_lists(meta):
    """Fill ``data`` from the ``incl_*``/``excl_*`` entries found in *meta*.

    A ``MetaString`` entry gives a single class, a ``MetaList`` entry gives
    several.  An entry of any other type is reported on stderr and ignored.
    """
    import sys

    for kind in ('div', 'span'):
        for mode in ('incl', 'excl'):
            name = mode + '_' + kind
            if name not in meta:
                continue
            entry = meta[name]
            if is_elem(entry, 'MetaString'):
                data[kind][mode] = [entry['c']]
            elif is_elem(entry, 'MetaList'):
                # Materialise the result so that it can be tested for
                # emptiness and searched repeatedly.
                data[kind][mode] = list(plain_list(entry['c']))
            else:
                sys.stderr.write(no_str_or_list(name) + '\n')


def filter_func(key, val, fmt, meta):
    """Pandoc filter action which keeps or removes divs and spans by class.

    :param key: type name of the element being visited.
    :param val: contents of the element; for ``Div`` and ``Span`` the
        class list is read from ``val[0][1]``.
    :param fmt: output format (unused).
    :param meta: document metadata; read on the first call only.
    :returns: ``[]`` to remove the element, ``None`` to leave it unchanged.

    If an inclusion list exists for the kind of element, the element is
    kept when *any* of its classes is listed and removed otherwise; the
    exclusion list is then ignored.  Failing that, if an exclusion list
    exists the element is removed when *any* of its classes is listed.
    Elements without any class are never removed.
    """
    global saw_meta
    if not saw_meta:
        _load_class_lists(meta)
        saw_meta = True
    for kind in ('div', 'span'):
        if kind.title() != key:
            continue
        classes = val[0][1]
        if not classes:
            return None
        wanted = data[kind]['incl']
        unwanted = data[kind]['excl']
        # Look at every class of the element, not just the first one, so
        # that unlisted classes are simply ignored.
        if wanted:
            if any(cls in wanted for cls in classes):
                return None
            return []
        if unwanted and any(cls in unwanted for cls in classes):
            return []
        return None
    return None
# Act as a pandoc JSON filter only when executed as a script, so that the
# module can also be imported without side effects.
if __name__ == "__main__":
    pf.toJSONFilter(filter_func)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment