Created
June 1, 2015 12:56
-
-
Save AndorChen/a07c591fed685fb7a80c to your computer and use it in GitHub Desktop.
Pandoc filter Ruby API http://about.ac/2015/06/docx-hack-guide.html
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'json' | |
# Reopens the core String class to add a snake_case -> CamelCase helper.
class String
  # Converts an underscore-separated name to CamelCase by capitalizing
  # each underscore-delimited segment and joining the segments with no
  # separator.
  #
  # Every segment goes through String#capitalize, so characters after the
  # first one are downcased ("raw_XML" becomes "RawXml").
  #
  # Returns a new String; the receiver is not modified.
  def camelize
    split('_').map(&:capitalize).join
  end
end
module PandocFilter | |
# Builder for pandoc AST element hashes.
#
# Element constructors are not defined one by one: calling a class method
# named after a key of NODES (for example Node.str("x") or Node.space) is
# routed through method_missing, which checks the argument count and
# returns the element as a Hash.
class Node
  # key: node type
  # value: expected argument number
  NODES = {
    # block elements
    plain: 1,
    para: 1,
    code_block: 2,
    raw_block: 2,
    block_quote: 1,
    ordered_list: 2,
    bullet_list: 1,
    definition_list: 1,
    header: 3,
    horizontal_rule: 0,
    table: 5,
    div: 2,
    null: 0,
    # inline elements
    str: 1,
    emph: 1,
    strong: 1,
    strikeout: 1,
    superscript: 1,
    subscript: 1,
    small_caps: 1,
    quoted: 2,
    cite: 2,
    code: 2,
    space: 0,
    line_break: 0,
    math: 2,
    raw_inline: 2,
    link: 2,
    image: 2,
    note: 1,
    span: 2
  }

  class << self
    # Builds the element hash for the node type `name`.
    #
    # name - Symbol method name; must be a key of NODES.
    # args - element contents; their count must equal NODES[name].
    #
    # Returns a Hash as produced by Node#to_hash.
    # Raises RuntimeError if name is not a known node type or the number
    # of arguments differs from the expected one.
    def method_missing(name, *args)
      raise "undefined #{name} node type" unless NODES.key?(name)

      unless args.size == NODES[name]
        raise "#{name} expects #{NODES[name]} arguments, but given #{args.size}"
      end

      new(type_name(name), *args).to_hash
    end

    # Keeps respond_to? in agreement with method_missing. It also lets
    # Ruby answer its implicit conversion probes (to_ary, to_str, ...)
    # from here, so they are not turned into "undefined ... node type"
    # errors by method_missing.
    def respond_to_missing?(name, include_private = false)
      NODES.key?(name) || super
    end

    private

    # Turns a NODES key such as :horizontal_rule into the pandoc
    # constructor name "HorizontalRule".
    def type_name(name)
      name.to_s.split('_').map(&:capitalize).join
    end
  end

  attr_reader :type
  attr_reader :args
  attr_reader :numargs

  # type - String constructor name (e.g. "Str").
  # args - the element's contents, zero or more values.
  def initialize(type, *args)
    @type = type
    @args = args
    @numargs = args.size
  end

  # Returns the element as a Hash with the Symbol keys :t (the type) and
  # :c (the contents): an empty Array for no arguments, the bare value for
  # a single argument, otherwise the Array of all arguments.
  #
  # NOTE(review): the keys are Symbols, whereas PandocFilter.walk looks
  # for the String key 't'; elements built here are therefore not handed
  # to a filter action when they appear inside a replacement.
  def to_hash
    contents =
      case numargs
      when 0 then []
      when 1 then args[0]
      else args
      end

    { t: type, c: contents }
  end
end
# Converts an action into a filter that reads a JSON-formatted
# pandoc document from stdin, transforms it by walking the tree
# with the action, and writes the new JSON-formatted pandoc document
# to stdout.
#
# The action is a block called as action(key, value, format, meta),
# where key is the type of the pandoc object (e.g. 'Str', 'Para'),
# value is the contents of the object (e.g. a string for 'Str',
# a list of inline elements for 'Para'), format is the target
# output format (the first command line argument, or "" when there
# is none), and meta is the document's metadata.
# If the block returns nil, the object to which it applies
# will remain unchanged. If it returns an object, the object will
# be replaced. If it returns a list, the list will be spliced in to
# the list to which the target object belongs. (So, returning an
# empty list deletes the object.)
#
# Two document layouts are understood: the array form whose first
# element holds the metadata under 'unMeta', and the object form that
# holds it under a top-level 'meta' key.
#
# action - block applied to every element, as described above
#
# Returns the result of JSON.dump; the document itself is written
# to $stdout.
def self.process(&action)
  # NOTE(review): depending on the json version, JSON.load may honor
  # object-creation additions; JSON.parse is the stricter alternative
  # if the input is not trusted.
  doc = JSON.load($stdin.read)

  # Ruby's ARGV does not contain the script name, so the first
  # command line argument is ARGV[0].
  format = ARGV.first || ""

  meta = doc.is_a?(Hash) ? doc.fetch('meta', {}) : doc[0]['unMeta']

  altered = walk(doc, format, meta, &action)
  JSON.dump(altered, $stdout)
end
# Walks the tree x and returns its concatenated text content,
# leaving out all formatting.
#
# Text is collected from 'Str' and 'MetaString' values and from the
# second entry of 'Code' and 'Math' values; 'LineBreak' and 'Space'
# each contribute a single space. Every other element type adds nothing
# itself.
#
# x - the tree (or subtree) to read
#
# Returns a String.
def self.stringify(x)
  result = []
  go = lambda do |key, val, _format, _meta|
    case key
    when 'Str', 'MetaString' then result.push(val)
    when 'Code', 'Math' then result.push(val[1])
    when 'LineBreak', 'Space' then result.push(" ")
    end
    # Always answer nil ("leave the element unchanged"). Array#push
    # returns the accumulator, and handing that back would make walk
    # treat the whole accumulator as a replacement list and traverse
    # it again for every piece of text.
    nil
  end
  walk(x, "", {}, &go)
  result.join
end
# Returns an attribute list, constructed from the
# dictionary attrs.
#
# attrs - Hash with String keys, or nil (treated as an empty Hash).
#         'id' supplies the identifier (default ''), 'classes' the class
#         list (default []); every other entry becomes a [key, value]
#         pair. The Hash passed in is left untouched.
#
# Returns the three-element Array [identifier, classes, key_value_pairs].
#
# NOTE(review): this is defined without `self.`, so it is reachable only
# as an instance method of whatever includes or extends the module.
def attributes(attrs)
  attrs ||= {}
  ident = attrs.fetch('id', '')
  classes = attrs.fetch("classes", [])
  # reject builds a new Hash, so the caller's attrs keeps its 'id' and
  # 'classes' entries and frozen hashes are accepted.
  keyvals = attrs.reject { |k, _| k == "classes" || k == "id" }.to_a
  [ident, classes, keyvals]
end
# Walk a tree, applying an action to every object.
# Returns a modified tree.
#
# An "object" is a Hash with a 't' key that sits directly inside an
# Array. For each one, action is called with (type, contents, format,
# meta) and its answer decides what goes into the rebuilt Array:
#
#   nil         - the original object, with its contents walked
#   an Array    - every entry of that Array, each walked (spliced in;
#                 an empty Array removes the object)
#   other value - that value, walked, in place of the object
#
# A replacement itself is not passed to action again; only what it
# contains is walked. Arrays and Hashes are rebuilt rather than modified
# in place, and any other value is returned as is.
#
# x      - the tree: an Array, a Hash, or a scalar
# format - passed through to action unchanged
# meta   - passed through to action unchanged
# action - block called for every object, as described above
def self.walk(x, format, meta, &action)
  case x
  when Array
    x.each_with_object([]) do |item, walked|
      replacement =
        if item.is_a?(Hash) && item.key?('t')
          action.call(item['t'], item['c'], format, meta)
        end

      case replacement
      when nil
        # Either not an object or the action chose to keep it.
        walked << walk(item, format, meta, &action)
      when Array
        replacement.each { |entry| walked << walk(entry, format, meta, &action) }
      else
        walked << walk(replacement, format, meta, &action)
      end
    end
  when Hash
    x.each_with_object({}) do |(key, value), walked|
      walked[key] = walk(value, format, meta, &action)
    end
  else
    x
  end
end
end |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment