Skip to content

Instantly share code, notes, and snippets.

@dniku
Created June 11, 2017 23:15
Show Gist options
  • Save dniku/02c48cecbc238c370dfda56aaca9e122 to your computer and use it in GitHub Desktop.
Save dniku/02c48cecbc238c370dfda56aaca9e122 to your computer and use it in GitHub Desktop.
import os, subprocess
# Regex to detect [//]-style comments:
# \[\/\/\]: (?:# )?(?:\((.*)\)|([^(].*[^)]))
def _header_end(lines):
    """Return the index at which the YAML metadata header stops.

    The header is everything up to (but excluding) the first non-blank line
    that follows the second ``---`` delimiter line.  Returns 0 when the
    input has no complete ``---`` ... ``---`` pair (there is no header to
    preserve), and ``len(lines)`` when only blank lines, or nothing at all,
    follow the closing delimiter.
    """
    state = 'before header'
    for index, line in enumerate(lines):
        stripped = line.strip()
        if state == 'before header' and stripped == '---':
            state = 'in header'
        elif state == 'in header' and stripped == '---':
            state = 'after header'
        elif state == 'after header' and stripped:
            return index
    # The scan ran off the end of the file: either the header (plus trailing
    # blank lines) spans the whole input, or no header was ever closed.
    return len(lines) if state == 'after header' else 0


def _separate_comments(lines):
    """Return *lines* with a blank line added after each ``-->`` line.

    A blank line is inserted only when the line ending in ``-->`` is
    directly followed by a non-blank line.
    NOTE(review): presumably this keeps the following text from being
    treated as part of the HTML comment block by the renderer -- confirm.
    """
    padded = []
    # Pair every line with its successor; the last line is paired with ''
    # so it never triggers an insertion, and an empty list yields nothing.
    for line, next_line in zip(lines, lines[1:] + ['']):
        padded.append(line)
        if line.strip().endswith('-->') and next_line.strip():
            padded.append('\n')
    return padded


def process_pandoc(in_path, out_path):
    """Reformat the Markdown file at *in_path* with pandoc into *out_path*.

    pandoc is run without ``--standalone``, so it does not emit the YAML
    metadata header itself; the header lines are copied verbatim from the
    input and prepended to pandoc's output.  An empty input produces an
    empty output without invoking pandoc.

    Args:
        in_path: path of the Markdown file to read.
        out_path: path of the file to write; its directory must exist.

    Raises:
        subprocess.CalledProcessError: if pandoc exits with a non-zero
            status.
        OSError: if a file cannot be opened or pandoc cannot be started.
    """
    # pandoc reads and writes UTF-8, so do not depend on the locale encoding.
    with open(in_path, 'r', encoding='utf-8') as f:
        lines = f.readlines()

    if not lines:
        with open(out_path, 'w', encoding='utf-8'):
            pass
        return

    header = lines[:_header_end(lines)]

    enabled_extensions = [
        'yaml_metadata_block',
        # 'grid_tables',  # not supported by kramdown
    ]
    disabled_extensions = [
        'hard_line_breaks',
        # 'raw_html',
        # 'simple_tables',
        # 'pipe_tables',
        # 'multiline_tables',
    ]
    extensions = (
        ''.join('+' + ext for ext in enabled_extensions) +
        ''.join('-' + ext for ext in disabled_extensions)
    )

    # NOTE(review): --normalize, --atx-headers and markdown_github belong to
    # the pandoc 1.x command line and appear to have been dropped or renamed
    # in later releases -- confirm against the installed pandoc version.
    subprocess.run(
        [
            'pandoc',
            '--from=markdown',
            '--to=markdown_github' + extensions,
            '--columns=80',
            '--atx-headers',  # makes headers with ### instead of underlining
            '--normalize',  # removes redundant spaces between words
            # '--smart',  # adds fancy punctuation (—, …, etc)
            # fancy punctuation includes fancy quotes, which I personally
            # don't like
            # '--standalone',  # adds YAML metadata header
            # pandoc does not preserve order in the header, so not adding it
            # back
            in_path,
            '-o',
            out_path,
        ],
        # Fail loudly instead of post-processing a stale or missing output.
        check=True,
    )

    with open(out_path, 'r', encoding='utf-8') as f:
        formatted = f.readlines()

    with open(out_path, 'w', encoding='utf-8') as f:
        f.writelines(header + _separate_comments(formatted))
def process(docs_in, docs_out, in_root, in_fname):
    """Format one file, mirroring its place under *docs_in* into *docs_out*.

    The part of *in_root* after the *docs_in* prefix is appended to
    *docs_out* to form the output directory, which is created if needed.
    The input path is printed before it is processed.

    Args:
        docs_in: root of the input tree.
        docs_out: root of the output tree.
        in_root: directory that contains the file.
        in_fname: name of the file inside *in_root*.

    Raises:
        ValueError: if the joined input path does not start with *docs_in*.
    """
    in_path = os.path.join(in_root, in_fname)
    print(in_path)
    # Explicit check rather than ``assert`` so it still runs under ``-O``.
    if not in_path.startswith(docs_in):
        raise ValueError(
            '{!r} is not located under {!r}'.format(in_path, docs_in)
        )
    out_root = docs_out + in_root[len(docs_in):]
    out_path = os.path.join(out_root, in_fname)
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(out_root, exist_ok=True)
    process_pandoc(in_path, out_path)
def main(docs_in='_docs', docs_out='_formatted_docs'):
    """Format every ``.md`` file found under *docs_in* into *docs_out*.

    Walks *docs_in* recursively and hands each file whose name ends in
    ``.md`` to ``process``; all other files are ignored.

    Args:
        docs_in: root directory to scan (defaults to ``'_docs'``).
        docs_out: root directory for the formatted copies (defaults to
            ``'_formatted_docs'``).
    """
    for root, _dirs, files in os.walk(docs_in):
        for fname in files:
            if fname.endswith('.md'):
                process(docs_in, docs_out, root, fname)
# Script entry point: run the formatter only when this file is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment