Created
April 25, 2020 07:17
-
-
Save arp242/ef0574aa2bf9d3ea31673223af98bef7 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# | |
# Convert Vim help files to Markdown. | |
# | |
# No copyright; do what you will with this. | |
import sys, pprint, re, argparse, subprocess, urllib.parse | |
def parse_first_line(line):
    """Split the first line of a Vim help file into its tag name and blurb.

    The first line is expected to look like one of::

        *usr_23.txt*  For Vim version 8.0.  Last change: 2006 Apr 24
        *xdg_open.txt*  Run xdg-open from Vim; re-implements netrw's gx.

    Args:
        line: The first line of the help file.

    Returns:
        A dict with two keys: ``'name'`` is the text between the leading
        pair of ``*`` characters and ``'blurb'`` is the remainder of the
        line with surrounding whitespace removed.  If the line does not
        start with a complete ``*name*`` tag, ``'name'`` is the empty string
        and ``'blurb'`` is the whole stripped line.
    """
    line = line.strip()
    if line.startswith('*'):
        # partition() reports a missing closing star through an empty
        # separator, so a "not found" index is never used for slicing.
        name, closing, rest = line[1:].partition('*')
        if closing:
            return {
                'name': name,
                'blurb': rest.strip(),
            }
    return {
        'name': '',
        'blurb': line,
    }
def parse_intro(section):
    """Convert the intro (everything before the first section) to Markdown.

    The intro usually contains a table of contents, which can have two
    forms.  The "new" style::

        1. Search commands              |search-commands|
        2. The definition of a pattern  |search-pattern|

    and the "old" style::

        |21.1|  Suspend and resume
        |21.2|  Executing shell commands

    Neither form gets special treatment yet: every line after the first is
    passed through ``parse_text`` unchanged.

    Args:
        section: List of lines of the intro; the first entry is the
            ``*name*  blurb`` line and is skipped.

    Returns:
        The Markdown text produced by ``parse_text``.
    """
    # TODO: recognise "new"-style TOC lines.  The pattern sketched for this
    # so far (never applied) is the concatenation of:
    #   r'^[0-9]+\.\s+?'
    #   r'.*'
    #   r'\s+?\|[\w-]\|$'
    # Pass a copy so the caller's list is never touched.
    body = list(section[1:])
    return parse_text(body)
def parse_section(text):
    """Parse one help section into its tag, name and Markdown body.

    The first line of a section is its header, which can have two forms::

        *23.2*  Header
        1. Search commands              *search-commands*

    Args:
        text: Non-empty list of lines; ``text[0]`` is the header and the
            remaining lines are the section body.

    Returns:
        A dict with ``'tag'`` (text between the header's ``*`` pair),
        ``'name'`` (header text without the tag and without a leading
        ``N. `` section number) and ``'text'`` (the body converted by
        ``parse_text``).  A header that carries no complete ``*tag*`` at
        either end yields an empty ``'tag'`` and the whole header as
        ``'name'``.
    """
    header = text[0].strip()

    # Default for headers without a recognisable tag.
    tag, name = '', header
    if header.startswith('*'):
        # Leading tag: "*23.2*  Header"
        candidate, closing, rest = header[1:].partition('*')
        if closing:
            tag, name = candidate, rest.strip()
    elif header.endswith('*'):
        # Trailing tag: "1. Search commands   *search-commands*".
        # Look for the opening star, ignoring the final (closing) one.
        opening = header.rfind('*', 0, len(header) - 1)
        if opening != -1:
            tag = header[opening + 1:-1]
            name = header[:opening].strip()

    section = {
        'tag': tag,
        # Remove leading section number, if any
        'name': re.sub(r'^[0-9]+\.\s+', '', name),
    }
    section['text'] = parse_text(text[1:])
    return section
def parse_inline(line, *, link_tags=False):
    """Convert Vim help inline markup on a single line to Markdown.

    The following conversions are applied, in this order::

        |foo| -> `foo`   (or a link, see ``link_tags``)
        *foo* -> `foo`
        {foo} -> `foo`
        <foo> -> `foo`

    Args:
        line: One line of help text.
        link_tags: When true, each ``|tag|`` is looked up by running Vim
            and, if Vim reports a help file for it, rendered as
            ``[`tag`](http://vimhelp.appspot.com/FILE.html#tag)``.  This
            starts one Vim process per tag, so it is slow; it is off by
            default.

    Returns:
        The converted line.
    """
    def link(match):
        """Render one |tag| match as a link, or as plain code if unknown."""
        tag = match.group(1)
        plain = '`{}`'.format(tag)
        try:
            # NOTE(review): the tag becomes part of an Ex command line;
            # only use link_tags on help files you trust.
            proc = subprocess.run(
                ['vim', '-u', 'NONE', '+:help ' + tag, '+:q', '+:q'],
                stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        except OSError:
            # Vim is not installed or cannot be started.
            return plain
        stdout = proc.stdout.decode(errors='replace')

        # Expected output on success (presumably echoed by Vim when it
        # opens the help file -- TODO confirm for non-terminal stdout):
        #   "pattern.txt" [readonly] 1395 lines, 58382 characters
        # and on failure:
        #   E149: Sorry, no help for foooooobarrrrr
        if ' [readonly] ' not in stdout:
            return plain

        # The help file name is the text between the first pair of quotes.
        start = stdout.find('"') + 1
        end = stdout.find('"', start)
        if start == 0 or end == -1:
            return plain
        url = 'http://vimhelp.appspot.com/{}.html#{}'.format(
            stdout[start:end], urllib.parse.quote(tag))
        return '[`{}`]({})'.format(tag, url)

    md = re.sub(r'\|(.*?)\|', link if link_tags else r'`\1`', line)

    # A single pass over *foo* is enough: any star left over afterwards is
    # the last one on its line and so can never pair up again.
    md = re.sub(r'\*(.*?)\*', r'`\1`', md)
    md = re.sub(r'{(.*?)}', r'`\1`', md)
    md = re.sub(r'<(.*?)>', r'`\1`', md)
    return md
def parse_text(text):
    """Convert a list of help-text lines to one Markdown string.

    Steps, in order:

    1. Drop a Vim modeline (a line whose first non-blank text is ``vim:``)
       if one appears among the last three lines.
    2. Convert inline markup on every line with ``parse_inline``.
    3. Strip leading spaces from every line after the first.
    4. Turn each ``>`` ... ``<`` block (both markers alone on their line)
       into a Markdown indented code block surrounded by blank lines.

    Args:
        text: List of lines.  The list is not modified.

    Returns:
        The Markdown text with leading and trailing whitespace removed.
    """
    # Work on a copy so the caller's list is left untouched.
    lines = list(text)

    # Remove modeline which is typically set on the last section, e.g.
    #   vim:tw=78:ts=8:ft=help:norl:expandtab
    # Deleting by index removes exactly the line that was inspected, even
    # if an identical line occurs earlier.  lstrip() covers the common form
    # where whitespace precedes "vim:".
    for idx in range(max(len(lines) - 3, 0), len(lines)):
        if lines[idx].lstrip().startswith('vim:'):
            del lines[idx]
            break

    md = '\n'.join(parse_inline(l) for l in lines)

    # Replace much of the indentation that we don't need
    md = re.sub(r'\n +', '\n', md)

    # Code blocks, also add indentation again
    def indent_block(match):
        # Markdown only treats a block as code when it is indented by at
        # least four spaces.
        body = '\n'.join('    ' + code_line
                         for code_line in match.group(1).split('\n'))
        return '\n\n' + body + '\n\n'

    # Non-greedy so that each >...< pair becomes its own block instead of
    # one match running from the first ">" to the last "<".
    md = re.sub(r'\n+>\n(.*?)\n<\n+', indent_block, md, flags=re.DOTALL)

    return md.strip()
def parse_document(vim):
    """Parse the full text of a Vim help file into a document dict.

    The text is cut at every run of 78 ``=`` characters.  The chunk before
    the first cut is the intro (its first line carries the file name and
    blurb); every later chunk is one section.

    Args:
        vim: Complete contents of the help file as a single string.

    Returns:
        A dict with ``'name'`` and ``'blurb'`` (from ``parse_first_line``),
        ``'intro'`` (from ``parse_intro``) and ``'sections'``, a list with
        one ``parse_section`` result per section, in file order.
    """
    separator = '=' * 78

    # Each chunk becomes a list of lines, trimmed of surrounding whitespace.
    head, *rest = (chunk.strip().split('\n')
                   for chunk in vim.split(separator))

    first_line = parse_first_line(head[0])
    return {
        'name': first_line['name'],
        'blurb': first_line['blurb'],
        'intro': parse_intro(head),
        'sections': [parse_section(chunk) for chunk in rest],
    }
# Command-line entry point: parse the given help file, then either list its
# sections (-S) or print the selected sections (-s) as Markdown on stdout.
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Convert Vim help files to Markdown.')
    parser.add_argument('-S', '--show-sections', action='store_true',
                        help='Print out the section names')
    parser.add_argument('-s', '--sections', default='ALL',
                        help='List of sections to include; comma-separated. '
                             'Use ALL for all sections; INTRO for the intro text.')
    parser.add_argument('doc', nargs=1,
                        help='Vim help document')
    args = parser.parse_args()

    with open(args.doc[0], 'r') as fp:
        doc = parse_document(fp.read())

    if args.show_sections:
        for s in doc['sections']:
            print('tag: {} – text: {}'.format(s['tag'], s['name']))
    else:
        # Split once and match whole entries, so that e.g. a section called
        # "INTRODUCTION" does not accidentally select the intro.
        sections = [s.strip() for s in args.sections.split(',')]
        include_all = 'ALL' in sections

        if include_all or 'INTRO' in sections:
            print(doc['intro'])
            print('\n')

        for s in doc['sections']:
            if not (include_all or s['name'] in sections or s['tag'] in sections):
                continue

            # Section name underlined with "=" (Markdown setext heading).
            print(s['name'])
            print('=' * len(s['name']))
            print(s['text'])
            print('\n')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment