arp242 · April 25, 2020 07:17
diff --git a/vimtomd.py b/vimtomd.py
 #!/usr/bin/env python3
 # 
 # Convert Vim help files to Markdown.
 # 
 # No copyright; do what you will with this.

 import sys, pprint, re, argparse, subprocess, urllib.parse

 def parse_first_line(line):
 	"""Split the first line of a help file into its tag name and blurb.

 	The first line normally looks like one of:

 		*usr_23.txt*  For Vim version 8.0.  Last change: 2006 Apr 24
 		*xdg_open.txt* Run xdg-open from Vim; re-implements netrw's gx.

 	Returns a dict with two keys: 'name' is the text between the leading
 	pair of stars, 'blurb' is everything after the closing star with the
 	surrounding whitespace stripped.

 	If the line does not start with a complete *name* tag, 'name' is the
 	empty string and the whole stripped line is used as the blurb, rather
 	than slicing the line at a bogus offset.
 	"""
 	closing = line.find('*', 1) if line.startswith('*') else -1
 	if closing == -1:
 		return {'name': '', 'blurb': line.strip()}

 	return {
 		'name': line[1:closing],
 		'blurb': line[closing+1:].strip(),
 	}

 def parse_intro(section):
 	"""Convert the intro of a help file (the part before the first section).

 	`section` is the list of intro lines. The first line is skipped; the
 	remaining lines are handed to parse_text() and its result is returned.

 	The table of contents in an intro can have two forms.

 	"New" style:
 		1. Search commands              |search-commands|
 		2. The definition of a pattern  |search-pattern|

 	"Old" style:
 		|21.1|  Suspend and resume
 		|21.2|  Executing shell commands

 	Neither form gets special treatment yet; TOC lines are converted like
 	any other text.
 	"""
 	# TODO: recognise "new"-style TOC lines. The draft pattern was made of
 	#   ^[0-9]+\.\s+?    .*    \s+?\|[\w-]\|$
 	# (the tag part presumably wants [\w-]+). It used to sit here as bare,
 	# non-raw string statements, which did nothing except trigger
 	# invalid-escape warnings.

 	# Pass a fresh list so the caller's lines are never modified.
 	return parse_text(list(section[1:]))

 def parse_section(text):
 	"""Parse one section (a list of lines) into its tag, name and body.

 	The first line is the header, which can have two forms:

 		*23.2*  Header
 		1. Search commands              *search-commands*

 	Returns a dict with the keys 'tag' (text between the stars), 'name'
 	(the header without the tag and without a leading "N. " section
 	number) and 'text' (the remaining lines converted by parse_text()).

 	A header without a complete *tag* gives an empty 'tag' and the header
 	itself as 'name'. An empty `text` list gives empty values throughout.
 	"""
 	header = text[0].strip() if text else ''

 	if header.startswith('*') and header.find('*', 1) != -1:
 		# Tag first, name after it.
 		closing = header.find('*', 1)
 		tag = header[1:closing]
 		name = header[closing+1:]
 	elif header.endswith('*') and header.count('*') >= 2:
 		# Name first, tag at the end of the line; look for the star that
 		# opens the last tag, ignoring the final (closing) star.
 		opening = header.rfind('*', 0, len(header)-1)
 		tag = header[opening+1:len(header)-1]
 		name = header[:opening]
 	else:
 		tag = ''
 		name = header

 	# Remove leading section number, if any
 	name = re.sub(r'^[0-9]+\.\s+', '', name.strip())

 	return {
 		'tag': tag,
 		'name': name,
 		'text': parse_text(text[1:]),
 	}

 def parse_inline(line):
 	"""Convert the inline markup of one help-file line to Markdown.

 	Each of these becomes a Markdown code span:

 		|foo| -> `foo`
 		*foo* -> `foo`
 		{foo} -> `foo`
 		<foo> -> `foo`

 	The substitutions run in the order listed and each one is non-greedy,
 	so several markers on one line are converted independently. Returns
 	the converted line.
 	"""
 	# TODO: optionally turn |foo| into a link such as
 	#   [`foo`](http://vimhelp.appspot.com/pattern.txt.html#foo)
 	# by asking Vim which help file defines the tag. An earlier draft of
 	# that lookup (one `vim` process per tag, so not very fast) was never
 	# called and has been dropped.
 	md = line
 	for pattern in (r'\|(.*?)\|', r'\*(.*?)\*', r'{(.*?)}', r'<(.*?)>'):
 		md = re.sub(pattern, r'`\1`', md)
 	return md

 def parse_text(text):
 	"""Convert a list of help-file lines into one Markdown string.

 	Steps, in order:
 	- drop a Vim modeline (a line starting with "vim:", optionally
 	  preceded by whitespace) if one occurs in the last three lines;
 	- run parse_inline() on every remaining line and join with newlines;
 	- strip the leading spaces of every line but the first;
 	- turn each block delimited by a lone ">" line and a lone "<" line
 	  into a code block indented by four spaces, set off by blank lines.

 	The caller's list is left untouched. Returns the result with leading
 	and trailing whitespace stripped.
 	"""
 	lines = list(text)

 	# Remove modeline which is typically set on the last section
 	# vim:tw=78:ts=8:ft=help:norl:expandtab
 	# Delete by index so an identical line earlier on is never hit.
 	for i in range(max(len(lines) - 3, 0), len(lines)):
 		if lines[i].lstrip().startswith('vim:'):
 			del lines[i]
 			break

 	md = '\n'.join(parse_inline(l) for l in lines)

 	# Replace much of the indentation that we don't need
 	md = re.sub(r'\n +', r'\n', md)

 	# Code blocks, also add indentation again
 	def indent_block(m):
 		code = '\n'.join('    ' + l for l in m.group(1).split('\n'))
 		return '\n\n' + code + '\n\n'

 	# Non-greedy, so that two separate code blocks are not merged into one
 	# block spanning everything between the first ">" and the last "<".
 	md = re.sub(r'\n+>\n(.*?)\n<\n+', indent_block, md, flags=re.DOTALL)

 	return md.strip()

 def parse_document(vim):
 	"""Parse the complete text of a Vim help file.

 	The text is cut wherever a run of 78 "=" characters occurs. The first
 	piece is the intro, whose first line supplies the name and blurb;
 	every later piece is a section.

 	Returns a dict with the keys 'name', 'blurb', 'intro' (result of
 	parse_intro()) and 'sections' (a list of parse_section() results).
 	"""
 	separator = 78 * '='

 	chunks = []
 	for chunk in vim.split(separator):
 		chunks.append(chunk.strip().split('\n'))

 	intro_lines = chunks[0]
 	first = parse_first_line(intro_lines[0])

 	return {
 		'name': first['name'],
 		'blurb': first['blurb'],
 		'intro': parse_intro(intro_lines),
 		'sections': [parse_section(lines) for lines in chunks[1:]],
 	}

 def main():
 	"""Command-line entry point.

 	Reads the help file named on the command line, then either lists the
 	tag and name of every section (-S) or prints the selected sections
 	(-s, default ALL) as Markdown on standard output.
 	"""
 	parser = argparse.ArgumentParser(description='Convert Vim help files to Markdown.')
 	parser.add_argument('-S', '--show-sections', action='store_true',
 		help='Print out the section names')
 	# Note the trailing space: the two literals are concatenated.
 	parser.add_argument('-s', '--sections', default='ALL',
 		help='List of sections to include; comma-separated. '
 		     'Use ALL for all sections; INTRO for the intro text.')
 	parser.add_argument('doc', nargs=1,
 		help='Vim help document')
 	args = parser.parse_args()

 	with open(args.doc[0], 'r') as fp:
 		doc = parse_document(fp.read())

 	if args.show_sections:
 		for s in doc['sections']:
 			print('tag: {} – text: {}'.format(s['tag'], s['name']))
 		return

 	# Compare against the parsed list, not the raw option string, so that
 	# e.g. a section called "INTRODUCTION" does not select the intro.
 	wanted = [ s.strip() for s in args.sections.split(',') ]
 	include_all = 'ALL' in wanted

 	if include_all or 'INTRO' in wanted:
 		print(doc['intro'])
 		print('\n')

 	for s in doc['sections']:
 		if not (include_all or s['name'] in wanted or s['tag'] in wanted):
 			continue

 		print(s['name'])
 		print('=' * len(s['name']))
 		print(s['text'])
 		print('\n')

 if __name__ == '__main__':
 	main()
	#!/usr/bin/env python3
	#
	# Convert Vim help files to Markdown.
	#
	# No copyright; do what you will with this.

	import sys, pprint, re, argparse, subprocess, urllib.parse

	def parse_first_line(line):
		"""Split the first line of a help file into its tag name and blurb.

		The first line normally looks like one of:

			*usr_23.txt*  For Vim version 8.0.  Last change: 2006 Apr 24
			*xdg_open.txt* Run xdg-open from Vim; re-implements netrw's gx.

		Returns a dict with two keys: 'name' is the text between the leading
		pair of stars, 'blurb' is everything after the closing star with the
		surrounding whitespace stripped.

		If the line does not start with a complete *name* tag, 'name' is the
		empty string and the whole stripped line is used as the blurb, rather
		than slicing the line at a bogus offset.
		"""
		closing = line.find('*', 1) if line.startswith('*') else -1
		if closing == -1:
			return {'name': '', 'blurb': line.strip()}

		return {
			'name': line[1:closing],
			'blurb': line[closing+1:].strip(),
		}

	def parse_intro(section):
		"""Convert the intro of a help file (the part before the first section).

		`section` is the list of intro lines. The first line is skipped; the
		remaining lines are handed to parse_text() and its result is returned.

		The table of contents in an intro can have two forms.

		"New" style:
			1. Search commands              |search-commands|
			2. The definition of a pattern  |search-pattern|

		"Old" style:
			|21.1|  Suspend and resume
			|21.2|  Executing shell commands

		Neither form gets special treatment yet; TOC lines are converted like
		any other text.
		"""
		# TODO: recognise "new"-style TOC lines. The draft pattern was made of
		#   ^[0-9]+\.\s+?    .*    \s+?\|[\w-]\|$
		# (the tag part presumably wants [\w-]+). It used to sit here as bare,
		# non-raw string statements, which did nothing except trigger
		# invalid-escape warnings.

		# Pass a fresh list so the caller's lines are never modified.
		return parse_text(list(section[1:]))

	def parse_section(text):
		"""Parse one section (a list of lines) into its tag, name and body.

		The first line is the header, which can have two forms:

			*23.2*  Header
			1. Search commands              *search-commands*

		Returns a dict with the keys 'tag' (text between the stars), 'name'
		(the header without the tag and without a leading "N. " section
		number) and 'text' (the remaining lines converted by parse_text()).

		A header without a complete *tag* gives an empty 'tag' and the header
		itself as 'name'. An empty `text` list gives empty values throughout.
		"""
		header = text[0].strip() if text else ''

		if header.startswith('*') and header.find('*', 1) != -1:
			# Tag first, name after it.
			closing = header.find('*', 1)
			tag = header[1:closing]
			name = header[closing+1:]
		elif header.endswith('*') and header.count('*') >= 2:
			# Name first, tag at the end of the line; look for the star that
			# opens the last tag, ignoring the final (closing) star.
			opening = header.rfind('*', 0, len(header)-1)
			tag = header[opening+1:len(header)-1]
			name = header[:opening]
		else:
			tag = ''
			name = header

		# Remove leading section number, if any
		name = re.sub(r'^[0-9]+\.\s+', '', name.strip())

		return {
			'tag': tag,
			'name': name,
			'text': parse_text(text[1:]),
		}

	def parse_inline(line):
		"""Convert the inline markup of one help-file line to Markdown.

		Each of these becomes a Markdown code span:

			|foo| -> `foo`
			*foo* -> `foo`
			{foo} -> `foo`
			<foo> -> `foo`

		The substitutions run in the order listed and each one is non-greedy,
		so several markers on one line are converted independently. Returns
		the converted line.
		"""
		# TODO: optionally turn |foo| into a link such as
		#   [`foo`](http://vimhelp.appspot.com/pattern.txt.html#foo)
		# by asking Vim which help file defines the tag. An earlier draft of
		# that lookup (one `vim` process per tag, so not very fast) was never
		# called and has been dropped.
		md = line
		for pattern in (r'\|(.*?)\|', r'\*(.*?)\*', r'{(.*?)}', r'<(.*?)>'):
			md = re.sub(pattern, r'`\1`', md)
		return md

	def parse_text(text):
		"""Convert a list of help-file lines into one Markdown string.

		Steps, in order:
		- drop a Vim modeline (a line starting with "vim:", optionally
		  preceded by whitespace) if one occurs in the last three lines;
		- run parse_inline() on every remaining line and join with newlines;
		- strip the leading spaces of every line but the first;
		- turn each block delimited by a lone ">" line and a lone "<" line
		  into a code block indented by four spaces, set off by blank lines.

		The caller's list is left untouched. Returns the result with leading
		and trailing whitespace stripped.
		"""
		lines = list(text)

		# Remove modeline which is typically set on the last section
		# vim:tw=78:ts=8:ft=help:norl:expandtab
		# Delete by index so an identical line earlier on is never hit.
		for i in range(max(len(lines) - 3, 0), len(lines)):
			if lines[i].lstrip().startswith('vim:'):
				del lines[i]
				break

		md = '\n'.join(parse_inline(l) for l in lines)

		# Replace much of the indentation that we don't need
		md = re.sub(r'\n +', r'\n', md)

		# Code blocks, also add indentation again
		def indent_block(m):
			code = '\n'.join('    ' + l for l in m.group(1).split('\n'))
			return '\n\n' + code + '\n\n'

		# Non-greedy, so that two separate code blocks are not merged into one
		# block spanning everything between the first ">" and the last "<".
		md = re.sub(r'\n+>\n(.*?)\n<\n+', indent_block, md, flags=re.DOTALL)

		return md.strip()

	def parse_document(vim):
		"""Parse the complete text of a Vim help file.

		The text is cut wherever a run of 78 "=" characters occurs. The first
		piece is the intro, whose first line supplies the name and blurb;
		every later piece is a section.

		Returns a dict with the keys 'name', 'blurb', 'intro' (result of
		parse_intro()) and 'sections' (a list of parse_section() results).
		"""
		separator = 78 * '='

		chunks = []
		for chunk in vim.split(separator):
			chunks.append(chunk.strip().split('\n'))

		intro_lines = chunks[0]
		first = parse_first_line(intro_lines[0])

		return {
			'name': first['name'],
			'blurb': first['blurb'],
			'intro': parse_intro(intro_lines),
			'sections': [parse_section(lines) for lines in chunks[1:]],
		}

	def main():
		"""Command-line entry point.

		Reads the help file named on the command line, then either lists the
		tag and name of every section (-S) or prints the selected sections
		(-s, default ALL) as Markdown on standard output.
		"""
		parser = argparse.ArgumentParser(description='Convert Vim help files to Markdown.')
		parser.add_argument('-S', '--show-sections', action='store_true',
			help='Print out the section names')
		# Note the trailing space: the two literals are concatenated.
		parser.add_argument('-s', '--sections', default='ALL',
			help='List of sections to include; comma-separated. '
			     'Use ALL for all sections; INTRO for the intro text.')
		parser.add_argument('doc', nargs=1,
			help='Vim help document')
		args = parser.parse_args()

		with open(args.doc[0], 'r') as fp:
			doc = parse_document(fp.read())

		if args.show_sections:
			for s in doc['sections']:
				print('tag: {} – text: {}'.format(s['tag'], s['name']))
			return

		# Compare against the parsed list, not the raw option string, so that
		# e.g. a section called "INTRODUCTION" does not select the intro.
		wanted = [ s.strip() for s in args.sections.split(',') ]
		include_all = 'ALL' in wanted

		if include_all or 'INTRO' in wanted:
			print(doc['intro'])
			print('\n')

		for s in doc['sections']:
			if not (include_all or s['name'] in wanted or s['tag'] in wanted):
				continue

			print(s['name'])
			print('=' * len(s['name']))
			print(s['text'])
			print('\n')

	if __name__ == '__main__':
		main()