Created
April 19, 2018 22:13
-
-
Save dmahugh/0610d516ab13dd5698d74ca6197aa14a to your computer and use it in GitHub Desktop.
Extract code snippets from Markdown files and write them to YAML files, in support of the Office reference content migration
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Convert code snippets for use in auto-generated docs. | |
""" | |
import glob
import os
# Root of the local cloned copy of the docs repo; main() scans the
# per-platform folders under <REPO_FOLDER>/reference/.
REPO_FOLDER = 'C:/temp/office-js-docs'
def main():
    """Run snippet extraction once per platform folder.

    Each platform gets its own output file named '<platform>-snippets.yaml'
    (lower-cased platform name), written to the current working directory.
    """
    # 'Shared' is deliberately left out for now
    platforms = ('Excel', 'OneNote', 'Outlook', 'Visio', 'Word')
    for platform in platforms:
        slug = platform.lower()
        # Outlook is the only platform read from a '1.6' subfolder
        subfolder = '/1.6' if platform == 'Outlook' else ''
        extract_snippets(
            platform,
            folder=f'{REPO_FOLDER}/reference/{slug}{subfolder}',
            output_file=f'{slug}-snippets.yaml',
        )
def extract_snippets(platform, folder, output_file):
    """Extract snippets from *.md in a platform's folder and write to output file.

    Sample of the required output format:
    https://github.com/OfficeDev/office-js-docs-reference/blob/master/generate-docs/script-inputs/script-lab-snippets.yaml

    Only JavaScript code fences found inside a "Method..." section are
    extracted, and fences that follow a heading named "syntax" are skipped.
    Files whose name contains 'overview' are counted but not scanned.

    Args:
        platform: platform name; becomes the first part of each output heading.
        folder: folder whose *.md files are scanned (sub-folders are not).
        output_file: path of the YAML file to write; it is overwritten.

    A one-line summary of the counts is printed once the folder is processed.
    """
    # Counters accumulate over the whole folder; the remaining entries are
    # parsing state that is reset for every Markdown file.
    state = {'files': 0, 'snippet_files': set(), 'snippets': 0, 'methods': 0,
             'in_method_section': False, 'in_snippet': False, 'output_heading': '',
             'method_section_level': 0, 'headings_written': set(), 'last_heading': '',
             'object_name': ''}

    # The context manager closes the output even if a file fails to parse.
    # An explicit encoding keeps the result independent of the platform default.
    with open(output_file, 'w', encoding='utf-8') as outfile:
        for md_file in glob.glob(folder + '/*.md'):
            state['files'] += 1
            # test the file name only, so a folder called e.g. 'overview'
            # cannot cause every file to be skipped
            if 'overview' in os.path.basename(md_file).lower():
                continue  # skip any files with 'overview' in the name

            # Reset per-file state. 'in_snippet' is included so an
            # unterminated code fence cannot leak into the next file.
            state.update({
                'object_name': '',
                'in_method_section': False,
                'in_snippet': False,
                'output_heading': '',
                'method_section_level': 0,
                'headings_written': set(),
                'last_heading': '',
            })

            # 'utf-8-sig' also accepts files that start with a byte-order mark;
            # undecodable bytes are replaced rather than aborting the run.
            with open(md_file, encoding='utf-8-sig', errors='replace') as input_file:
                for line in input_file:
                    _scan_markdown_line(platform, md_file, line, state, outfile)

    print(f'{output_file:<22} '
          f'{state["snippets"]:>3} snippets found '
          f'for {state["methods"]:>3} methods; '
          f'{state["files"]:>3} total Markdown files, '
          f'{len(state["snippet_files"]):>3} contain snippets')


def _scan_markdown_line(platform, md_file, line, state, outfile):
    """Advance the parsing state by one Markdown line, writing any snippet text."""
    if line.strip().startswith('#'):
        state['last_heading'] = line.lower().strip('# \n')
    if heading_level(line) == 1:
        # the first word of the level-1 heading is used as the object name
        state['object_name'] = line.strip('# \n').split(' ')[0]

    if start_method_section(line):
        state['in_method_section'] = True
        state['method_section_level'] = heading_level(line)
        state['in_snippet'] = False
        return

    under_syntax = state['last_heading'] == 'syntax'
    if state['in_snippet'] and not under_syntax:
        continue_snippet(line, state, outfile)
        return

    # not inside a snippet
    if not state['in_method_section']:
        return  # nothing to do until a Method or Method Details section

    # A heading one level below the current Method* heading names a method.
    if heading_level(line) == state['method_section_level'] + 1:
        words = line.split()
        # a heading with no text after the #s carries no method name;
        # leave the previous output heading in place rather than crash
        if len(words) > 1:
            method_name = words[1].split('(')[0]
            state['output_heading'] = \
                f'{platform}.{state["object_name"]}.{method_name}:'

    if snippet_start(line) and not under_syntax:
        begin_snippet(md_file, line, state, outfile)
    elif 0 < heading_level(line) <= state['method_section_level']:
        # a heading at or above the Method* heading's level ends the section
        state['in_method_section'] = False
        state['method_section_level'] = 0
        state['in_snippet'] = False
def begin_snippet(md_file, line, state, outfile):
    """Handle the first line of a snippet to be extracted.

    State settings are updated, the method heading is written the first time a
    snippet is found for it, a new list item is opened, and the opening line is
    copied to the output only if it holds text besides the fence and its
    language tag.

    Args:
        md_file: path of the Markdown file being read; recorded in
            state['snippet_files'].
        line: the Markdown line that opens the code fence.
        state: dictionary of parsing state and counters; updated in place.
        outfile: writable text file object receiving the YAML output.
    """
    state['in_snippet'] = True
    state['snippets'] += 1
    state['snippet_files'].add(md_file)

    heading = state['output_heading']
    if heading not in state['headings_written']:
        # first snippet for this method: count it and emit its heading once
        state['methods'] += 1
        outfile.write(heading + '\n')
        state['headings_written'].add(heading)

    # NOTE(review): the indentation written here has to agree with what
    # continue_snippet writes and with the sample YAML format - confirm widths.
    outfile.write(' - |-\n')

    # Remove the opening fence together with its language tag. Both spellings
    # are handled, in any letter case; stripping only a literal '```js' left
    # 'avascript' behind and caused the fence line itself to be written out.
    remainder = line.strip()
    lowered = remainder.lower()
    for fence in ('```javascript', '```js'):  # longest spelling first
        if lowered.startswith(fence):
            remainder = remainder[len(fence):]
            break

    # only write first line if non-blank
    if remainder.strip():
        outfile.write(f' {line}')
def continue_snippet(line, state, outfile):
    """Copy one line of the current snippet to the output file.

    A line containing a code fence (```) ends the snippet: the 'in_snippet'
    flag in ``state`` is cleared, and the line is copied only if it holds
    something other than the fence itself. Any other line is copied as is.
    """
    closes_snippet = '```' in line
    if closes_snippet:
        state['in_snippet'] = False
        leftover = line.strip().replace('```', '').strip()
        if not leftover:
            return  # a bare closing fence is not part of the snippet
    outfile.write(f' {line}')
def heading_level(line):
    """Return the heading level of a line from a Markdown file.

    The level is the number of #s that open the line, ignoring surrounding
    whitespace. The run of #s must be followed by a space or end the line;
    anything else (plain text, or text such as '#hashtag') returns 0.
    """
    stripped = line.strip()
    if not stripped.startswith('#'):
        return 0  # not a heading
    marker = stripped.split(' ')[0]
    # A first token holding anything besides '#' is ordinary text. Its full
    # length used to be reported as the level (e.g. 8 for '#hashtag').
    if marker.strip('#'):
        return 0
    return len(marker)
def snippet_start(line):
    """Determine whether a line of Markdown is the beginning of a JavaScript snippet.

    The line must open a code fence (```) whose language tag, written directly
    after the fence, is exactly 'js' or 'javascript' in any letter case.
    Returns a bool.
    """
    text = line.strip().lower()
    if not text.startswith('```'):
        return False
    info = text[3:]
    # The tag is the first word after the fence and must touch it. Comparing
    # the whole word keeps '```json' or '```jsx' from passing as JavaScript,
    # which a prefix test on '```js' allowed.
    tag = info.split(None, 1)[0] if info[:1].strip() else ''
    return tag in ('js', 'javascript')
def start_method_section(line):
    """Tell whether a Markdown line is the heading that opens a Method or
    Method Details section.

    True only when heading_level() reports the line as a heading and its text,
    with surrounding '#' and space characters removed, starts with 'method'
    (case-insensitive).
    """
    is_heading = bool(heading_level(line))
    return is_heading and line.strip('# ').lower().startswith('method')
# Run the extraction only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment