Last active
April 13, 2017 16:44
-
-
Save yymao/e803bf715a683b7e219f57284bdcb0f4 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
from __future__ import print_function | |
import re | |
import argparse | |
def _find_ending_brace(s, open_brace=1): | |
escaping = False | |
output = '' | |
remain = '' | |
for i, c in enumerate(s): | |
if escaping: | |
escaping = False | |
else: | |
if c == '%': | |
break | |
elif c == '{': | |
open_brace += 1 | |
elif c == '}': | |
open_brace -= 1 | |
if open_brace == 0: | |
remain = s[i+1:].strip() | |
break | |
escaping = (c == '\\') | |
output += c | |
return open_brace, output, remain | |
def _print_counter(counter): | |
_counter = list(counter) | |
output = '' | |
while sum(_counter): | |
output += '{}.'.format(_counter.pop(0)) | |
return output.rstrip('.') | |
def _iter_section(fp, re_sec):
    """Yield ``(level, skip, title)`` for each sectioning command in *fp*.

    Parameters
    ----------
    fp : iterator of str
        Source of text lines; consumed with ``next()``.
    re_sec : compiled regular expression
        Pattern whose three groups are the level prefix, the star marker
        and the text following the opening brace of the title.

    Raises
    ------
    ValueError
        If the input ends while a title's braces are still open.
    """
    leftover = ''
    open_brace = 0
    try:
        while True:
            # Text left after a closed title is searched again before a
            # new line is read, so several commands on one line are found.
            if leftover:
                line, leftover = leftover, ''
            else:
                line = next(fp)
            match = re_sec.search(line)
            if match is None:
                continue
            level, skip, head = match.groups()
            open_brace, title, leftover = _find_ending_brace(head.strip() + ' ', 1)
            # The title continues on following lines until its brace closes.
            while open_brace:
                following = next(fp).strip() + ' '
                open_brace, piece, leftover = _find_ending_brace(following, open_brace)
                title += piece
            yield level, skip, title
    except StopIteration:
        # Input exhausted: only an error if a title was left unterminated.
        if open_brace:
            raise ValueError('misformed tex file')
def iter_tex_sections(files, max_sublevel=2, no_number='*'):
    """Yield formatted section titles found in one or more TeX files.

    Parameters
    ----------
    files : str or iterable of str
        A single file path, or an iterable of file paths. Files are read in
        the order given and the numbering continues from one file to the
        next.
    max_sublevel : int
        Maximum number of ``sub`` prefixes recognised before ``section``.
    no_number : str
        Label used in place of a number for starred (unnumbered) commands.

    Yields
    ------
    str
        The label (dotted number or *no_number*), a space, and the title,
        with surrounding whitespace stripped.
    """
    # ``basestring`` exists only on Python 2; referencing it on Python 3
    # raises NameError. Fall back to the Python 3 path types so a single
    # path is still wrapped in a list instead of being iterated per character.
    try:
        string_types = basestring  # noqa: F821 (Python 2 only)
    except NameError:
        string_types = (str, bytes)

    counter = [0] * (max_sublevel + 1)
    re_sec = re.compile(r'\\((?:sub){{0,{}}})section(\*?){{(.*)'.format(max_sublevel))
    if isinstance(files, string_types):
        files = [files]
    for this_file in files:
        with open(this_file) as f:
            for level, skip, title in _iter_section(f, re_sec):
                if skip:
                    label = no_number
                else:
                    # ``level`` is the matched run of "sub" prefixes, three
                    # characters each, so its length gives the depth.
                    depth = len(level) // 3
                    counter[depth] += 1
                    # A new entry at this depth restarts all deeper counters.
                    for i in range(depth + 1, len(counter)):
                        counter[i] = 0
                    label = _print_counter(counter)
                yield '{} {}'.format(label, title).strip()
def main():
    """Command-line entry point: print every section title, one per line.

    Parses the TeX file paths and the ``--max-sublevel`` / ``--no-number``
    options from the command line and forwards them to
    ``iter_tex_sections``.
    """
    cli = argparse.ArgumentParser(description='Extract section titles from a TeX file')
    cli.add_argument('files', metavar='TEX_FILE', nargs='+')
    cli.add_argument(
        '--max-sublevel',
        default=2,
        type=int,
        help='maximal subsection level (default: 2)',
    )
    cli.add_argument(
        '--no-number',
        default='*',
        help='Bullet point for sections without numbering',
    )
    # The parsed attribute names match the keyword arguments of
    # iter_tex_sections, so the namespace can be expanded directly.
    options = vars(cli.parse_args())
    for line in iter_tex_sections(**options):
        print(line)
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment