Last active
August 29, 2015 14:17
-
-
Save moble/58fc6b1956caa6c944bb to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/env python | |
"""Add a line to docstring in front of code samples if necessary | |
The SymPy documentation automatically converts code samples in docstrings to | |
blocks that can be run in SymPy Live.  But many of the code samples in
docstrings throughout the project directly follow text with no blank line in
between, which means that these code samples are not recognized.  So not only
can they not be run in SymPy Live, but they are converted to `<p>` paragraphs,
which means that the formatting is not preserved, and it's all run together
on one line.
This little script finds examples of this, and adds the necessary newline. | |
There is a slight complication, in that you can't just detect lines that start | |
with `>>>` and add a newline in front of them, because that will make a new | |
SymPy Live code block for each line, which is not what we want. Also, you | |
can't detect when such a line immediately follows another line that doesn't | |
start with `>>>`, because sometimes there will be output from a previous | |
command. So instead, what I do below is to look for two consecutive newlines | |
followed by something other than `>`, which indicates a text block. Then I | |
look for a line starting with `>>>` that is NOT separated from that text block | |
by a double newline. | |
Following Chris Smith's advice, I'll also look for improperly formatted | |
"Example" headings, using a second regex pattern to be applied after the one | |
above. | |
""" | |
from __future__ import print_function | |
import argparse | |
def _compile_docstring_patterns():
    """Build the two regexes used to repair docstrings.

    Returns a ``(code_after_text, example_heading)`` tuple of compiled
    patterns:

    * ``code_after_text`` matches a block of text lines (preceded by a blank
      line or by the start of the string) that is immediately followed by a
      line starting with ``>>>``.  Groups: 1 = leading separator, 2 = the
      text block, 3 = the indentation plus ``>>>``.
    * ``example_heading`` matches an ``Example``/``Examples`` heading line
      (optionally followed by colons) and an optional ``=``/``-`` underline.
      Groups: 1 = preceding newline, 2 = heading indentation, 3 = the
      heading with its optional underline.
    """
    import re

    code_after_text = re.compile(r"""
        ((?:\n\n)|(?:^\s*))   # Match two consecutive newlines or the beginning of the string and whitespace
        (                     # Match a text block
          (?:                 # Set up a non-capturing group to represent a line of text
            [ \t\r\f\v]*      # Whitespace other than a newline
            (?!\s|>)          # Something that is not whitespace or ">"
            [^\n]*            # Any number of non-newline characters
            \n                # A single newline
          )+                  # Match one or more such groups
        )                     # End of text block
        ([ \t\r\f\v]*>>>)     # Match the beginning of a code block
        """, re.VERBOSE)
    example_heading = re.compile(r"""
        (\n)
        ([ \t\r\f\v]*)
        (
          Examples?:*
          \n
          (?:
            [ \t\r\f\v]*
            (?:=|-)+
            \n
          )?
        )
        """, re.VERBOSE)
    return code_after_text, example_heading


def _fix_docstrings(file_contents, code_after_text, example_heading):
    """Return `file_contents` with its triple-double-quoted strings repaired.

    The text is split on triple double quotes; every second piece lies inside
    a pair of them, so only those pieces are rewritten before everything is
    joined back together with the same delimiter.
    """
    pieces = file_contents.split('"""')
    for index in range(1, len(pieces), 2):
        # First insert the missing blank line in front of the code sample...
        separated = code_after_text.sub(r'\1\2\n\3', pieces[index])
        # ...then normalize the "Example" heading and its underline.
        pieces[index] = example_heading.sub(
            r'\g<1>\g<2>Examples\n\g<2>========\n', separated)
    return '"""'.join(pieces)


def _print_diff(before, after):
    """Print the changed lines between two texts and count them.

    Only the ``-``, ``?`` and ``+`` lines produced by ``difflib.Differ`` are
    printed.  Returns a ``(removed, added)`` tuple with the number of ``-``
    and ``+`` lines respectively.
    """
    import difflib

    removed = 0
    added = 0
    comparison = difflib.Differ().compare(before.splitlines(True),
                                          after.splitlines(True))
    for line in comparison:
        marker = line[0]
        if marker in ('-', '?', '+'):
            print(line.rstrip())
        if marker == '-':
            removed += 1
        elif marker == '+':
            added += 1
    return removed, added


def insert_newlines(doit=False):
    """Find (and optionally fix) docstring code samples that directly follow text.

    Walks the current working directory, looks at every ``*.py`` file outside
    the top-level ``build`` directory, and prints the name and a diff for each
    file whose docstrings would change.  A summary of removed and added lines
    is printed at the end.

    Parameters
    ==========

    doit : bool, optional
        If true, each changed file is rewritten in place; otherwise the files
        are left untouched and only the report is printed.
    """
    import os
    import fnmatch

    code_after_text, example_heading = _compile_docstring_patterns()
    removal_count = 0
    addition_count = 0
    for root, dirnames, filenames in os.walk('.'):
        # Prune the top-level build directory in place so os.walk never
        # descends into it.  Comparing the exact name (rather than a prefix of
        # the path) keeps siblings such as "builder" in the scan.
        if root == '.' and 'build' in dirnames:
            dirnames.remove('build')
        for filename in fnmatch.filter(filenames, '*.py'):
            filename = os.path.join(root, filename)
            with open(filename, 'r') as f:
                file_contents = f.read()
            file_contents_modified = _fix_docstrings(
                file_contents, code_after_text, example_heading)
            if file_contents_modified == file_contents:
                continue
            print(filename)
            removed, added = _print_diff(file_contents, file_contents_modified)
            removal_count += removed
            addition_count += added
            if doit:
                # Rewrite the file
                with open(filename, 'w') as f:
                    f.write(file_contents_modified)
    print("Removed {0} lines and added {1}".format(removal_count, addition_count))
if __name__ == '__main__':
    # Command-line entry point: without --doit the script only reports the
    # problematic lines; with it, the affected files are rewritten.
    cli = argparse.ArgumentParser(
        description="Add a line to docstring in front of code samples if necessary",
    )
    cli.add_argument(
        "--doit",
        action="store_true",
        help="make changes in the files; if not present, just show the problematic lines",
    )
    options = cli.parse_args()
    insert_newlines(doit=options.doit)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment