Created
May 12, 2013 23:32
-
-
Save priyadarshan/5565357 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import os.path | |
import re | |
import sys | |
# Patterns are plain raw strings (not ``ur"..."``, which Python 3 rejects);
# they are pure ASCII, so they match the same text on Python 2 and Python 3.

# A property line declaring the entry to be a poem, e.g. ":GENRE: poem".
POEM_RE = re.compile(r"\s*:GENRE:\s*poem\s*", re.IGNORECASE)
# The ":END:" line that closes an entry's header.
END_RE = re.compile(r"\s*:END:\s*", re.IGNORECASE)
# Any line whose first character is "#" or "*"; parse() ends a poem body here.
SECTION_RE = re.compile(r"[#*]")
# An already-present "#+BEGIN_VERSE" marker (case-sensitive).
BEGIN_VERSE_RE = re.compile(r"\s*#\+BEGIN_VERSE")

# Path of the file being processed.  Assigned by the script entry point and
# read by parse() only to name the file in its error messages.
path = ""
def skip_blank_lines(lines, i, output):
    """Copy the run of blank lines starting at index ``i`` into ``output``.

    A line counts as blank when nothing is left after ``strip()``.

    Args:
        lines: Sequence of line strings being scanned.
        i: Index at which scanning starts.
        output: List that receives each skipped line (mutated in place).

    Returns:
        The index of the first non-blank line at or after ``i``, or
        ``len(lines)`` when only blank lines remain.
    """
    num_lines = len(lines)
    while i < num_lines and not lines[i].strip():
        output.append(lines[i])
        i += 1
    return i
def skip_until_blank_line(lines, i, output):
    """Copy the run of non-blank lines starting at index ``i`` into ``output``.

    A line counts as blank when nothing is left after ``strip()``.

    Args:
        lines: Sequence of line strings being scanned.
        i: Index at which scanning starts.
        output: List that receives each skipped line (mutated in place).

    Returns:
        The index of the first blank line at or after ``i``, or
        ``len(lines)`` when no blank line remains.
    """
    num_lines = len(lines)
    while i < num_lines and lines[i].strip():
        output.append(lines[i])
        i += 1
    return i
def skip_until_section(lines, i, output):
    """Copy lines into ``output`` until one starts with ``*``.

    Only the very first character is examined, so an indented ``*`` does not
    stop the scan.

    Args:
        lines: Sequence of line strings being scanned.
        i: Index at which scanning starts.
        output: List that receives each skipped line (mutated in place).

    Returns:
        The index of the first line at or after ``i`` that starts with
        ``*`` (that line is not copied), or ``len(lines)`` when there is none.
    """
    num_lines = len(lines)
    while i < num_lines and not lines[i].startswith("*"):
        output.append(lines[i])
        i += 1
    return i
def skip_re(regex, lines, i, output):
    """Copy the run of lines matching ``regex`` starting at index ``i``.

    Matching uses ``regex.match``, i.e. the pattern is anchored at the start
    of each line.

    Args:
        regex: Compiled regular expression (anything with a ``match`` method).
        lines: Sequence of line strings being scanned.
        i: Index at which scanning starts.
        output: List that receives each skipped line (mutated in place).

    Returns:
        The index of the first line at or after ``i`` that does not match,
        or ``len(lines)`` when every remaining line matches.
    """
    num_lines = len(lines)
    while i < num_lines and regex.match(lines[i]):
        output.append(lines[i])
        i += 1
    return i
def skip_until_re(regex, lines, i, output):
    """Copy lines into ``output`` until one matches ``regex``.

    Matching uses ``regex.match``, i.e. the pattern is anchored at the start
    of each line.

    Args:
        regex: Compiled regular expression (anything with a ``match`` method).
        lines: Sequence of line strings being scanned.
        i: Index at which scanning starts.
        output: List that receives each skipped line (mutated in place).

    Returns:
        The index of the first matching line at or after ``i`` (that line is
        not copied), or ``len(lines)`` when no remaining line matches.
    """
    num_lines = len(lines)
    while i < num_lines and not regex.match(lines[i]):
        output.append(lines[i])
        i += 1
    return i
def parse(lines):
    """Wrap the body of every poem entry in ``#+BEGIN_VERSE``/``#+END_VERSE``.

    The input is consumed in this order:

    1. everything up to and including the first line starting with ``*``
       (the index heading);
    2. the blank lines that follow it;
    3. the next run of non-blank lines (the book's properties);
    4. the remainder, in which every line matching ``POEM_RE`` opens a header
       that runs to the next line matching ``END_RE``.  The text after that
       header, up to the next line matching ``SECTION_RE``, is wrapped in a
       verse block -- unless it already begins with ``#+BEGIN_VERSE``, in
       which case it is copied unchanged.

    Structural problems are reported with ``print`` (naming the module-level
    ``path``) and end the parse early.  Each of them is detected only once
    the end of the input has been reached, so the returned list still
    contains every input line.

    Args:
        lines: List of line strings, newline-terminated as produced by
            ``readlines()``.

    Returns:
        A new list of strings that, joined together, form the rewritten text.
    """
    output = []
    num_lines = len(lines)

    # Scan until we find the index (the first line starting with "*").
    i = skip_until_section(lines, 0, output)
    if i >= num_lines:
        print("ERROR: No index found in %s" % path)
        return output
    output.append(lines[i])
    i += 1

    # The first non-blank line after the index starts the book's properties.
    i = skip_blank_lines(lines, i, output)
    if i >= num_lines:
        print("ERROR: No properties found for the book %s" % path)
        return output

    # The properties run until the first blank line.
    i = skip_until_blank_line(lines, i, output)
    if i >= num_lines:
        # The conversion used to be a bare "%", which raised ValueError
        # ("incomplete format") instead of reporting the problem.
        print("ERROR: Nothing found after the book's properties in %s" % path)
        return output

    while i < num_lines:
        # Anything that is not a ":GENRE: poem" line is copied verbatim.
        if not POEM_RE.match(lines[i]):
            output.append(lines[i])
            i += 1
            continue

        # Copy the ":GENRE: poem" line itself.
        output.append(lines[i])
        i += 1

        # Copy the rest of the header, up to and including ":END:".
        i = skip_until_re(END_RE, lines, i, output)
        if i >= num_lines:
            print("ERROR: Missing :END: in %s" % path)
            return output
        output.append(lines[i])
        i += 1

        # Move to the first non-blank line after the header.
        i = skip_blank_lines(lines, i, output)
        if i >= num_lines:
            print("ERROR: No content after header in %s" % path)
            return output

        # Already wrapped in a verse block: leave this entry alone.
        if BEGIN_VERSE_RE.match(lines[i]):
            output.append(lines[i])
            i += 1
            continue

        # This is the start of the poem body; it continues until the next
        # line that begins with "#" or "*".
        output.append("#+BEGIN_VERSE\n\n")
        poem = []
        while i < num_lines and not SECTION_RE.match(lines[i]):
            poem.append(lines[i])
            i += 1

        # Trim blank lines at the end of the poem.  Every index is examined,
        # so a one-line poem followed by blank lines is trimmed as well.
        while poem and not poem[-1].strip():
            poem.pop()

        # extend() avoids re-copying the whole output list for every poem.
        output.extend(poem)
        output.append("\n#+END_VERSE\n\n\n")

    return output
if __name__ == "__main__":
    # Usage: <script> FILE
    # Reads FILE, runs parse() over its lines and writes the result back to
    # the same path, replacing the original contents.
    if len(sys.argv) < 2:
        # Without this check a missing argument surfaced as a bare IndexError.
        print("Usage: %s FILE" % sys.argv[0])
        sys.exit(1)

    # Assigned at module level so parse() can name the file in its messages.
    path = sys.argv[1]
    if not os.path.isfile(path):
        print("Doh! The file %s does not exist." % path)
        sys.exit(1)

    with open(path) as f:
        lines = f.readlines()

    output = parse(lines)

    with open(path, "w") as f:
        f.write("".join(output))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment