priyadarshan · May 12, 2013 23:32
diff --git a/fix_poems.py b/fix_poems.py
 #!/usr/bin/env python
 import os.path
 import re
 import sys


 # Patterns are plain raw strings: the "ur" prefix is a syntax error on
 # Python 3, and these ASCII-only patterns compile the same without the "u".

 # A ":GENRE: poem" property line (any case, optional surrounding whitespace).
 POEM_RE = re.compile(r"\s*:GENRE:\s*poem\s*", re.IGNORECASE)
 # The ":END:" line that parse() treats as the end of a poem's header.
 END_RE = re.compile(r"\s*:END:\s*", re.IGNORECASE)
 # A line whose first character is "#" or "*"; parse() ends a poem body there.
 SECTION_RE = re.compile(r"[#*]")
 # An existing verse opener (case-sensitive): the poem is already wrapped.
 BEGIN_VERSE_RE = re.compile(r"\s*#\+BEGIN_VERSE")

 # File being processed; assigned in the __main__ block and read by parse()
 # when it prints an error message.
 path = ""


 def skip_blank_lines(lines, i, output):
    """Copy the run of blank lines starting at index ``i`` into ``output``.

    A line counts as blank when nothing is left after ``strip()``.  Returns
    the index of the first non-blank line at or after ``i``; when the input
    runs out first, the returned index is not a valid position in ``lines``.
    """
    for position in range(i, len(lines)):
        current = lines[position]
        if current.strip() != "":
            return position
        output.append(current)

    # Nothing stopped the scan: report the end of the input, or ``i`` itself
    # when the scan started beyond the end.
    return max(i, len(lines))

 def skip_until_blank_line(lines, i, output):
    """Copy lines from index ``i`` into ``output`` until a blank line.

    The blank line itself is not copied.  Returns its index, or an index past
    the end of ``lines`` when no blank line follows.
    """
    for position in range(i, len(lines)):
        current = lines[position]
        if current.strip() == "":
            return position
        output.append(current)

    # No blank line found: report the end of the input, or ``i`` itself when
    # the scan started beyond the end.
    return max(i, len(lines))

 def skip_until_section(lines, i, output):
    """Copy lines from index ``i`` into ``output`` until one starts with "*".

    The "*" line itself is not copied.  Returns its index, or an index past
    the end of ``lines`` when there is no such line.
    """
    for position in range(i, len(lines)):
        current = lines[position]
        if current.startswith("*"):
            return position
        output.append(current)

    # No "*" line found: report the end of the input, or ``i`` itself when
    # the scan started beyond the end.
    return max(i, len(lines))

 def skip_re(regex, lines, i, output):
    """Copy the run of lines matching ``regex`` (from index ``i``) to ``output``.

    Matching uses ``regex.match``, i.e. it is anchored at the start of each
    line.  Returns the index of the first line that does not match, or an
    index past the end of ``lines`` when every remaining line matches.
    """
    for position in range(i, len(lines)):
        current = lines[position]
        if not regex.match(current):
            return position
        output.append(current)

    # Every remaining line matched: report the end of the input, or ``i``
    # itself when the scan started beyond the end.
    return max(i, len(lines))

 def skip_until_re(regex, lines, i, output):
    """Copy lines from index ``i`` into ``output`` until one matches ``regex``.

    Matching uses ``regex.match``, i.e. it is anchored at the start of each
    line.  The matching line is not copied.  Returns its index, or an index
    past the end of ``lines`` when nothing matches.
    """
    for position in range(i, len(lines)):
        current = lines[position]
        if regex.match(current):
            return position
        output.append(current)

    # Nothing matched: report the end of the input, or ``i`` itself when the
    # scan started beyond the end.
    return max(i, len(lines))

 def parse(lines):
    """Return ``lines`` with the body of each poem section wrapped in a verse block.

    The input is walked once.  Everything up to and including the first
    heading (a line starting with "*"), the blank lines after it, and the
    following run of non-blank lines (the book's properties) is copied
    verbatim.  After that, every line matching ``POEM_RE`` starts a poem
    header, which is copied through its ``:END:`` line.  Unless the next
    non-blank line is already ``#+BEGIN_VERSE``, the lines up to the next line
    starting with "#" or "*" are emitted between ``#+BEGIN_VERSE`` and
    ``#+END_VERSE`` with trailing blank lines removed.

    Args:
        lines: The file's lines, as returned by ``readlines()``.

    Returns:
        A new list of output lines.  Each structural error is only detected
        by running off the end of the input, so when one is reported (printed,
        naming the module-level ``path``) every input line has already been
        copied to the result.
    """
    output = []
    total = len(lines)

    # Copy the preamble up to the first heading.
    i = skip_until_section(lines, 0, output)

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    if i >= total:
        print("ERROR: No index found in %s" % path)
        return output

    # Copy the heading itself.
    output.append(lines[i])
    i += 1

    # Blank lines separate the heading from the book's meta-properties.
    i = skip_blank_lines(lines, i, output)

    if i >= total:
        print("ERROR: No properties found for the book %s" % path)
        return output

    # The properties run until the next blank line.
    i = skip_until_blank_line(lines, i, output)

    if i >= total:
        # The placeholder must be a complete "%s"; a bare trailing "%" makes
        # the formatting itself raise ValueError.
        print("ERROR: Nothing found after the book's properties in %s" % path)
        return output

    while i < total:
        if not POEM_RE.match(lines[i]):
            # Not a poem header: pass the line through untouched.
            output.append(lines[i])
            i += 1
            continue

        # Copy the ":GENRE: poem" line, then the rest of the header up to
        # (and including) its :END: line.
        output.append(lines[i])
        i += 1
        i = skip_until_re(END_RE, lines, i, output)

        if i >= total:
            print("ERROR: Missing :END: in %s" % path)
            return output

        output.append(lines[i])
        i += 1

        # The poem body starts at the first non-blank line after the header.
        i = skip_blank_lines(lines, i, output)

        if i >= total:
            print("ERROR: No content after header in %s" % path)
            return output

        # Already wrapped in a verse block: leave this poem alone.
        if BEGIN_VERSE_RE.match(lines[i]):
            output.append(lines[i])
            i += 1
            continue

        output.append("#+BEGIN_VERSE\n\n")

        # The body runs until the next line starting with "#" or "*".  If
        # that is the very next line the verse block is emitted empty.
        poem = []
        while i < total and not SECTION_RE.match(lines[i]):
            poem.append(lines[i])
            i += 1

        # Drop trailing blank lines.  Popping from the end also handles a
        # body whose only non-blank line is its first one.
        while poem and poem[-1].strip() == "":
            poem.pop()

        poem.append("\n#+END_VERSE\n\n\n")
        # extend() appends in place instead of rebuilding the whole list.
        output.extend(poem)

    return output



 if __name__ == "__main__":
    # Usage: fix_poems.py FILE
    # Reads FILE, runs parse() over its lines and rewrites FILE in place.

    # Without this check a missing argument surfaces as a bare IndexError.
    if len(sys.argv) < 2:
        print("Usage: %s FILE" % sys.argv[0])
        sys.exit(1)

    # Module-level name: parse() reads it for its error messages.
    path = sys.argv[1]

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    if not os.path.isfile(path):
        print("Doh! The file %s does not exist." % path)
        sys.exit(1)

    with open(path) as f:
        lines = f.readlines()

    output = parse(lines)

    # The file is only reopened for writing once parse() has returned, so an
    # exception during parsing leaves it untouched.
    with open(path, "w") as f:
        f.write("".join(output))
	#!/usr/bin/env python
	import os.path
	import re
	import sys


	# Patterns are plain raw strings: the "ur" prefix is a syntax error on
	# Python 3, and these ASCII-only patterns compile the same without the "u".

	# A ":GENRE: poem" property line (any case).  Whitespace around the
	# tokens is optional ("\s*"); requiring exactly one whitespace character
	# would miss unindented property lines.
	POEM_RE = re.compile(r"\s*:GENRE:\s*poem\s*", re.IGNORECASE)
	# The ":END:" line that parse() treats as the end of a poem's header;
	# surrounding whitespace is optional here too.
	END_RE = re.compile(r"\s*:END:\s*", re.IGNORECASE)
	# A line whose first character is "#" or "*"; parse() ends a poem body there.
	SECTION_RE = re.compile(r"[#*]")
	# An existing verse opener (case-sensitive): the poem is already wrapped.
	BEGIN_VERSE_RE = re.compile(r"\s*#\+BEGIN_VERSE")

	# File being processed; assigned in the __main__ block and read by parse()
	# when it prints an error message.
	path = ""


	def skip_blank_lines(lines, i, output):
	    """Copy the run of blank lines starting at index ``i`` into ``output``.

	    A line counts as blank when nothing is left after ``strip()``.

	    Returns:
	        The index of the first non-blank line at or after ``i``, or an
	        index past the end of ``lines`` when the input runs out first.
	    """
	    end = len(lines)

	    while i < end and not lines[i].strip():
	        output.append(lines[i])
	        i += 1

	    return i

	def skip_until_blank_line(lines, i, output):
	    """Copy lines from index ``i`` into ``output`` until a blank line.

	    The blank line itself is not copied.

	    Returns:
	        The index of that blank line, or an index past the end of
	        ``lines`` when no blank line follows.
	    """
	    end = len(lines)

	    while i < end and lines[i].strip():
	        output.append(lines[i])
	        i += 1

	    return i

	def skip_until_section(lines, i, output):
	    """Copy lines from index ``i`` into ``output`` until one starts with "*".

	    The "*" line itself is not copied.

	    Returns:
	        The index of that line, or an index past the end of ``lines``
	        when there is no such line.
	    """
	    end = len(lines)

	    while i < end and not lines[i].startswith("*"):
	        output.append(lines[i])
	        i += 1

	    return i

	def skip_re(regex, lines, i, output):
	    """Copy the run of lines matching ``regex`` (from index ``i``) to ``output``.

	    Matching uses ``regex.match``, i.e. it is anchored at the start of
	    each line.

	    Returns:
	        The index of the first line that does not match, or an index past
	        the end of ``lines`` when every remaining line matches.
	    """
	    end = len(lines)

	    while i < end and regex.match(lines[i]):
	        output.append(lines[i])
	        i += 1

	    return i

	def skip_until_re(regex, lines, i, output):
	    """Copy lines from index ``i`` into ``output`` until one matches ``regex``.

	    Matching uses ``regex.match``, i.e. it is anchored at the start of
	    each line.  The matching line is not copied.

	    Returns:
	        The index of the matching line, or an index past the end of
	        ``lines`` when nothing matches.
	    """
	    end = len(lines)

	    while i < end and not regex.match(lines[i]):
	        output.append(lines[i])
	        i += 1

	    return i

	def parse(lines):
	    """Return ``lines`` with the body of each poem section wrapped in a verse block.

	    The input is walked once.  Everything up to and including the first
	    heading (a line starting with "*"), the blank lines after it, and the
	    following run of non-blank lines (the book's properties) is copied
	    verbatim.  After that, every line matching ``POEM_RE`` starts a poem
	    header, which is copied through its ``:END:`` line.  Unless the next
	    non-blank line is already ``#+BEGIN_VERSE``, the lines up to the next
	    line starting with "#" or "*" are emitted between ``#+BEGIN_VERSE``
	    and ``#+END_VERSE`` with trailing blank lines removed.

	    Args:
	        lines: The file's lines, as returned by ``readlines()``.

	    Returns:
	        A new list of output lines.  Each structural error is only
	        detected by running off the end of the input, so when one is
	        reported (printed, naming the module-level ``path``) every input
	        line has already been copied to the result.
	    """
	    output = []
	    numLines = len(lines)

	    # Scan until we find the index (the first heading).
	    i = skip_until_section(lines, 0, output)

	    # Single-argument print(...) behaves the same on Python 2 and 3.
	    if i >= numLines:
	        print("ERROR: No index found in %s" % path)
	        return output

	    output.append(lines[i])
	    i += 1

	    # Scan to the first non-blank line: the book's meta-properties.
	    i = skip_blank_lines(lines, i, output)

	    if i >= numLines:
	        print("ERROR: No properties found for the book %s" % path)
	        return output

	    # The properties run until the first blank line.
	    i = skip_until_blank_line(lines, i, output)

	    if i >= numLines:
	        # The placeholder must be a complete "%s"; a bare trailing "%"
	        # makes the formatting itself raise ValueError.
	        print("ERROR: Nothing found after the book's properties in %s" % path)
	        return output

	    while i < numLines:
	        if POEM_RE.match(lines[i]):
	            # Copy the ":GENRE: poem" line.
	            output.append(lines[i])
	            i += 1

	            # Copy the rest of the header up to :END:.
	            i = skip_until_re(END_RE, lines, i, output)

	            if i >= numLines:
	                print("ERROR: Missing :END: in %s" % path)
	                return output

	            # Copy the :END: line itself.
	            output.append(lines[i])
	            i += 1

	            # The poem body starts at the first non-blank line.
	            i = skip_blank_lines(lines, i, output)

	            if i >= numLines:
	                print("ERROR: No content after header in %s" % path)
	                return output

	            # Already wrapped in a verse block: leave this poem alone.
	            if BEGIN_VERSE_RE.match(lines[i]):
	                output.append(lines[i])
	                i += 1
	                continue

	            output.append("#+BEGIN_VERSE\n\n")
	            poem = []

	            # The body runs until the next line starting with "#" or "*".
	            # If that is the very next line the verse block is emitted
	            # empty.
	            while i < numLines and not SECTION_RE.match(lines[i]):
	                poem.append(lines[i])
	                i += 1

	            # Drop trailing blank lines.  Popping from the end also
	            # handles a body whose only non-blank line is its first one.
	            while poem and poem[-1].strip() == "":
	                poem.pop()

	            poem.append("\n#+END_VERSE\n\n\n")
	            # extend() appends in place instead of rebuilding the list.
	            output.extend(poem)
	        else:
	            # Not a poem header: pass the line through untouched.
	            output.append(lines[i])
	            i += 1

	    return output



	if __name__ == "__main__":
	    # Usage: fix_poems.py FILE
	    # Reads FILE, runs parse() over its lines and rewrites FILE in place.

	    # Without this check a missing argument surfaces as a bare IndexError.
	    if len(sys.argv) < 2:
	        print("Usage: %s FILE" % sys.argv[0])
	        sys.exit(1)

	    # Module-level name: parse() reads it for its error messages.
	    path = sys.argv[1]

	    # Single-argument print(...) behaves the same on Python 2 and 3.
	    if not os.path.isfile(path):
	        print("Doh! The file %s does not exist." % path)
	        sys.exit(1)

	    with open(path) as f:
	        lines = f.readlines()

	    output = parse(lines)

	    # The file is only reopened for writing once parse() has returned, so
	    # an exception during parsing leaves it untouched.
	    with open(path, "w") as f:
	        f.write("".join(output))