Skip to content

Instantly share code, notes, and snippets.

@priyadarshan
Last active December 18, 2015 20:48
Show Gist options
  • Save priyadarshan/5842580 to your computer and use it in GitHub Desktop.
Save priyadarshan/5842580 to your computer and use it in GitHub Desktop.
Fix CITE-KEY regex
import os
import re
import sys
# NOTE: the patterns below are pure ASCII, so a plain raw string matches
# exactly like the former ``ur"..."`` literals on Python 2 while also being
# valid syntax on Python 3 (where the ``ur`` prefix is a SyntaxError).

# A ":CITE-KEY:" property line.  group(1) is the word part of the key (None
# when the property has no value); group(2) is an optional "-<digits>" suffix.
CITE_KEY_RE = re.compile(r"\s*:CITE-KEY:(?:\s*(\w+)(-\d+)?)?", re.IGNORECASE)

# The ":PROPERTIES:" line that opens a drawer.  group(1) is its leading
# whitespace, reused to indent the CITE-KEY line inserted into that drawer.
PROPERTIES_RE = re.compile(r"(\s*):PROPERTIES:\s*", re.IGNORECASE)

# The ":END:" line that closes a drawer.
END_RE = re.compile(r"\s*:END:\s*", re.IGNORECASE)

# A ":GENRE:" property whose value is note, preface or section; group(1) is
# the matched genre word.
META_GENRE_RE = re.compile(r"\s*:GENRE:\s*(note|preface|section)\b", re.IGNORECASE)

# Running counter per cite key (key -> last number handed out).  It is module
# state, so numbering continues across successive parse() calls.
citeKeys = {}
def skip_until_re(regex, lines, i, output):
    """Copy lines through to *output* until one matches *regex*.

    Scanning starts at index *i*.  Every line that does not match (tested
    with ``regex.match``) is appended to *output*; the matching line itself
    is not appended.

    Returns the index of the first matching line, or the end-of-list index
    when no line from *i* onwards matches.
    """
    for pos in range(i, len(lines)):
        candidate = lines[pos]
        if regex.match(candidate):
            return pos
        output.append(candidate)
    # Nothing matched: report "past the end" (or the start index unchanged if
    # it already was past the end).
    return max(i, len(lines))
def parse(lines):
    """Renumber the ``:CITE-KEY:`` properties in one file's lines.

    The first ``:CITE-KEY:`` line supplies the base key and is copied through
    unchanged.  For every ``:PROPERTIES:`` drawer that starts after it, any
    existing ``:CITE-KEY:`` lines are dropped and a fresh one is inserted
    immediately before the drawer's ``:END:`` line, indented like the
    ``:PROPERTIES:`` line:

    * a drawer containing a ``:GENRE:`` of note/preface/section gets
      ``<key>:<count + 1>n`` and does not advance the counter;
    * any other drawer advances the counter and gets ``<key>:<count>``.

    The counter lives in the module-level ``citeKeys`` dict, so numbering
    continues across calls that share the same base key.

    Args:
        lines: the file content as a list of lines (line endings included).

    Returns:
        A new list of lines.  The input is returned unchanged (as a new list)
        when there is no initial ``:CITE-KEY:`` or it has no value; a drawer
        with no ``:END:`` before end of input is copied through verbatim.
        Each of these cases prints an ``ERROR:`` message.
    """
    output = []
    numLines = len(lines)

    # Scan until we find the first CITE-KEY
    i = skip_until_re(CITE_KEY_RE, lines, 0, output)
    if i >= numLines:
        print("ERROR: missing initial CITE-KEY")
        return output

    citeKey = CITE_KEY_RE.match(lines[i]).group(1)
    if citeKey is None:
        # ":CITE-KEY:" with no value: there is no base key to number from.
        # Formatting None would write "None:<n>" keys into the file, so leave
        # the content untouched instead.
        print("ERROR: initial CITE-KEY has no value")
        return output + lines[i:]
    citeKeys.setdefault(citeKey, 0)
    output.append(lines[i])
    i += 1

    # Now rewrite each following :PROPERTIES: ... :END: drawer
    while i < numLines:
        i = skip_until_re(PROPERTIES_RE, lines, i, output)
        if i >= numLines:
            return output
        indent = PROPERTIES_RE.match(lines[i]).group(1)
        output.append(lines[i])
        i += 1

        drawerStart = i
        drawer = []  # rewritten drawer body, committed only once :END: is seen
        haveMeta = False
        haveEnd = False
        # Bounded by "<" (not "<="): a drawer that runs off the end of the
        # input must terminate the loop rather than index past the list.
        while i < numLines and not haveEnd:
            line = lines[i]
            i += 1
            if CITE_KEY_RE.match(line):
                # Stale key: dropped, a fresh one is inserted before :END:.
                continue
            if META_GENRE_RE.match(line):
                haveMeta = True
            elif END_RE.match(line):
                # Insert next CITE-KEY
                if haveMeta:
                    # Uses the upcoming number plus a literal "n" suffix and
                    # leaves the counter untouched.
                    drawer.append("%s:CITE-KEY: %s:%dn\n" % (indent, citeKey, citeKeys[citeKey] + 1))
                else:
                    citeKeys[citeKey] += 1
                    drawer.append("%s:CITE-KEY: %s:%d\n" % (indent, citeKey, citeKeys[citeKey]))
                haveEnd = True
            drawer.append(line)

        if haveEnd:
            output.extend(drawer)
        else:
            # No :END: before end of input: keep the drawer exactly as it was
            # so its existing CITE-KEY lines are not lost.
            print("ERROR: unterminated PROPERTIES drawer")
            output.extend(lines[drawerStart:])
    return output
if __name__ == "__main__":
    # Usage: <script> ROOT_DIR
    #
    # Rewrites, in place, every "*.txt" file found under ROOT_DIR (recursively)
    # with the output of parse(), then prints the final counter of each cite
    # key.  Counters are shared across files through the module-level citeKeys.
    #
    # Output uses single-argument print(...) calls and sorted(key=...), which
    # behave the same on Python 2 and Python 3.
    if len(sys.argv) < 2:
        # Fail with a usage message rather than a bare IndexError.
        sys.exit("usage: %s ROOT_DIR" % sys.argv[0])

    rootPath = os.path.realpath(os.path.expanduser(sys.argv[1]))
    for root, dirs, files in os.walk(rootPath):
        for filename in files:
            if not filename.endswith(".txt"):
                continue
            print(filename)
            filePath = os.path.join(root, filename)
            with open(filePath) as f:
                lines = f.readlines()
            output = parse(lines)
            with open(filePath, "w") as f:
                f.write("".join(output))

    print("\ncite keys:\n")
    # Case-insensitive ordering of the keys.
    for key in sorted(citeKeys, key=lambda k: k.lower()):
        print("%s:%d" % (key, citeKeys[key]))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment