Last active
December 18, 2015 20:48
-
-
Save priyadarshan/5842580 to your computer and use it in GitHub Desktop.
Fix CITE-KEY regex
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import re | |
import sys | |
# Line patterns for the ":NAME: value" property lines this script rewrites.
# They are plain raw strings rather than Python 2-only ur"..." literals so the
# module parses on both Python 2 and Python 3; the patterns are pure ASCII, so
# what they match is unchanged.

# A ":CITE-KEY:" line. Group 1 is the key's base name (None when the line has
# no value); group 2 is an optional "-<digits>" suffix directly after it.
CITE_KEY_RE = re.compile(r"\s*:CITE-KEY:(?:\s*(\w+)(-\d+)?)?", re.IGNORECASE)

# The ":PROPERTIES:" line that opens a block. Group 1 captures the leading
# whitespace so inserted lines can reuse the same indentation.
PROPERTIES_RE = re.compile(r"(\s*):PROPERTIES:\s*", re.IGNORECASE)

# The ":END:" line that closes a block.
END_RE = re.compile(r"\s*:END:\s*", re.IGNORECASE)

# A ":GENRE:" line whose value is note, preface or section; such entries are
# numbered differently by parse().
META_GENRE_RE = re.compile(r"\s*:GENRE:\s*(note|preface|section)\b", re.IGNORECASE)

# Maps each cite-key base name to the highest number handed out so far.
# Module-level on purpose: the count carries over between parse() calls.
citeKeys = {}
def skip_until_re(regex, lines, i, output):
    """Copy lines into ``output`` until one matches ``regex``.

    Starting at index ``i``, every line that ``regex.match`` rejects is
    appended to ``output``. Scanning stops at the first matching line, which
    is NOT appended, so the caller decides what to do with it.

    Args:
        regex: Compiled pattern (anything with a ``match`` method).
        lines: Sequence of text lines to scan.
        i: Index at which to start scanning.
        output: List that receives the skipped lines (mutated in place).

    Returns:
        The index of the first matching line, or ``len(lines)`` when no line
        from ``i`` onward matches. If ``i`` is already past the end it is
        returned unchanged.
    """
    numLines = len(lines)
    while i < numLines and not regex.match(lines[i]):
        output.append(lines[i])
        i += 1
    return i
def parse(lines):
    """Renumber the CITE-KEY lines of one file's ``:PROPERTIES:`` blocks.

    The first ``:CITE-KEY:`` line in ``lines`` supplies the base key; it and
    everything before it are copied through unchanged. In every later
    ``:PROPERTIES:`` ... ``:END:`` block, existing ``:CITE-KEY:`` lines are
    dropped and one freshly numbered ``:CITE-KEY:`` line is inserted just
    before ``:END:``, indented like the ``:PROPERTIES:`` line.

    Numbering uses the module-level ``citeKeys`` counter, so it continues
    across calls that share a base key.

    Args:
        lines: List of text lines, each including its line terminator.

    Returns:
        A new list of lines. The input is returned unmodified (as a copy or
        an equal list) when no usable initial CITE-KEY is found; an error
        message is printed in that case.
    """
    output = []
    numLines = len(lines)

    # Scan until we find the first CITE-KEY; everything before it is kept.
    i = skip_until_re(CITE_KEY_RE, lines, 0, output)
    if i >= numLines:
        print("ERROR: missing initial CITE-KEY")
        return output

    citeKey = CITE_KEY_RE.match(lines[i]).group(1)
    if citeKey is None:
        # A ":CITE-KEY:" line with no value would otherwise register None as
        # a key and emit ":CITE-KEY: None:1" lines. Leave the file untouched.
        print("ERROR: empty initial CITE-KEY")
        return list(lines)
    if citeKey not in citeKeys:
        citeKeys[citeKey] = 0
    output.append(lines[i])
    i += 1

    # Process each following :PROPERTIES: ... :END: block.
    while i < numLines:
        i = skip_until_re(PROPERTIES_RE, lines, i, output)
        if i >= numLines:
            return output
        indent = PROPERTIES_RE.match(lines[i]).group(1)
        output.append(lines[i])
        i += 1

        haveMeta = False
        # Bounded by "<", not "<=": a block that is never closed by :END:
        # must end the scan instead of indexing past the last line.
        while i < numLines:
            line = lines[i]
            i += 1
            if CITE_KEY_RE.match(line):
                # Drop the stale key; a new one is written before :END:.
                continue
            if META_GENRE_RE.match(line):
                haveMeta = True
            elif END_RE.match(line):
                # Insert next CITE-KEY
                if haveMeta:
                    # note/preface/section entries do not consume a number:
                    # they get the number the next regular entry will receive,
                    # followed by "n" (presumably marking a note-like entry;
                    # confirm with the data's conventions).
                    output.append("%s:CITE-KEY: %s:%dn\n" % (indent, citeKey, citeKeys[citeKey] + 1))
                else:
                    citeKeys[citeKey] += 1
                    output.append("%s:CITE-KEY: %s:%d\n" % (indent, citeKey, citeKeys[citeKey]))
                output.append(line)
                break
            output.append(line)
    return output
if __name__ == "__main__":
    # Usage: <script> ROOT_DIR
    # Rewrites every *.txt file under ROOT_DIR in place with the output of
    # parse(), then prints the final counter for each cite key.
    if len(sys.argv) < 2:
        sys.stderr.write("usage: %s ROOT_DIR\n" % sys.argv[0])
        sys.exit(2)

    rootPath = os.path.realpath(os.path.expanduser(sys.argv[1]))
    for root, dirs, files in os.walk(rootPath):
        for filename in files:
            if not filename.endswith(".txt"):
                continue
            # Single-argument print(...) calls behave the same on Python 2
            # and Python 3.
            print(filename)
            filePath = os.path.join(root, filename)
            with open(filePath) as f:
                lines = f.readlines()
            # Parse fully before reopening for writing, so an exception in
            # parse() cannot leave a truncated file behind.
            output = parse(lines)
            with open(filePath, "w") as f:
                f.write("".join(output))

    print("\ncite keys:\n")
    # Case-insensitive ordering; key= replaces the Python 2-only cmp= argument.
    for key in sorted(citeKeys, key=lambda k: k.lower()):
        print("%s:%d" % (key, citeKeys[key]))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment