Created
May 28, 2013 19:54
-
-
Save priyadarshan/5665601 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import re | |
import sys | |
# NOTE: plain raw strings are used instead of the Python-2-only ``ur"..."``
# prefix, which is a SyntaxError on Python 3. The patterns are pure ASCII, so
# matching is unaffected.

# A ":CITE-KEY: <key>" line. Group 1 is the key (word characters only);
# group 2 is an optional "-<digits>" suffix following it.
CITE_KEY_RE = re.compile(r"\s*:CITE-KEY:\s*(\w+)(-\d+)?", re.IGNORECASE)

# The ":PROPERTIES:" line opening a drawer. Group 1 is the leading
# whitespace, reused to indent the CITE-KEY line that parse() inserts.
PROPERTIES_RE = re.compile(r"(\s*):PROPERTIES:\s*", re.IGNORECASE)

# The ":END:" line closing a drawer.
END_RE = re.compile(r"\s*:END:\s*", re.IGNORECASE)

# Maps each base cite key to the last number handed out for it by parse().
citeKeys = {}
def skip_until_re(regex, lines, i, output):
    """Copy lines into ``output`` until one matches ``regex``.

    Starting at index ``i``, every line that does not match ``regex`` (tested
    with ``regex.match``, i.e. anchored at the start of the line) is appended
    to ``output``. The matching line itself is not appended.

    Returns the index of the first matching line, or ``len(lines)`` when no
    line matches. If ``i`` is already at or past the end, it is returned
    unchanged.
    """
    total = len(lines)
    for pos in range(i, total):
        candidate = lines[pos]
        if regex.match(candidate):
            return pos
        output.append(candidate)
    # Ran off the end (or never started): mirror the final counter value.
    return max(i, total)
def parse(lines):
    """Renumber the ``:CITE-KEY:`` entries in a file's property drawers.

    The first ``:CITE-KEY:`` line found supplies the base key for the file.
    For every ``:PROPERTIES:`` drawer after that line, existing
    ``:CITE-KEY:`` lines inside the drawer are dropped and a freshly numbered
    ``:CITE-KEY: <key>:<n>`` line is inserted just before the drawer's
    ``:END:`` line, indented like the ``:PROPERTIES:`` line. The counter
    lives in the module-level ``citeKeys`` dict, so numbering continues
    across calls that share the same base key.

    Args:
        lines: List of the file's lines (the caller passes the result of
            ``readlines()``, so each line keeps its terminator).

    Returns:
        The list of output lines. When no ``:CITE-KEY:`` line exists, an
        error is printed and the input lines are returned unchanged. A drawer
        with no ``:END:`` before end of file is copied through without
        inserting a new key.
    """
    output = []
    numLines = len(lines)

    # Copy everything up to the first CITE-KEY, which names this file's key.
    i = skip_until_re(CITE_KEY_RE, lines, 0, output)
    if i >= numLines:
        print("ERROR: missing initial CITE-KEY")
        return output

    citeKey = CITE_KEY_RE.match(lines[i]).group(1)
    citeKeys.setdefault(citeKey, 0)
    output.append(lines[i])
    i += 1

    # Process each following :PROPERTIES: ... :END: drawer in turn.
    while i < numLines:
        i = skip_until_re(PROPERTIES_RE, lines, i, output)
        if i >= numLines:
            break
        indent = PROPERTIES_RE.match(lines[i]).group(1)
        output.append(lines[i])
        i += 1

        # Walk the drawer body. The bound must be strict: the previous
        # ``i <= numLines`` indexed past the end of ``lines`` (IndexError)
        # whenever a drawer was not closed by :END: before end of file.
        while i < numLines:
            line = lines[i]
            i += 1
            if CITE_KEY_RE.match(line):
                # Drop stale keys; a fresh one is written before :END:.
                continue
            if END_RE.match(line):
                citeKeys[citeKey] += 1
                output.append("%s:CITE-KEY: %s:%d\n" % (indent, citeKey, citeKeys[citeKey]))
                output.append(line)
                break
            output.append(line)

    return output
if __name__ == "__main__":
    # Usage: <script> ROOT_DIR
    # Rewrites every *.txt file under ROOT_DIR in place with renumbered
    # CITE-KEY entries, then prints the final counter for each base key.
    if len(sys.argv) < 2:
        # Previously a missing argument surfaced as a bare IndexError.
        sys.exit("usage: %s ROOT_DIR" % sys.argv[0])

    rootPath = os.path.realpath(os.path.expanduser(sys.argv[1]))
    for root, dirs, files in os.walk(rootPath):
        for filename in files:
            if not filename.endswith(".txt"):
                continue
            # Parenthesised single-argument print behaves the same on
            # Python 2 and Python 3.
            print(filename)
            filePath = os.path.join(root, filename)
            with open(filePath) as f:
                lines = f.readlines()
            output = parse(lines)
            # The file is overwritten in place with the rewritten lines.
            with open(filePath, "w") as f:
                f.write("".join(output))

    # Case-insensitive ordering via ``key=``; the ``cmp=`` argument used
    # before does not exist on Python 3. Both sorts are stable, so the
    # resulting order is the same.
    for key in sorted(citeKeys, key=lambda k: k.lower()):
        print("%s:%d" % (key, citeKeys[key]))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment