Created
March 16, 2014 10:23
-
-
Save bsdelf/9581226 to your computer and use it in GitHub Desktop.
parse & statistical
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse | |
import re | |
import copy | |
from collections import defaultdict | |
""" | |
Expected input format (each PAPER header line is followed by that paper's REVIEW sections):
*********************** PAPER 1 *********************** | |
+++++++++++++++++ REVIEW 1 ++++++++++++++++ | |
+++++++++++++++++ REVIEW 2 ++++++++++++++++ | |
*********************** PAPER 2 *********************** | |
+++++++++++++++++ REVIEW 1 ++++++++++++++++ | |
+++++++++++++++++ REVIEW 2 ++++++++++++++++ | |
+++++++++++++++++ REVIEW 3 ++++++++++++++++ | |
""" | |
class Paper(object):
    """A single paper and the running tally of its recommendation votes.

    Instance attributes:
      idx    -- paper number; starts at -1 until the caller assigns one.
      recmap -- ``defaultdict(int)`` mapping a recommendation label to its
                count, pre-seeded with the four known labels at zero.
    """

    def __init__(self):
        # The order of this tuple is the insertion order of the seeded keys.
        known_labels = (
            '推荐到 NIPS 发表',
            '推荐到 ICCV 发表',
            '推荐到 JMLR 发表',
            '推荐到 CVPR 发表',
        )
        # A label outside the seeded set is created at 0 on first access.
        self.recmap = defaultdict(int, dict.fromkeys(known_labels, 0))
        self.idx = -1
# Stand-in for the standard enum module, for Python versions earlier than 3.4.
def enum(**enums):
    """Return a new class named 'Enum' whose class attributes are *enums*.

    Each keyword argument becomes a class attribute holding the given value,
    e.g. ``enum(A=1).A == 1``.
    """
    members = dict(enums)
    no_bases = tuple()
    return type('Enum', no_bases, members)


# Scanner states used by main(): FIND_PAPER while waiting for the first PAPER
# header, FIND_CONTENT once inside a paper.
Status = enum(FIND_PAPER=1, FIND_CONTENT=2)
def main():
    """Tally recommendation votes per paper from a review dump and print them.

    Takes one positional command-line argument: the path of the text file to
    scan. A line matching ``*** PAPER <n> ***`` starts a new paper. Within a
    paper, each line matching ``推荐...: <score>`` adds one vote for that label
    when the score is greater than 1. Every other line (including the REVIEW
    separators) is ignored.

    Output goes to stdout, tab-separated: a header line with the labels seeded
    by ``Paper``, then one line per paper, sorted by paper number, holding the
    paper number followed by its counts. If the file contains no PAPER header,
    only the header line is printed.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(dest="file", metavar="file", nargs=1)
    args = parser.parse_args()

    # Raw strings so that \*, \s and \d reach the regex engine untouched
    # instead of being treated as (invalid) string escapes.
    re_paper = re.compile(r'\*+\sPAPER\s(\d+)\s\*+')
    re_content = re.compile(r'(推荐.*)?: (\d+)')

    paper = None
    status = Status.FIND_PAPER
    papers = []

    # NOTE(review): the file is read with the platform default encoding;
    # confirm the input's encoding before pinning one here.
    with open(args.file[0]) as f:
        for line in f:
            if status == Status.FIND_PAPER:
                # Skip everything before the first PAPER header.
                m = re_paper.match(line)
                if m is not None:
                    paper = Paper()
                    paper.idx = int(m.group(1))
                    status = Status.FIND_CONTENT
            elif status == Status.FIND_CONTENT:
                m = re_content.match(line)
                if m is not None:
                    # NOTE(review): group 1 is optional in the pattern, so a
                    # line starting with ": <n>" yields key None.
                    key = m.group(1)
                    val = int(m.group(2))
                    # Only scores greater than 1 count as a vote.
                    if val > 1:
                        paper.recmap[key] += 1
                else:
                    m = re_paper.match(line)
                    if m is not None:
                        # A new header closes the current paper.
                        papers.append(paper)
                        paper = Paper()
                        paper.idx = int(m.group(1))
                        status = Status.FIND_CONTENT

    # Flush the last paper. When no header was found `paper` is still None
    # and must not reach the sort key below.
    if paper is not None:
        papers.append(paper)
    papers.sort(key=lambda p: p.idx)

    # NOTE(review): the header has no cell for the paper-number column, and
    # labels outside the seeded set add row cells without a header cell.
    # Left as-is to keep the existing output format.
    print("".join("{:s}\t".format(k) for k in Paper().recmap))
    for paper in papers:
        cells = ["{:d}\t".format(paper.idx)]
        cells.extend("{:d}\t".format(v) for v in paper.recmap.values())
        print("".join(cells))
# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment