Created
October 12, 2015 12:38
-
-
Save joezuntz/55220a8c9c9c9ba8e062 to your computer and use it in GitHub Desktop.
Parser for the LSST DESC SRM
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import glob | |
| import re | |
| import pyparsing as pp | |
| import collections | |
def parse_projects(text):
    r"""Scan LaTeX source text and return the list of projects found in it.

    Four macros are searched for: ``\keyproject``, ``\deliverable``,
    ``\keytask`` and ``\prereq``.  Each one gets a parse action built around
    the same list object, so every action can read and extend what the
    others have recorded.  That shared list is the return value.
    """
    found = []

    # nestedExpr copes with nested brackets and yields the outermost group.
    square = pp.nestedExpr(opener='[', closer=']')
    curly = pp.nestedExpr(opener='{', closer='}')

    # The three element macros all have the shape \macro[...]{...}{...}.
    project_expr = r"\keyproject" + square + curly + curly
    deliverable_expr = r"\deliverable" + square + curly + curly
    keytask_expr = r"\keytask" + square + curly + curly
    # Prerequisites look like:
    #   \prereq{\deliverableref{TJP2-DC1-SW1}, \deliverableref{CI5}}
    prereq_expr = r"\prereq" + pp.nestedExpr(opener='{', closer='}')

    # Pair every expression with the action that records its matches.
    wiring = (
        (project_expr, ProjectParser),
        (deliverable_expr, DeliverableParser),
        (keytask_expr, KeyTaskParser),
        (prereq_expr, PreReqParser),
    )
    for expr, action_cls in wiring:
        expr.setParseAction(action_cls(found))

    # Overall search pattern: any one of the four expressions above.
    matcher = pp.Or([project_expr, deliverable_expr, keytask_expr,
                     prereq_expr])

    # scanString returns a lazy generator; it has to be exhausted for the
    # parse actions to run.  The matches themselves are not needed.
    list(matcher.scanString(text))

    return found
class ElementParser(object):
    """Base for parse actions that work on one shared list of projects.

    The list passed to the constructor is stored by reference, not copied.
    """

    def __init__(self, projects):
        self.projects = projects

    def parse(self, content):
        """Return a ``name``/``code``/``date`` dict for one matched element.

        ``content[1]``, ``content[2]`` and ``content[3]`` are flattened with
        ``match_to_string`` to give the code, the date and the name, in that
        order.  ``content[0]`` is not read.
        """
        # Every element handled here has the same three-group signature.
        code, date, name = (match_to_string(content[k]) for k in (1, 2, 3))
        return {'name': name, 'code': code, 'date': date}
class ProjectParser(ElementParser):
    """Parse action that appends a new project record to the shared list."""

    def __call__(self, s, loc, content):
        # Only ``content`` is used; a project starts with no deliverables.
        project = self.parse(content)
        project.update(deliverables=[])
        self.projects.append(project)
class DeliverableParser(ElementParser):
    """Parse action that attaches a deliverable to the latest project."""

    def __call__(self, s, loc, content):
        deliverable = self.parse(content)
        deliverable.update(keytasks=[], prereqs=[])
        # The most recently recorded project owns this deliverable.
        self.projects[-1]['deliverables'].append(deliverable)
class KeyTaskParser(ElementParser):
    """Parse action that attaches a key task to the latest deliverable."""

    def __call__(self, s, loc, content):
        task = self.parse(content)
        # Last deliverable of the last project recorded so far.
        owner = self.projects[-1]['deliverables'][-1]
        owner['keytasks'].append(task)
class PreReqParser(object):
    r"""Parse action that records ``\prereq{...}`` references.

    Every ``\deliverableref{X}`` / ``\keyprojectref{X}`` found inside the
    matched ``\prereq`` group is appended to the ``prereqs`` list of the
    last deliverable of the last project, as ``'deliverable:X'`` or
    ``'keyproject:X'``.

    Diagnostics are written to stderr so that they cannot get mixed into
    whatever the script prints on stdout.
    """

    # Reference macro -> prefix stored in front of the referenced code.
    _REF_PREFIXES = {
        r'\deliverableref': 'deliverable:',
        r'\keyprojectref': 'keyproject:',
    }

    def __init__(self, projects):
        self.projects = projects
        RB = pp.nestedExpr(opener='{', closer='}')
        # NOTE: __call__ below does not use these two patterns; they are
        # kept because they are part of the object's attributes.
        self.projectref_pattern = r"\keyprojectref" + RB
        self.deliverableref_pattern = r"\deliverableref" + RB

    def __call__(self, s, loc, content):
        """Record the references contained in one ``\\prereq`` match.

        ``content`` must provide ``asList()``; element 1 of that list is the
        nested token list of the brace group.  Only ``content`` is used,
        ``s`` and ``loc`` are ignored.

        Raises IndexError when no project, or no deliverable in the last
        project, has been recorded yet.
        """
        import sys

        content = content.asList()
        content = content[1]
        sys.stderr.write('-' * 30 + '\n')
        sys.stderr.write('{0}\n'.format(content))
        sys.stderr.write('-' * 30 + '\n')
        if not content:
            return

        current_project = self.projects[-1]
        current_deliverable = current_project['deliverables'][-1]
        prereqs = current_deliverable['prereqs']

        index = 0
        total = len(content)
        while index < total:
            token = content[index]
            prefix = None
            if isinstance(token, str):
                # A macro glued to a separator (",\deliverableref") still
                # counts, hence the comma/whitespace stripping.
                prefix = self._REF_PREFIXES.get(token.strip(',').strip())
            # A reference is only usable when a brace group (a nested list)
            # follows it; a trailing macro or a macro followed by a plain
            # string is skipped instead of failing or being read
            # character by character.
            has_group = (index + 1 < total
                         and isinstance(content[index + 1], list))
            if prefix is None or not has_group:
                index += 1
                continue
            for req in content[index + 1]:
                if req == ',':
                    continue
                prereqs.append(prefix + req)
            index += 2

        sys.stderr.write(
            "project {0} reqs:{1}".format(current_project['name'], prereqs)
            + '\n')
# Matches \include{name}; the single capture group is the included name.
# The character class is spelled out: the former "A-z" range also matched
# the punctuation that sits between "Z" and "a" in ASCII ([ \ ] ^ _ `).
# Of those, only the underscore is kept.
INCLUDE_RE = re.compile(r'\\include\{([a-zA-Z0-9_]+)\}')


def find_included_files(base):
    r"""Return the names given to ``\include{...}`` in the file *base*.

    The file is read through ``read_latex``, so ``\include`` lines that are
    commented out with a leading ``%`` are not reported.  Names are returned
    in order of appearance, exactly as written (no extension is added).
    """
    return INCLUDE_RE.findall(read_latex(base))
def read_latex(filename):
    """Return the text of *filename* with LaTeX comment lines removed.

    A line is dropped when its first non-blank character is ``%``.  A ``%``
    comment that follows other content on the same line is left in place.
    """
    # The context manager closes the file even if reading fails.
    with open(filename) as handle:
        # Each line still carries its own newline, so the pieces are joined
        # with an empty separator; joining with "\n" would double every
        # line break.
        return ''.join(
            line for line in handle if not line.lstrip().startswith('%')
        )
def match_to_string(m):
    """Flatten a possibly nested list of strings into one string.

    A plain string is returned unchanged.  Anything else is iterated, each
    item is flattened recursively, and the pieces are joined with single
    spaces.
    """
    if not isinstance(m, str):
        pieces = [match_to_string(item) for item in m]
        return ' '.join(pieces)
    return m
def dump_projects(groups):
    """Print a text outline of every non-empty group of projects.

    *groups* maps a file name to a list of project dicts.  For each group
    the upper-cased file name is printed, underlined with at least ten
    ``=`` characters, followed by one line per project, deliverable and key
    task showing its ``name`` and ``code``.

    Each ``print`` call takes exactly one argument, so the output is the
    same on Python 2 and Python 3.
    """
    for filename, projects in groups.items():
        if not projects:
            continue
        print(filename.upper())
        print("=" * max(len(filename), 10))
        for proj in projects:
            print("+ {0[name]} [{0[code]}]".format(proj))
            for dv in proj['deliverables']:
                print(' - {0[name]} [{0[code]}]'.format(dv))
                for kt in dv['keytasks']:
                    print(' + {0[name]} [{0[code]}]'.format(kt))
def main():
    """Parse every file included by ``srm.tex`` and print the result as JSON.

    Each name returned by ``find_included_files`` gets ``.tex`` appended and
    is parsed with ``parse_projects``.  Files that yield no projects are
    left out of the output.
    """
    import json

    groups = {}
    # Walk all the files included in the base "srm.tex" file.
    for filename in find_included_files("srm.tex"):
        projects = parse_projects(read_latex(filename + ".tex"))
        if projects:
            groups[filename] = projects

    # Other possible outputs: dump_projects(groups) for a text outline, or
    # yaml.dump(groups) (needs the third-party yaml module).
    # Single-argument print() behaves the same on Python 2 and Python 3.
    print(json.dumps(groups))
# Run the parser only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment