Last active
July 10, 2022 08:43
-
-
Save smartkiwi/4506282 to your computer and use it in GitHub Desktop.
FORTRAN namelist parser based on http://code.google.com/p/fortran-namelist/ Updated by Volodymyr Vladymyrov (http://stackoverflow.com/users/1296661/vvladymyrov) to parse format presented in http://stackoverflow.com/questions/14165733/regular-expression-parsing-key-value-pairs-in-namelist-input-file
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
""" | |
Parses Fortran Namelists | |
based on Fortran Namelist parser for Python prog/scripts by Stephane Chamberland ([email protected]) | |
http://code.google.com/p/fortran-namelist/ | |
Updated by Volodymyr Vladymyrov (http://stackoverflow.com/users/1296661/vvladymyrov) to parse format presented | |
in http://stackoverflow.com/questions/14165733/regular-expression-parsing-key-value-pairs-in-namelist-input-file | |
added support for | |
1) | |
$NAMELIST | |
$END | |
2) and parameter names with parentheses | |
PLOT(2) = 12 | |
3) also parameters are stored in an ordered dictionary | |
Defines | |
Namelist class | |
Usage: | |
python namelist.py | |
Produce the following output: | |
{ | |
{'PLOTTING': {'par': [OrderedDict([('PLOT', ['T']), ('PLOT(2) =', ['12'])])], | |
'raw': ['PLOT=T', 'PLOT(2)=12']}, | |
'VEHICLES': {'par': [OrderedDict([('TRUCKS', ['0']), ('PLAINS', ['0']), ('TRAINS', ['0']), ('LIB', ['AUTO.DAT']), ('DATA', ['1.2', '2.34', '3.12', '4.56E-2', '6.78'])])], | |
'raw': ['TRUCKS = 0', | |
'PLAINS= 0, TRAINS = 0', | |
"LIB='AUTO.DAT'", | |
'DATA=1.2,2.34,3.12', | |
'4.56E-2,6.78']}} | |
Also see the example usage in the `__main__` block at the end of this file | |
""" | |
from ordereddict import OrderedDict | |
__author__ = 'Volodymyr Vladymyrov ([email protected]), based on works Stephane Chamberland ([email protected])' | |
__version__ = '$Revision: 1.1 $'[11:-2] | |
__date__ = '$Date: 2013/01/10 17:00:00 $' | |
__copyright__ = 'Copyright (c) 2013, 2006 RPN' | |
__license__ = 'LGPL' | |
import re | |
class Namelist(dict):
    """
    Parse Fortran namelist text into a dictionary of sections.

    Instantiation:
        foo = Namelist(namelist_text)
    where namelist_text is a string holding the namelist content (the text
    is parsed directly; it is not treated as a filename or URL).

    Every section of the form

        $NAMELISTNAME
        opt1 = value1, opt2 = value2,
        ...
        $END

    is recognised; text outside sections is ignored.  A line consisting of
    a lone ``C`` followed by text is treated as a comment.

    After construction the instance maps each section name to
        {'raw': [cleaned source lines of the section],
         'par': [OrderedDict(parameter name -> list of value strings)]}
    Values are kept as strings; Fortran logicals written as .true./.t./
    .false./.f. (any case) are normalised to '.true.' / '.false.'.
    """

    # Identifier: a letter followed by letters, digits or underscores.
    _VARNAME = r'\b[a-zA-Z][a-zA-Z0-9_]*\b'
    _VALUE_BOOL = re.compile(r"(\.(true|false|t|f)\.)", re.I)
    _VALUE_TRUE = re.compile(r"(\.(true|t)\.)", re.I)
    # A token that starts (after optional blanks) with a quote character.
    _QUOTE = re.compile(r"[\s\t]*[\'\"]")
    # "$NAME" alone on a line opens a section ...
    _SECTION_START = re.compile(r"^[\s\t]*\$(" + _VARNAME + r")[\s\t]*$")
    # ... and "$END" closes it.  The word boundary keeps names that merely
    # start with END (e.g. "$ENDPOINTS") usable as section names.
    _SECTION_END = re.compile(r"^\$END\b")
    _PARAM = re.compile(r"[\s\t]*(" + _VARNAME + r")[\s\t]*=[\s\t]*")
    _PARAM_WITH_BRACKETS = re.compile(
        r"[\s\t]*([a-zA-Z][a-zA-Z0-9_\(\)]*)[\s\t]*=[\s\t]*")
    # Comment line: a lone "C", optionally followed by blanks and text that
    # does not start with "=".  This keeps parameters whose name merely
    # begins with C ("CARS= 1") or is exactly C ("C = 3") from being dropped.
    _COMMENT = r"^C(\s+[^\s=].*)?$"

    def __init__(self, input_str):
        """Parse input_str (namelist text) and populate the dictionary."""
        dict.__init__(self)
        # Both attributes hold the raw text; _setFile is kept only for
        # backward compatibility with code that may read it.
        self._setFile = input_str
        self._setContent = input_str
        self.update(self.parse())

    def parse(self):
        """
        Parse self._setContent, called from the class initialization.

        Returns an OrderedDict mapping section name to
        {'raw': [...], 'par': [OrderedDict(name -> [values])]}.
        """
        sections = self._split_sections()
        for section in sections.values():
            params = section['par'][0]
            current = ''
            for item in self._tokenize(section['raw']):
                # A quoted token is always a value, even if it contains '='.
                is_quoted = self._QUOTE.match(item) is not None
                if not is_quoted and self._PARAM.search(item):
                    current = self._PARAM.sub(r"\1", item)
                    params[current] = []
                elif not is_quoted and self._PARAM_WITH_BRACKETS.search(item):
                    # Names such as PLOT(2): extract with the bracket-aware
                    # pattern so the key does not keep the trailing " =".
                    current = self._PARAM_WITH_BRACKETS.sub(r"\1", item)
                    params[current] = []
                elif current:
                    # Values seen before any parameter name are ignored.
                    params[current].append(self._convert_value(item))
        return sections

    def _split_sections(self):
        """Collect the cleaned lines of every $NAME ... $END section."""
        sections = OrderedDict()
        current = ''
        lines = self.clean(self._setContent.split("\n"),
                           commentexpr=self._COMMENT, cleancomma=1)
        for line in lines:
            if self._SECTION_END.match(line):
                current = ''
                continue
            start = self._SECTION_START.match(line)
            if start:
                current = start.group(1)
                sections[current] = {'raw': [], 'par': [OrderedDict()]}
            elif current:
                sections[current]['raw'].append(line)
        return sections

    def _tokenize(self, raw_lines):
        """Split section lines into 'NAME =' tokens and value tokens."""
        tokens = []
        for line in raw_lines:
            for piece in self.splitstring(line):
                if self._QUOTE.match(piece):
                    # Quoted strings are kept whole (may contain , or =).
                    tokens.append(piece)
                else:
                    # Commas separate values; '=' terminates a name.
                    piece = re.sub(r",+", r"\n", piece)
                    piece = re.sub(r"[\s\t]*=", r" =\n", piece)
                    tokens.extend(piece.split("\n"))
        return self.clean(tokens, cleancomma=1)

    def _convert_value(self, item):
        """Normalise logicals; strip blanks and one enclosing quote pair."""
        if self._VALUE_BOOL.match(item):
            if self._VALUE_TRUE.match(item):
                return '.true.'
            return '.false.'
        return re.sub(r"(^[\'\"]|[\'\"]$)", r"", item.strip()).strip()

    #==== Helper functions for Parsing of files
    def clean(self, mystringlist, commentexpr=r"^[\s\t]*\#.*$", spacemerge=0, cleancomma=0):
        """
        Remove leading and trailing blanks, comments/empty lines from a list of strings
            mystringlist = foo.clean(mystringlist, commentexpr=r"^[\s\t]*\#.*$", spacemerge=0, cleancomma=0)
        commentexpr: regular expression whose matches are removed from each
                     item (falsy value disables comment removal)
        spacemerge: if non-zero, collapse runs of whitespace to one space
        cleancomma: if non-zero, remove leading and trailing commas
        Returns a new list; the input list is not modified.
        """
        cleaned = mystringlist
        if cleancomma:
            cleaned = [re.sub(r"(^([\s\t]*\,)+)|((\,[\s\t]*)+$)", "", item).strip()
                       for item in cleaned]
        if commentexpr:
            cleaned = [re.sub(commentexpr, "", item).strip() for item in cleaned]
        if spacemerge:
            return [re.sub(r"[\s\t]+", " ", item).strip()
                    for item in cleaned if item.strip()]
        return [item.strip() for item in cleaned if item.strip()]

    def splitstring(self, mystr):
        """
        Split a string in a list of strings at quote boundaries
        Input: String
        Output: list of strings (quoted parts keep their quotes)
        """
        dquote = r'(^[^\"\']*)(\"[^"]*\")(.*)$'
        squote = r"(^[^\"\']*)(\'[^']*\')(.*$)"
        # Isolate the first quoted part on its own line, then split on lines.
        marked = re.sub(dquote, r"\1\n\2\n\3", re.sub(squote, r"\1\n\2\n\3", mystr))
        parts = [part for part in marked.split("\n") if part]
        if len(parts) > 1:
            # The remainder may contain further quoted parts: recurse.
            nested = []
            for part in parts:
                nested.extend(self.splitstring(part))
            parts = nested
        return parts

    #==== Output function
    def sec_string(self, secname):
        """
        Return a string containing the "cleaned" content of a section
            mysecstring = foo.sec_string(secname)
        Returns '' when the section does not exist.
        """
        try:
            return "\n".join(self[secname]['raw'])
        except (KeyError, IndexError, TypeError):
            return ''

    def param_singleval(self, secname, parname):
        """
        Return the first value of parname in the first subsection of
        secname, or '' when the section/parameter is missing or empty.
        """
        values = self.param_vallist(secname, parname)
        if values and values[0]:
            return values[0][0]
        return ''

    def param_vallist(self, secname, parname, subsec=-1):
        """
        Return a list of values for the specified section/param
            myvallist = foo.param_vallist(secname,parname,subsec=-1)
        if subsec is not specified (or negative), the list contains the value
        list for param in each subsec.  Missing sections or parameters yield
        [] (subsec given) or [[]] (subsec not given) instead of raising.
        """
        single = subsec is not None and subsec >= 0
        try:
            if single:
                return self[secname]['par'][subsec][parname]
            return [item[parname] for item in self[secname]['par']]
        except (KeyError, IndexError, TypeError):
            if single:
                return []
            return [[]]

    def param_string(self, secname, parname, subsec=-1):
        """
        Return a string of comma separated list of values for the specified section/param
            myvalliststring = foo.param_string(secname,parname,subsec=-1)
        if subsec is not specified, the list contains the values for param in each subsec
        The string is the repr() of the value list without its outer brackets.
        """
        try:
            return repr(self.param_vallist(secname, parname, subsec))[1:-1]
        except (KeyError, IndexError, TypeError):
            return ''
"""Example usage""" | |
if __name__ == '__main__':
    # Demonstration: parse a small two-section namelist and dump the result.
    import pprint

    file_str = """
 $VEHICLES
 CARS= 1,
 TRUCKS = 0,
 PLAINS= 0, TRAINS = 0,
 LIB='AUTO.DAT',
C This is a comment
C Data variable spans multiple lines
 DATA=1.2,2.34,3.12,
      4.56E-2,6.78,
 $END
 $PLOTTING
 PLOT=T,
 PLOT(2)=12,
 $END
"""
    nl = Namelist(file_str)
    pprint.pprint(nl)
    for param in nl.keys():
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("Namelist name: %s" % param)
        print("\t" + str(nl[param]['par']))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment