Created
February 7, 2017 20:36
-
-
Save lukecampbell/e12936b4261ed2cdaeab5a954b033825 to your computer and use it in GitHub Desktop.
Convert valid_min/valid_max strings to numeric data types
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# -*- coding: utf-8 -*- | |
''' | |
convert.py | |
Copyright 2016 RPS | |
Permission is hereby granted, free of charge, to any person obtaining a copy of | |
this software and associated documentation files (the "Software"), to deal in | |
the Software without restriction, including without limitation the rights to | |
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies | |
of the Software, and to permit persons to whom the Software is furnished to do | |
so, subject to the following conditions: | |
The above copyright notice and this permission notice shall be included in all | |
copies or substantial portions of the Software. | |
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |
SOFTWARE. | |
''' | |
from __future__ import print_function | |
from argparse import ArgumentParser | |
from netCDF4 import Dataset | |
import sys | |
import numpy as np | |
import re | |
import os | |
import glob | |
# Python 2/3 compatibility shim: merely referencing ``basestring`` raises
# NameError when the builtin does not exist, in which case the name is bound
# to ``str`` so the isinstance() checks below work on either interpreter.
try:
    basestring
except NameError:
    basestring = str
def main():
    '''
    Replace occurrences of valid_min/valid_max where they appear as strings with the numeric types
    '''
    # NOTE: the docstring above is also handed to argparse as the --help
    # description, so it is user-facing text rather than just documentation.
    cli = ArgumentParser(description=main.__doc__)
    # The positional path is optional; it falls back to the current directory.
    cli.add_argument(
        'path',
        nargs='?',
        default=os.getcwd(),
        help='Optional path to traverse'
    )
    options = cli.parse_args()
    iter_files(options.path)
    # Always report success; failures surface as uncaught exceptions.
    return 0
def iter_files(root_path):
    '''
    Walk root_path recursively and patch every ``*.nc`` file that is found
    to have strings for valid_min/valid_max

    The previous implementation globbed ``<root_path>/**/*.nc`` without
    ``recursive=True``. In that mode ``**`` behaves like a plain ``*``, so
    only files sitting exactly one directory below root_path were visited
    (and Python 2's glob has no recursive mode at all). os.walk is available
    on both interpreters and visits root_path itself plus every depth below.

    Names starting with a dot are skipped, mirroring glob's wildcard rules.
    Symlinked directories are not followed (os.walk's default).

    :param str root_path: Directory to traverse
    '''
    for dirpath, dirnames, filenames in os.walk(root_path):
        # Prune hidden directories in place so os.walk never descends into
        # them, and sort for a deterministic traversal order.
        dirnames[:] = sorted(d for d in dirnames if not d.startswith('.'))
        for filename in sorted(filenames):
            if filename.startswith('.') or not filename.endswith('.nc'):
                continue
            filepath = os.path.join(dirpath, filename)
            # Only reopen the file in write mode when there is work to do.
            if check_dataset(filepath):
                patch_dataset(filepath)
def check_dataset(netcdf_path):
    '''
    Report whether any variable in the file carries a valid_min or valid_max
    attribute whose value is a string

    :param str netcdf_path: Path of the netCDF file; it is opened read-only
    :return: True if at least one such string attribute exists, else False
    '''
    with Dataset(netcdf_path, 'r') as nc:
        # any() stops at the first string-typed bound, just like an early
        # return would; a missing attribute reads as None and never matches.
        return any(
            isinstance(getattr(nc.variables[varname], bound, None), basestring)
            for varname in nc.variables
            for bound in ('valid_min', 'valid_max')
        )
def patch_dataset(netcdf_path):
    '''
    Open a netCDF file in r+ mode and hand every string-valued
    valid_min/valid_max attribute to patch_variable

    :param str netcdf_path: Path of the netCDF file to modify in place
    '''
    bounds = ('valid_min', 'valid_max')
    with Dataset(netcdf_path, 'r+') as nc:
        for varname in nc.variables:
            ncvar = nc.variables[varname]
            for bound in bounds:
                # Attributes that are absent (None) or already non-string
                # are left untouched.
                if not isinstance(getattr(ncvar, bound, None), basestring):
                    continue
                patch_variable(nc, varname, bound)
def patch_variable(nc, variable, attribute):
    '''
    Rewrite one string attribute of a netCDF variable as a number cast to
    the variable's own dtype

    If the string cannot be converted and is empty or whitespace-only, the
    attribute is deleted instead; any other unconvertible value re-raises
    the ValueError.

    :param netCDF4.Dataset nc: An open netCDF file descriptor with r+/w modes
    :param str variable: Name of the variable to patch
    :param str attribute: Name of the attribute to patch
    :raises ValueError: If the attribute is non-blank but not convertible
    '''
    target = nc.variables[variable]
    raw_value = getattr(target, attribute)
    target_dtype = target.dtype
    try:
        parsed = convert_numeric_literals(raw_value)
        # Wrap in a numpy array so the value can be cast to the variable's dtype.
        typed_value = np.array(parsed).astype(target_dtype)
        setattr(target, attribute, typed_value)
    except ValueError:
        if raw_value.strip() != "":
            raise
        # A blank attribute carries no information; drop it.
        target.delncattr(attribute)
# Anchored patterns for the literal forms accepted by convert_numeric_literals.
# Every pattern must consume the whole (whitespace-stripped) string.
_INTEGER_LITERAL = re.compile(r"^([+-]?[0-9]+)[Bb]?$")
_CHAR_LITERAL = re.compile(r"^'([^\\])'$")
_ESCAPE_LITERAL = re.compile(r"^'\\(.)'$")
_OCTAL_LITERAL = re.compile(r"^'\\([0-7]{1,3})'$")
_HEX_LITERAL = re.compile(r"^'\\[xX]([0-9a-fA-F]{1,2})'$")
# Either "digits.digits" with an optional exponent, or "digits" with a
# mandatory exponent; an optional f/F or d/D suffix is ignored.
_REAL_LITERAL = re.compile(
    r"^([+-]?(?:[0-9]*\.[0-9]*(?:[eE][+-]?[0-9]+)?|[0-9]*[eE][+-]?[0-9]+))"
    r"[FfDd]?$"
)
# Character codes for the single-letter backslash escapes.
_ESCAPE_CODES = {
    'a': 7, 'b': 8, 'f': 12, 'n': 10, 'r': 13, 't': 9, 'v': 11,
    '\\': 92, "'": 39, '"': 34,
}


def convert_numeric_literals(string):
    '''
    Returns a Python number parsed from the literal string supplied

    Accepted forms (surrounding whitespace is ignored):

    - integers with an optional sign and optional ``b``/``B`` suffix
    - a single quoted character (``'a'``), returned as its code point
    - a quoted backslash escape (``'\\n'``), octal (``'\\755'``) or
      hexadecimal (``'\\xff'``) character, returned as its integer value
    - floating point numbers with an optional ``f``/``F``/``d``/``D`` suffix

    Fixes relative to the earlier implementation:

    - the sign of an integer is kept (``"-90"`` used to come back as 90)
    - floating point patterns must match the whole string, so values such
      as ``"0.0, 100.0"`` are rejected instead of silently truncated
    - escapes are resolved through a lookup table rather than ``eval``, so
      an unknown escape raises ValueError, not SyntaxError/TypeError

    :param str string: A string formatted like a number
    :return: int for integer/character forms, float for floating point forms
    :raises ValueError: If the string matches none of the accepted forms
    '''
    text = string.strip()

    match = _INTEGER_LITERAL.match(text)
    if match:
        return int(match.group(1))

    match = _CHAR_LITERAL.match(text)
    if match:
        return ord(match.group(1))

    match = _ESCAPE_LITERAL.match(text)
    # Single digits such as '\7' are not in the table; they fall through to
    # the octal pattern below, which yields the same value.
    if match and match.group(1) in _ESCAPE_CODES:
        return _ESCAPE_CODES[match.group(1)]

    match = _OCTAL_LITERAL.match(text)
    if match:
        return int(match.group(1), 8)

    match = _HEX_LITERAL.match(text)
    if match:
        return int(match.group(1), 16)

    match = _REAL_LITERAL.match(text)
    if match:
        try:
            return float(match.group(1))
        except ValueError:
            # Shapes like "." or "e5" satisfy the pattern but contain no
            # mantissa digits; report them with the uniform error below.
            pass

    raise ValueError("%s is not a valid numeric type" % repr(string))
if __name__ == '__main__':
    # Run only when executed as a script; main()'s return value becomes the
    # process exit status.
    exit_status = main()
    sys.exit(exit_status)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# -*- coding: utf-8 -*- | |
''' | |
test_convert.py | |
Copyright 2016 RPS | |
Permission is hereby granted, free of charge, to any person obtaining a copy of | |
this software and associated documentation files (the "Software"), to deal in | |
the Software without restriction, including without limitation the rights to | |
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies | |
of the Software, and to permit persons to whom the Software is furnished to do | |
so, subject to the following conditions: | |
The above copyright notice and this permission notice shall be included in all | |
copies or substantial portions of the Software. | |
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |
SOFTWARE. | |
''' | |
from __future__ import print_function | |
from unittest import TestCase | |
from convert import convert_numeric_literals | |
class TestConvert(TestCase):
    '''
    Unit tests for convert.convert_numeric_literals
    '''

    def test_convert_numeric_literals(self):
        '''
        Each literal string must convert to the paired Python number
        '''
        assertion_map = {
            '12b': 12,
            '1b': 1,
            '255B': 255,
            "'3'": ord('3'),
            "'a'": ord('a'),
            "'!'": ord('!'),
            "'\\n'": ord('\n'),
            "'\\r'": ord('\r'),
            # NOTE: this key holds a raw NUL character between the quotes,
            # not a backslash followed by "0".
            "'\0'": 0,
            # Octal expectations use the 0o prefix: the bare leading-zero
            # spelling (0755) is a SyntaxError on Python 3.
            "'\\755'": 0o755,
            "'\\7'": 0o7,
            "'\\x24'": 0x24,
            "'\\xff'": 0xff,
            "'\\xA'": 0xa,
            "2e3f": 2e3,
            "3.1415": 3.1415,
            "-4.12e1f": -41.2,
            "2e3d": 2e3,
            "3.1415d": 3.1415,
            "-2.12e1d": -21.2,
            "0": 0
        }
        for string, expected in assertion_map.items():
            # assertEqual reports both values on failure and, unlike a bare
            # assert, is not stripped when Python runs with -O.
            self.assertEqual(convert_numeric_literals(string), expected)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment