Skip to content

Instantly share code, notes, and snippets.

@lukecampbell
Created February 7, 2017 20:36
Show Gist options
  • Save lukecampbell/e12936b4261ed2cdaeab5a954b033825 to your computer and use it in GitHub Desktop.
Save lukecampbell/e12936b4261ed2cdaeab5a954b033825 to your computer and use it in GitHub Desktop.
Convert valid_min/valid_max strings to numeric data types
#!/usr/bin/env python
# -*- coding: utf-8 -*-
'''
convert.py
Copyright 2016 RPS
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
'''
from __future__ import print_function
from argparse import ArgumentParser
from netCDF4 import Dataset
import sys
import numpy as np
import re
import os
import glob
# Python 3 has no ``basestring`` builtin; alias it to ``str`` there so the
# isinstance() checks below work on either major version. On Python 2 the
# builtin is left untouched.
if sys.version_info[0] >= 3:
    basestring = str
def main():
    '''
    Replace occurrences of valid_min/valid_max where they appear as strings with the numeric types
    '''
    # The docstring above doubles as the command-line help description.
    cli = ArgumentParser(description=main.__doc__)
    cli.add_argument(
        'path',
        nargs='?',
        default=os.getcwd(),
        help='Optional path to traverse'
    )
    # Hand the positional path (or the current directory) to the walker.
    iter_files(cli.parse_args().path)
    # Exit status passed on to sys.exit by the script entry point.
    return 0
def iter_files(root_path):
    '''
    Recursively walk root_path and patch every ``*.nc`` file that is found
    to have strings for valid_min/valid_max

    :param str root_path: Directory to traverse; files at any depth below
                          it (including directly inside it) are examined
    '''
    # glob only expands '**' across directory levels when called with
    # recursive=True (Python 3.5+); otherwise it behaves like a single '*'
    # and finds files exactly one directory down. os.walk visits every
    # level on both Python 2 and 3.
    for dirpath, _dirnames, filenames in os.walk(root_path):
        # Sorted so files within a directory are processed in a
        # reproducible order.
        for filename in sorted(filenames):
            if not filename.endswith('.nc'):
                continue
            filepath = os.path.join(dirpath, filename)
            # Only reopen the file for writing when there is something to fix.
            if check_dataset(filepath):
                patch_dataset(filepath)
def check_dataset(netcdf_path):
    '''
    Reports whether any variable in the file carries a valid_min or valid_max
    attribute stored as a string

    :param str netcdf_path: Path to the netCDF file, opened read-only
    :returns: True if at least one such attribute is a string, otherwise False
    '''
    watched_attributes = ('valid_min', 'valid_max')
    with Dataset(netcdf_path, 'r') as nc:
        # any() is consumed inside the with-block, so the dataset is still
        # open while the attributes are being read.
        return any(
            isinstance(getattr(ncvar, attr_name, None), basestring)
            for ncvar in nc.variables.values()
            for attr_name in watched_attributes
        )
def patch_dataset(netcdf_path):
    '''
    Opens a netCDF file in r+ mode and rewrites every valid_min/valid_max
    attribute that is currently stored as a string

    :param str netcdf_path: Path to the netCDF file to modify in place
    '''
    with Dataset(netcdf_path, 'r+') as nc:
        for var_name, ncvar in nc.variables.items():
            for attr_name in ('valid_min', 'valid_max'):
                current = getattr(ncvar, attr_name, None)
                # Missing or already non-string attributes are left alone.
                if not isinstance(current, basestring):
                    continue
                patch_variable(nc, var_name, attr_name)
def patch_variable(nc, variable, attribute):
    '''
    Replaces a string attribute on a netCDF variable with a numeric value
    cast to the parent variable's dtype. An attribute whose text is empty
    or only whitespace is removed instead.

    :param netCDF4.Dataset nc: An open netCDF file descriptor with r+/w modes
    :param str variable: Name of the variable to patch
    :param str attribute: Name of the attribute to patch
    :raises ValueError: If the attribute text is non-blank and cannot be
                        converted
    '''
    target = nc.variables[variable]
    raw_text = getattr(target, attribute)
    target_dtype = target.dtype
    try:
        parsed = convert_numeric_literals(raw_text)
        # Wrap in an array so the value can be cast to the variable's dtype.
        setattr(target, attribute, np.array(parsed).astype(target_dtype))
    except ValueError:
        # Anything other than a blank attribute is a genuine failure.
        if raw_text.strip() != "":
            raise
        target.delncattr(attribute)
# Code points for the single-character backslash escapes accepted inside a
# quoted character literal. A fixed table is used instead of eval() because
# the text being parsed comes from file attributes.
_ESCAPE_CODES = {
    'n': ord('\n'),
    'r': ord('\r'),
    't': ord('\t'),
    'a': ord('\a'),
    'b': ord('\b'),
    'f': ord('\f'),
    'v': ord('\v'),
    '\\': ord('\\'),
    "'": ord("'"),
    '"': ord('"'),
    '?': ord('?'),
}

# Digits with a decimal point (at least one digit on either side) and an
# optional exponent, or digits followed by a mandatory exponent.
_DECIMAL_BODY = r"(?:[0-9]+\.[0-9]*|\.[0-9]+)(?:[eE][+-]?[0-9]+)?"
_EXPONENT_BODY = r"[0-9]+[eE][+-]?[0-9]+"

# (compiled pattern, converter applied to capture group 1), tried in order.
# Every pattern is anchored at both ends so trailing garbage is rejected.
_LITERAL_PARSERS = (
    # Integer with optional byte suffix; the sign is part of the capture.
    (re.compile(r"^([+-]?[0-9]+)[Bb]?$"), int),
    # Single quoted character that is not a backslash.
    (re.compile(r"^'([^\\])'$"), ord),
    # Quoted octal escape, e.g. '\755'.
    (re.compile(r"^'\\([0-7]{1,3})'$"), lambda digits: int(digits, 8)),
    # Quoted hexadecimal escape, e.g. '\xff'.
    (re.compile(r"^'\\[xX]([0-9a-fA-F]{1,2})'$"),
     lambda digits: int(digits, 16)),
    # Quoted single-character escape, e.g. '\n'; unknown escapes raise
    # KeyError, which the caller turns into ValueError.
    (re.compile(r"^'\\(.)'$"), _ESCAPE_CODES.__getitem__),
    # Floating point with optional float (f/F) or double (d/D) suffix.
    (re.compile(r"^([+-]?(?:%s|%s))[FfDd]?$"
                % (_DECIMAL_BODY, _EXPONENT_BODY)), float),
)


def convert_numeric_literals(string):
    '''
    Returns the Python number described by the string supplied

    Recognised forms are integers with an optional ``b``/``B`` suffix,
    quoted characters and backslash escapes (simple, octal and hex), and
    floating point values with an optional ``f``/``F``/``d``/``D`` suffix.
    Surrounding whitespace is ignored.

    :param str string: A string formatted like a number
    :returns: ``int`` for integer and character forms, ``float`` for
              floating point forms
    :raises ValueError: If the string matches none of the recognised forms
    '''
    literal = string.strip()
    for pattern, convert in _LITERAL_PARSERS:
        match = pattern.match(literal)
        if match is None:
            continue
        try:
            return convert(match.group(1))
        except KeyError:
            # Well-formed quoted escape, but not one in _ESCAPE_CODES.
            break
    raise ValueError("%s is not a valid numeric type" % repr(string))
# Run only when executed as a script; main()'s return value becomes the
# process exit status.
if __name__ == '__main__':
    raise SystemExit(main())
#!/usr/bin/env python
# -*- coding: utf-8 -*-
'''
test_convert.py
Copyright 2016 RPS
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
'''
from __future__ import print_function
from unittest import TestCase
from convert import convert_numeric_literals
class TestConvert(TestCase):
    '''
    Unit tests for convert.convert_numeric_literals
    '''

    def test_convert_numeric_literals(self):
        '''
        Each literal string converts to the expected Python number
        '''
        # Octal expectations use the 0o prefix, which is valid on Python 2.6+
        # and Python 3; the bare leading-zero form is a syntax error on 3.
        assertion_map = {
            '12b': 12,
            '1b': 1,
            '255B': 255,
            "'3'": ord('3'),
            "'a'": ord('a'),
            "'!'": ord('!'),
            "'\\n'": ord('\n'),
            "'\\r'": ord('\r'),
            "'\0'": 0,
            "'\\755'": 0o755,
            "'\\7'": 0o7,
            "'\\x24'": 0x24,
            "'\\xff'": 0xff,
            "'\\xA'": 0xa,
            "2e3f": 2e3,
            "3.1415": 3.1415,
            "-4.12e1f": -41.2,
            "2e3d": 2e3,
            "3.1415d": 3.1415,
            "-2.12e1d": -21.2,
            "0": 0,
        }
        for string, expected in assertion_map.items():
            # assertEqual still runs under ``python -O`` and the message
            # names the input that failed.
            self.assertEqual(
                convert_numeric_literals(string),
                expected,
                'unexpected conversion for %r' % (string,)
            )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment