lukecampbell · February 7, 2017 20:36
diff --git a/convert.py b/convert.py
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 '''
 convert.py
 Copyright 2016 RPS

 Permission is hereby granted, free of charge, to any person obtaining a copy of
 this software and associated documentation files (the "Software"), to deal in
 the Software without restriction, including without limitation the rights to
 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
 of the Software, and to permit persons to whom the Software is furnished to do
 so, subject to the following conditions:

 The above copyright notice and this permission notice shall be included in all
 copies or substantial portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
 '''

 from __future__ import print_function
 from argparse import ArgumentParser
 from netCDF4 import Dataset
 import sys
 import numpy as np
 import re
 import os
 import glob


 # Python 2/3 compatibility shim: referencing ``basestring`` raises NameError
 # when the name is not defined (as on Python 3), in which case it is aliased
 # to ``str`` so the isinstance() checks below work on either interpreter.
 try:
    basestring
 except NameError:
    basestring = str


 def main():
    '''
    Replace occurrences of valid_min/valid_max where they appear as strings with the numeric types
    '''
    # The docstring above is also passed to argparse as the --help
    # description, so its wording is part of the program's output.
    cli = ArgumentParser(description=main.__doc__)
    cli.add_argument(
        'path',
        nargs='?',
        default=os.getcwd(),
        help='Optional path to traverse'
    )
    target_path = cli.parse_args().path
    iter_files(target_path)

    # Exit status handed to sys.exit() by the script entry point
    return 0


 def iter_files(root_path):
    '''
    Walk root_path recursively and patch each netCDF (*.nc) file that is found
    to have strings for valid_min/valid_max

    :param str root_path: Directory to search; all subdirectories are included
    '''
    # glob only treats '**' as "any depth" when recursive=True is passed
    # (Python 3.5+); without it the previous pattern matched only files that
    # sit exactly one directory below root_path. os.walk gives a true
    # recursive traversal on both Python 2 and Python 3.
    for dirpath, _dirnames, filenames in os.walk(root_path):
        # sorted() keeps the processing order deterministic within a directory
        for filename in sorted(filenames):
            if not filename.endswith('.nc'):
                continue
            filepath = os.path.join(dirpath, filename)
            if check_dataset(filepath):
                patch_dataset(filepath)


 def check_dataset(netcdf_path):
    '''
    Reports whether any variable in the dataset has a valid_min or valid_max
    attribute whose value is a string

    :param str netcdf_path: Path of the netCDF file to inspect (opened in 'r' mode)
    :return: True if at least one such string attribute is found, False otherwise
    '''
    with Dataset(netcdf_path, 'r') as nc:
        for name in nc.variables:
            # A missing attribute reads as None, which is never a string
            bounds = (
                getattr(nc.variables[name], 'valid_min', None),
                getattr(nc.variables[name], 'valid_max', None),
            )
            if any(isinstance(bound, basestring) for bound in bounds):
                return True

    return False


 def patch_dataset(netcdf_path):
    '''
    Opens a netCDF file in r+ mode and passes every string-valued
    valid_min/valid_max attribute to patch_variable

    :param str netcdf_path: Path of the netCDF file to modify
    '''
    bound_names = ('valid_min', 'valid_max')
    with Dataset(netcdf_path, 'r+') as nc:
        for name in nc.variables:
            for bound_name in bound_names:
                current = getattr(nc.variables[name], bound_name, None)
                # Only string values need patching; absent attributes read as None
                if not isinstance(current, basestring):
                    continue
                patch_variable(nc, name, bound_name)


 def patch_variable(nc, variable, attribute):
    '''
    Rewrites one string attribute of a netCDF variable as a number cast to
    the dtype of the variable that owns it

    An attribute that cannot be converted and contains only whitespace is
    deleted; any other conversion failure is re-raised.

    :param netCDF4.Dataset nc: An open netCDF file descriptor with r+/w modes
    :param str variable: Name of the variable to patch
    :param str attribute: Name of the attribute to patch
    :raises ValueError: If the attribute text is non-blank and not convertible
    '''
    target = nc.variables[variable]
    raw_text = getattr(target, attribute)
    target_dtype = target.dtype
    try:
        parsed = convert_numeric_literals(raw_text)
        typed_value = np.array(parsed).astype(target_dtype)
        setattr(target, attribute, typed_value)
    except ValueError:
        # Blank attributes carry no information, so they are dropped;
        # anything else is a genuine conversion error.
        if raw_text.strip() != "":
            raise
        target.delncattr(attribute)


 def convert_numeric_literals(string):
    '''
    Returns a Python number parsed from a numeric or character literal string

    The forms below are tried in order; the first one that matches wins:

    - integer with optional sign and optional b/B suffix (sign is preserved)
    - one quoted character, returned as its ordinal
    - quoted backslash + 1-3 octal digits
    - quoted backslash + x/X + 1-2 hexadecimal digits
    - quoted backslash + one of the escape letters listed in the table below
    - number containing a decimal point, optional exponent, optional f/F/d/D
    - number with an exponent but no decimal point, optional f/F/d/D

    :param str string: A string formatted like a number
    :return: int for the integer and character forms, float for the rest
    :raises ValueError: If the string does not match any supported form
    '''
    # Every pattern is anchored at both ends so trailing junk is rejected
    # rather than silently ignored. The sign lives inside the capture group
    # so negative integers keep their sign.
    integer_const = r"^([+-]?[0-9]+)[Bb]?$"
    char_const = r"^\'([^\\])\'$"
    octal_const = r"^\'\\([0-7]{1,3})\'$"
    hex_const = r"^\'\\[xX]([0-9a-fA-F]{1,2})\'$"
    escape_const = r"^\'\\(.)\'$"
    decimal_const = r'^([+-]?[0-9]*\.[0-9]*(?:[eE][+-]?[0-9]+)?)[FfDd]?$'
    exponent_const = r'^([+-]?[0-9]*[eE][+-]?[0-9]+)[FfDd]?$'

    # Explicit table instead of eval(): attribute text comes from data files
    # and unknown escapes must end in ValueError, not SyntaxError/TypeError.
    escape_ordinals = {
        'a': 7, 'b': 8, 'f': 12, 'n': 10, 'r': 13, 't': 9, 'v': 11,
        '\\': 92, "'": 39, '"': 34, '?': 63,
    }

    match = re.match(integer_const, string)
    if match:
        return int(match.group(1))

    match = re.match(char_const, string)
    if match:
        return ord(match.group(1))

    # Octal/hex are checked before the one-letter escapes so that a single
    # octal digit such as '\7' is read as a number.
    match = re.match(octal_const, string)
    if match:
        return int(match.group(1), 8)

    match = re.match(hex_const, string)
    if match:
        return int(match.group(1), 16)

    match = re.match(escape_const, string)
    if match and match.group(1) in escape_ordinals:
        return escape_ordinals[match.group(1)]

    for pattern in (decimal_const, exponent_const):
        match = re.match(pattern, string)
        if match:
            # float() itself raises ValueError for degenerate text like "."
            return float(match.group(1))

    raise ValueError("%s is not a valid numeric type" % repr(string))


 if __name__ == '__main__':
    # Use main()'s return value as the process exit status
    exit_status = main()
    sys.exit(exit_status)
diff --git a/test_convert.py b/test_convert.py
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 '''
 test_convert.py

 Copyright 2016 RPS

 Permission is hereby granted, free of charge, to any person obtaining a copy of
 this software and associated documentation files (the "Software"), to deal in
 the Software without restriction, including without limitation the rights to
 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
 of the Software, and to permit persons to whom the Software is furnished to do
 so, subject to the following conditions:

 The above copyright notice and this permission notice shall be included in all
 copies or substantial portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
 '''

 from __future__ import print_function
 from unittest import TestCase
 from convert import convert_numeric_literals


 class TestConvert(TestCase):
    '''
    Unit tests for convert.convert_numeric_literals
    '''

    def test_convert_numeric_literals(self):
        '''
        Each literal string must convert to the number it is mapped to
        '''
        # Octal expectations use the 0o prefix: the bare leading-zero form
        # (0755, 07) is a SyntaxError on Python 3, while 0o755 is accepted by
        # Python 2.6+ and Python 3 alike.
        assertion_map = {
            '12b': 12,
            '1b': 1,
            '255B': 255,
            "'3'": ord('3'),
            "'a'": ord('a'),
            "'!'": ord('!'),
            "'\\n'": ord('\n'),
            "'\\r'": ord('\r'),
            # This key is a quoted literal NUL character, not backslash-zero
            "'\0'": 0,
            "'\\755'": 0o755,
            "'\\7'": 0o7,
            "'\\x24'": 0x24,
            "'\\xff'": 0xff,
            "'\\xA'": 0xa,
            "2e3f": 2e3,
            "3.1415": 3.1415,
            "-4.12e1f": -41.2,
            "2e3d": 2e3,
            "3.1415d": 3.1415,
            "-2.12e1d": -21.2,
            "0": 0
        }

        for string, expected in assertion_map.items():
            # assertEqual reports both values (and the input) on failure
            self.assertEqual(convert_numeric_literals(string), expected, repr(string))
	#!/usr/bin/env python
	# -*- coding: utf-8 -*-
	'''
	convert.py
	Copyright 2016 RPS

	Permission is hereby granted, free of charge, to any person obtaining a copy of
	this software and associated documentation files (the "Software"), to deal in
	the Software without restriction, including without limitation the rights to
	use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
	of the Software, and to permit persons to whom the Software is furnished to do
	so, subject to the following conditions:

	The above copyright notice and this permission notice shall be included in all
	copies or substantial portions of the Software.

	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
	IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
	FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
	AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
	LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
	OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
	SOFTWARE.
	'''

	from __future__ import print_function
	from argparse import ArgumentParser
	from netCDF4 import Dataset
	import sys
	import numpy as np
	import re
	import os
	import glob


	# Python 2/3 compatibility shim: referencing ``basestring`` raises NameError
	# when the name is not defined (as on Python 3), in which case it is aliased
	# to ``str`` so the isinstance() checks below work on either interpreter.
	try:
	    basestring
	except NameError:
	    basestring = str


	def main():
	    '''
	    Replace occurrences of valid_min/valid_max where they appear as strings with the numeric types
	    '''
	    # The docstring above is also passed to argparse as the --help
	    # description, so its wording is part of the program's output.
	    parser = ArgumentParser(description=main.__doc__)
	    parser.add_argument('path', nargs='?', default=os.getcwd(), help='Optional path to traverse')
	    args = parser.parse_args()
	    iter_files(args.path)

	    # Exit status handed to sys.exit() by the script entry point
	    return 0


	def iter_files(root_path):
	    '''
	    Walk root_path recursively and patch each netCDF (*.nc) file that is found
	    to have strings for valid_min/valid_max

	    :param str root_path: Directory to search; all subdirectories are included
	    '''
	    # The glob pattern here had been garbled to '*/.nc', which can only
	    # match a file literally named ".nc". Even in its '**/*.nc' form glob
	    # does not recurse unless recursive=True is passed (Python 3.5+), so
	    # os.walk is used for a true recursive search on Python 2 and 3.
	    for dirpath, _dirnames, filenames in os.walk(root_path):
	        # sorted() keeps the processing order deterministic within a directory
	        for filename in sorted(filenames):
	            if not filename.endswith('.nc'):
	                continue
	            filepath = os.path.join(dirpath, filename)
	            if check_dataset(filepath):
	                patch_dataset(filepath)


	def check_dataset(netcdf_path):
	    '''
	    Returns True if the dataset contains a valid_min/valid_max attribute defined as a string

	    :param str netcdf_path: Path of the netCDF file to inspect (opened in 'r' mode)
	    :return: True if any variable has a string valid_min or valid_max, else False
	    '''
	    with Dataset(netcdf_path, 'r') as nc:
	        for variable in nc.variables:
	            # A missing attribute reads as None, which is never a string
	            valid_min = getattr(nc.variables[variable], 'valid_min', None)
	            valid_max = getattr(nc.variables[variable], 'valid_max', None)
	            if isinstance(valid_min, basestring):
	                return True

	            if isinstance(valid_max, basestring):
	                return True

	    return False


	def patch_dataset(netcdf_path):
	    '''
	    Opens a netCDF file in r+ mode and patches the valid_min/valid_max attributes

	    :param str netcdf_path: Path of the netCDF file to modify
	    '''
	    with Dataset(netcdf_path, 'r+') as nc:
	        for variable in nc.variables:
	            for attribute in ('valid_min', 'valid_max'):
	                attr_value = getattr(nc.variables[variable], attribute, None)
	                # Only string values need patching; absent attributes read as None
	                if isinstance(attr_value, basestring):
	                    patch_variable(nc, variable, attribute)


	def patch_variable(nc, variable, attribute):
	    '''
	    Converts the attribute for a given netCDF variable to the numeric type
	    reflecting the parent variable's type

	    An attribute that cannot be converted and contains only whitespace is
	    deleted; any other conversion failure is re-raised.

	    :param netCDF4.Dataset nc: An open netCDF file descriptor with r+/w modes
	    :param str variable: Name of the variable to patch
	    :param str attribute: Name of the attribute to patch
	    :raises ValueError: If the attribute text is non-blank and not convertible
	    '''
	    ncvar = nc.variables[variable]
	    attr_value = getattr(ncvar, attribute)
	    var_dtype = ncvar.dtype
	    try:
	        better_value = np.array(convert_numeric_literals(attr_value)).astype(var_dtype)
	        setattr(ncvar, attribute, better_value)
	    except ValueError:
	        # Blank attributes carry no information, so they are dropped;
	        # anything else is a genuine conversion error.
	        if attr_value.strip() == "":
	            ncvar.delncattr(attribute)
	        else:
	            raise


	def convert_numeric_literals(string):
	    '''
	    Returns a Python number parsed from a numeric or character literal string

	    The forms below are tried in order; the first one that matches wins:

	    - integer with optional sign and optional b/B suffix (sign is preserved)
	    - one quoted character, returned as its ordinal
	    - quoted backslash + 1-3 octal digits
	    - quoted backslash + x/X + 1-2 hexadecimal digits
	    - quoted backslash + one of the escape letters listed in the table below
	    - number containing a decimal point, optional exponent, optional f/F/d/D
	    - number with an exponent but no decimal point, optional f/F/d/D

	    :param str string: A string formatted like a number
	    :return: int for the integer and character forms, float for the rest
	    :raises ValueError: If the string does not match any supported form
	    '''
	    # The decimal patterns had lost their '*' quantifiers, limiting them to
	    # one digit on each side of the point; they are restored here. Every
	    # pattern is anchored at both ends so trailing junk is rejected, and the
	    # sign lives inside the capture group so negative integers keep it.
	    integer_const = r"^([+-]?[0-9]+)[Bb]?$"
	    char_const = r"^\'([^\\])\'$"
	    octal_const = r"^\'\\([0-7]{1,3})\'$"
	    hex_const = r"^\'\\[xX]([0-9a-fA-F]{1,2})\'$"
	    escape_const = r"^\'\\(.)\'$"
	    decimal_const = r'^([+-]?[0-9]*\.[0-9]*(?:[eE][+-]?[0-9]+)?)[FfDd]?$'
	    exponent_const = r'^([+-]?[0-9]*[eE][+-]?[0-9]+)[FfDd]?$'

	    # Explicit table instead of eval(): attribute text comes from data files
	    # and unknown escapes must end in ValueError, not SyntaxError/TypeError.
	    escape_ordinals = {
	        'a': 7, 'b': 8, 'f': 12, 'n': 10, 'r': 13, 't': 9, 'v': 11,
	        '\\': 92, "'": 39, '"': 34, '?': 63,
	    }

	    match = re.match(integer_const, string)
	    if match:
	        return int(match.group(1))

	    match = re.match(char_const, string)
	    if match:
	        return ord(match.group(1))

	    # Octal/hex are checked before the one-letter escapes so that a single
	    # octal digit such as '\7' is read as a number.
	    match = re.match(octal_const, string)
	    if match:
	        return int(match.group(1), 8)

	    match = re.match(hex_const, string)
	    if match:
	        return int(match.group(1), 16)

	    match = re.match(escape_const, string)
	    if match and match.group(1) in escape_ordinals:
	        return escape_ordinals[match.group(1)]

	    for pattern in (decimal_const, exponent_const):
	        match = re.match(pattern, string)
	        if match:
	            # float() itself raises ValueError for degenerate text like "."
	            return float(match.group(1))

	    raise ValueError("%s is not a valid numeric type" % repr(string))


	if __name__ == '__main__':
	    # Use main()'s return value as the process exit status
	    sys.exit(main())
	#!/usr/bin/env python
	# -*- coding: utf-8 -*-
	'''
	test_convert.py

	Copyright 2016 RPS

	Permission is hereby granted, free of charge, to any person obtaining a copy of
	this software and associated documentation files (the "Software"), to deal in
	the Software without restriction, including without limitation the rights to
	use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
	of the Software, and to permit persons to whom the Software is furnished to do
	so, subject to the following conditions:

	The above copyright notice and this permission notice shall be included in all
	copies or substantial portions of the Software.

	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
	IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
	FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
	AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
	LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
	OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
	SOFTWARE.
	'''

	from __future__ import print_function
	from unittest import TestCase
	from convert import convert_numeric_literals


	class TestConvert(TestCase):
	    '''
	    Unit tests for convert.convert_numeric_literals
	    '''

	    def test_convert_numeric_literals(self):
	        '''
	        Each literal string must convert to the number it is mapped to
	        '''
	        # Octal expectations use the 0o prefix: the bare leading-zero form
	        # (0755, 07) is a SyntaxError on Python 3, while 0o755 is accepted by
	        # Python 2.6+ and Python 3 alike.
	        assertion_map = {
	            '12b': 12,
	            '1b': 1,
	            '255B': 255,
	            "'3'": ord('3'),
	            "'a'": ord('a'),
	            "'!'": ord('!'),
	            "'\\n'": ord('\n'),
	            "'\\r'": ord('\r'),
	            # This key is a quoted literal NUL character, not backslash-zero
	            "'\0'": 0,
	            "'\\755'": 0o755,
	            "'\\7'": 0o7,
	            "'\\x24'": 0x24,
	            "'\\xff'": 0xff,
	            "'\\xA'": 0xa,
	            "2e3f": 2e3,
	            "3.1415": 3.1415,
	            "-4.12e1f": -41.2,
	            "2e3d": 2e3,
	            "3.1415d": 3.1415,
	            "-2.12e1d": -21.2,
	            "0": 0
	        }

	        for string, expected in assertion_map.items():
	            # assertEqual reports both values (and the input) on failure
	            self.assertEqual(convert_numeric_literals(string), expected, repr(string))