Skip to content

Instantly share code, notes, and snippets.

@reyoung
Created July 9, 2014 12:51
Show Gist options
  • Save reyoung/ecd1e6174d39f834edd9 to your computer and use it in GitHub Desktop.
Save reyoung/ecd1e6174d39f834edd9 to your computer and use it in GitHub Desktop.
A normalizer utility
"""
Normalize Utilities
Usage:
./normalizer.py <csvfile> ... [ --config=<conf> ]
Options:
--config=<conf> Config file [ default: None ]
"""
import docopt
import yaml
import csv
import os.path
__author__ = 'reyoung'
# Settings used when no config file is given; also used to fill in any of
# these top-level keys that a supplied config file omits.
DEFAULT_CONFIG = {
    # Appended to each input file's base name to form the output file name.
    "suffix": "_norm",
    # Normalizer settings for every column that has no entry of its own.
    "default_method": {
        "method": "maxmin",  # key looked up in METHOD_MAP
        "min": 0,  # lower bound of the normalized range
        "max": 1  # upper bound of the normalized range
    }
}
class MaxMin(object):
    """Linearly rescale values from their observed range onto [min, max].

    Usage is two-pass: feed every value to ``prepare`` so the observed
    minimum and maximum are known, then call ``process`` on each value to
    obtain its rescaled counterpart.

    Keyword arguments:
        max: upper bound of the target range (default 1).
        min: lower bound of the target range (default 0).

    Any other keyword (for example ``method``) is accepted and ignored, so
    a whole configuration mapping can be passed with ``**``.
    """

    def __init__(self, **kwds):
        self.max = self._bound(kwds, 'max', 1)
        self.min = self._bound(kwds, 'min', 0)
        # Start with an empty observed range: any real value replaces both.
        inf = float('inf')
        self.__min = inf
        self.__max = -inf

    @staticmethod
    def _bound(kwds, key, default):
        """Return ``kwds[key]`` as a float, or ``default`` if absent/unusable."""
        if key not in kwds:
            return default
        try:
            return float(kwds[key])
        except (TypeError, ValueError):
            # Best-effort: a non-numeric or null bound (e.g. an empty YAML
            # value) falls back to the default instead of aborting the run.
            return default

    def prepare(self, d):
        """Widen the observed range so that it includes ``d``."""
        if d < self.__min:
            self.__min = d
        if d > self.__max:
            self.__max = d

    def process(self, d):
        """Return ``d`` mapped from the observed range onto [min, max].

        The mapping is anchored at the top of the range: the observed
        maximum always maps to ``self.max``.  When every prepared value was
        identical the observed range has zero width; the result is then
        ``self.max`` rather than a division by zero.
        """
        span = self.__max - self.__min
        if span == 0:
            return float(self.max)
        return (self.max - self.min) / span * (d - self.__max) + self.max

    def __str__(self):
        return "Min %f, Max %f, __Min %f, __Max %f" % (
            self.min, self.max, self.__min, self.__max)

    def __repr__(self):
        return self.__str__()
# Maps the "method" name used in configuration to the normalizer class that
# implements it; main() instantiates the class with that method's settings.
METHOD_MAP = {
    'maxmin': MaxMin
}
def main(files, suffix, default_method, *args, **kwds):
    """Normalize every column of the given CSV files and write the results.

    Runs in two passes over all ``files``: the first feeds each cell to its
    column's normalizer (``prepare``) so statistics span all files together;
    the second writes ``<basename><suffix>.csv`` next to each input with
    every cell replaced by the normalizer's ``process`` result.

    Parameters:
        files: paths of the input CSV files; every cell must parse as float.
        suffix: text appended to each input's base name for the output file.
        default_method: mapping with a ``method`` key (looked up in
            ``METHOD_MAP``) plus that method's settings, used for columns
            without an override.
        *args: ignored.
        **kwds: may contain ``rows``, a list of per-column override mappings
            shaped like ``default_method`` with an extra ``id`` key holding
            the zero-based column index. Other keys are ignored.

    Raises:
        ValueError: if a cell cannot be converted to float.
        KeyError: if a ``method`` name is not in ``METHOD_MAP``.
    """
    # Per-column overrides keyed by column index. ``rows`` may be missing or
    # None (an empty YAML value); both mean "no overrides".
    rows = {r['id']: r for r in (kwds.get('rows') or ())}

    # Pass 1: create one normalizer per column and let it see every value.
    methods = {}
    for fn in files:
        with open(fn, 'r') as f:
            for row in csv.reader(f):
                for i, val in enumerate(row):
                    m = methods.get(i)
                    if m is None:
                        init_args = rows.get(i, default_method)
                        m = METHOD_MAP[init_args['method']](**init_args)
                        methods[i] = m
                    m.prepare(float(val))

    # Column indices are contiguous from 0, so order them into a list.
    # This must be a real list: it is re-iterated for every output row, and
    # a lazy map object (Python 3) would be exhausted after the first row.
    methods = [methods[i] for i in range(len(methods))]

    # Pass 2: rewrite each file with normalized values.
    for fn in files:
        base = os.path.splitext(os.path.basename(fn))[0]
        output_name = os.path.join(os.path.dirname(fn), base + suffix + ".csv")
        # NOTE(review): the csv docs recommend newline='' on Python 3 ('wb'
        # on Python 2) for writer files; the plain 'w' mode is kept so the
        # script stays interpreter-agnostic.
        with open(fn, 'r') as src, open(output_name, 'w') as dst:
            writer = csv.writer(dst)
            for row in csv.reader(src):
                writer.writerow(
                    [m.process(float(val)) for val, m in zip(row, methods)])
# Command-line entry point: parse arguments, load the optional YAML config,
# fill in missing top-level settings from DEFAULT_CONFIG, and run main().
if __name__ == '__main__':
    args = docopt.docopt(__doc__, version='normalizer V0.1.0')
    config_path = args['--config']
    if config_path is not None:
        with open(config_path, 'r') as f:
            # safe_load instead of load: the config only needs plain
            # mappings/scalars, bare yaml.load() can construct arbitrary
            # objects from the file, and PyYAML >= 6 rejects load() without
            # an explicit Loader. An empty file parses to None.
            conf = yaml.safe_load(f) or {}
        if not isinstance(conf, dict):
            raise SystemExit("config file must contain a YAML mapping")
        for k, v in DEFAULT_CONFIG.items():
            conf.setdefault(k, v)
    else:
        conf = DEFAULT_CONFIG
    main(args['<csvfile>'], **conf)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment