Last active
March 15, 2017 16:03
-
-
Save mutaku/648b5971c0f981002f4f227271d84810 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from __future__ import division | |
import numpy as np | |
import pandas as pd | |
def standardization(x, args):
    """Zero mean and unit variance scaling.

    ``x`` is the value to scale and ``args`` is a dict providing the
    ``'mean'`` and ``'std'`` to scale with.  Returns
    ``(x - args['mean']) / args['std']``.

    A scalar ``std`` of zero (a constant feature) is treated as a scale of
    one, so the centred value is returned instead of dividing by zero.
    """
    centered = x - args['mean']
    spread = args['std']
    # Dividing by a zero spread yields NaN/inf for numpy scalars and raises
    # ZeroDivisionError for plain Python numbers.  Only scalar spreads are
    # checked so that array-valued ``std`` keeps its element-wise behaviour.
    if np.ndim(spread) == 0 and spread == 0:
        return centered
    return centered / spread
def rescaling(x, args):
    """Min - Max scaling [0-1].

    ``x`` is the value to scale and ``args`` is a dict providing the
    ``'min'`` and ``'max'`` of the data.  Returns
    ``(x - args['min']) / (args['max'] - args['min'])``.

    When ``max`` and ``min`` are equal scalars (a constant feature) the
    range is treated as one, so the shifted value is returned instead of
    dividing by zero.
    """
    lowest = args['min']
    span = args['max'] - lowest
    shifted = x - lowest
    # A zero range would give NaN/inf for numpy scalars and raise
    # ZeroDivisionError for plain Python numbers.  Only scalar ranges are
    # checked so that array-valued bounds keep their element-wise behaviour.
    if np.ndim(span) == 0 and span == 0:
        return shifted
    return shifted / span
def feature_scaling(array, method=standardization):
    """Perform feature scaling for given array using method.

    ``array`` is an iterable of numeric values.  ``method`` is a callable
    invoked as ``method(x, args)`` for every element ``x``; it defaults to
    ``standardization``.  The ``args`` dict is filled with ``'max'``/``'min'``
    when ``method`` is ``rescaling`` and with ``'mean'``/``'std'`` when it is
    ``standardization``; any other callable receives an empty dict.

    Returns a numpy array holding the scaled elements in their original
    order.
    """
    args = {}
    if method is rescaling:
        args['max'] = max(array)
        args['min'] = min(array)
    elif method is standardization:
        args['mean'] = np.mean(array)
        args['std'] = np.std(array)
    # ``map`` is lazy on Python 3, so ``np.asarray(map(...))`` would wrap the
    # iterator itself in a 0-d object array rather than the scaled values.
    # Building the list explicitly gives the same result on Python 2 and 3.
    # The asarray wrapper may be unnecessary if this is only ever fed from
    # pandas apply.
    return np.asarray([method(x, args) for x in array])
# Example:
# Lay the parameter vectors (PVs) out as the columns of a frame, so that
# each row holds one parameter and each column is one parameter vector.
# Then run feature_scaling over the frame one row at a time.
pv_df = pd.concat(
    (pd.Series(vector) for vector in parameter_vectors),
    axis=1,
)
pv_feature_scaled = pv_df.apply(
    feature_scaling,
    axis=1,
    args=(standardization,),
)
# Here is the procedure described above, applied to generated data:
In [110]: l_as_df | |
Out[110]: | |
0 1 2 | |
0 0.00 22.2000 52.000 | |
1 1.96 23.9316 53.508 | |
2 4.00 26.6400 57.200 | |
3 11.88 57.1428 118.404 | |
In [111]: pv_feature_scaled = l_as_df.apply(feature_scaling, axis=1, args=(standardization,)) | |
In [112]: pv_feature_scaled | |
Out[112]: | |
0 1 2 | |
0 -1.160952 -0.118912 1.279863 | |
1 -1.160315 -0.120022 1.280337 | |
2 -1.159692 -0.121107 1.280799 | |
3 -1.159083 -0.122168 1.281250 | |
In [113]: l_as_df.apply(feature_scaling, axis=1, args=(rescaling,)) | |
Out[113]: | |
0 1 2 | |
0 0.0 0.426923 1.0 | |
1 0.0 0.426236 1.0 | |
2 0.0 0.425564 1.0 | |
3 0.0 0.424907 1.0 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
In [20]: m = np.zeros((5, 5)) | |
In [21]: for x in combinations(range(6), 2): | |
...: m[(x[0], x[1] - 1)] = 3 | |
...: | |
In [22]: m | |
Out[22]: | |
array([[ 3., 3., 3., 3., 3.], | |
[ 0., 3., 3., 3., 3.], | |
[ 0., 0., 3., 3., 3.], | |
[ 0., 0., 0., 3., 3.], | |
[ 0., 0., 0., 0., 3.]]) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment