Last active
December 27, 2019 16:41
-
-
Save edenau/81693265394ce40090d5bb037d9a009a to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class scaler:
    """Hold the mean and standard deviation used to (un)standardize data.

    Both values are stored exactly as passed in; no validation or copying
    is performed.

    Args:
        mean: Value subtracted from data when standardizing.
        std: Value the centred data is divided by when standardizing.
    """

    def __init__(self, mean, std):
        self.mean = mean
        self.std = std

    def __repr__(self):
        # Readable form so that printing a list of scalers is informative.
        return "{}(mean={!r}, std={!r})".format(
            type(self).__name__, self.mean, self.std
        )
def get_scaler(row):
    """Build a ``scaler`` from the mean and standard deviation of ``row``.

    Args:
        row: Values accepted by ``np.mean`` / ``np.std``; the statistics are
            computed over all elements (no axis is given).

    Returns:
        A ``scaler`` holding the computed mean and standard deviation. If the
        standard deviation is exactly zero (all values equal), 1.0 is stored
        instead.
    """
    mean = np.mean(row)
    std = np.std(row)
    # A constant row has zero spread; dividing by it in standardize() would
    # yield NaN/inf. Using 1.0 keeps standardize/unstandardize a clean
    # round trip (the data is only shifted by the mean).
    if std == 0:
        std = 1.0
    return scaler(mean, std)
def standardize(data, scaler):
    """Return ``data`` centred on ``scaler.mean`` and scaled by ``scaler.std``.

    Args:
        data: Number or array-like supporting ``-`` and ``/`` with the
            scaler's attributes.
        scaler: Object exposing ``mean`` and ``std`` attributes. Note that
            this parameter name shadows the ``scaler`` class inside the
            function body.

    Returns:
        ``(data - scaler.mean) / scaler.std``.
    """
    return (data - scaler.mean) / scaler.std
def unstandardize(data, scaler):
    """Invert ``standardize``: rescale by ``scaler.std`` and add ``scaler.mean``.

    Args:
        data: Number or array-like supporting ``*`` and ``+`` with the
            scaler's attributes.
        scaler: Object exposing ``mean`` and ``std`` attributes. Note that
            this parameter name shadows the ``scaler`` class inside the
            function body.

    Returns:
        ``(data * scaler.std) + scaler.mean``.
    """
    return (data * scaler.std) + scaler.mean
# Fit one scaler per row, using the training data only
X_scalers = [get_scaler(X_raw_train[i, :]) for i in range(X_num_row)]
X_train = np.array(
    [standardize(X_raw_train[i, :], sc) for i, sc in enumerate(X_scalers)]
)
y_scalers = [get_scaler(y_raw_train[i, :]) for i in range(y_num_row)]
y_train = np.array(
    [standardize(y_raw_train[i, :], sc) for i, sc in enumerate(y_scalers)]
)

# Reuse the training-set scalers on the testing set (no refitting)
X_test = np.array(
    [standardize(X_raw_test[i, :], sc) for i, sc in enumerate(X_scalers)]
)
y_test = np.array(
    [standardize(y_raw_test[i, :], sc) for i, sc in enumerate(y_scalers)]
)

# Sanity check: per-row statistics of the standardized training data
print([r.mean() for r in X_train])  # each value should be close to zero
print([r.std() for r in X_train])  # each value should be close to one
print([r.mean() for r in y_train])  # each value should be close to zero
print([r.std() for r in y_train])  # each value should be close to one
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment