Skip to content

Instantly share code, notes, and snippets.

@edenau
Last active December 27, 2019 16:41
Show Gist options
  • Save edenau/81693265394ce40090d5bb037d9a009a to your computer and use it in GitHub Desktop.
Save edenau/81693265394ce40090d5bb037d9a009a to your computer and use it in GitHub Desktop.
class scaler:
    """Hold the mean and standard deviation used to standardize one row.

    The lowercase class name is kept as-is because other code in this file
    constructs the class by that name.

    Attributes:
        mean: Value subtracted from the data by ``standardize``.
        std: Value the centred data is divided by in ``standardize``.
    """

    def __init__(self, mean, std):
        self.mean = mean
        self.std = std

    def __repr__(self):
        # Readable form for debugging and for printing lists of scalers.
        return "scaler(mean={!r}, std={!r})".format(self.mean, self.std)
def get_scaler(row):
    """Build a ``scaler`` from the mean and standard deviation of ``row``.

    Args:
        row: Array-like of numbers. ``np.mean`` and ``np.std`` are called
            without an ``axis``, so all elements are reduced together.

    Returns:
        A ``scaler`` holding ``np.mean(row)`` and ``np.std(row)``, except
        that a scalar standard deviation of exactly zero is replaced by 1.0.
    """
    mean = np.mean(row)
    std = np.std(row)
    # A constant row has zero spread; dividing by it in standardize() would
    # yield NaN/inf. Fall back to a unit scale so such rows map to zeros and
    # unstandardize() still restores the original values. The ndim check
    # leaves any non-scalar result exactly as np.std returned it.
    if np.ndim(std) == 0 and std == 0:
        std = 1.0
    return scaler(mean, std)
def standardize(data, scaler):
    """Return ``data`` centred on ``scaler.mean`` and divided by ``scaler.std``.

    Args:
        data: Number or array supporting ``-`` and ``/`` with the scaler's
            attributes.
        scaler: Object exposing ``mean`` and ``std`` attributes. Inside this
            function the parameter name shadows the ``scaler`` class.

    Returns:
        ``(data - scaler.mean) / scaler.std``.
    """
    return (data - scaler.mean) / scaler.std
def unstandardize(data, scaler):
    """Undo ``standardize``: multiply by ``scaler.std`` and add ``scaler.mean``.

    Args:
        data: Number or array supporting ``*`` and ``+`` with the scaler's
            attributes.
        scaler: Object exposing ``mean`` and ``std`` attributes. Inside this
            function the parameter name shadows the ``scaler`` class.

    Returns:
        ``(data * scaler.std) + scaler.mean``.
    """
    return (data * scaler.std) + scaler.mean
# Fit one scaler per row, using the training set only
X_scalers = [
    get_scaler(X_raw_train[idx, :])
    for idx in range(X_num_row)
]
X_train = np.array([
    standardize(X_raw_train[idx, :], fitted)
    for idx, fitted in enumerate(X_scalers)
])
y_scalers = [
    get_scaler(y_raw_train[idx, :])
    for idx in range(y_num_row)
]
y_train = np.array([
    standardize(y_raw_train[idx, :], fitted)
    for idx, fitted in enumerate(y_scalers)
])

# Reuse the training-set scalers on the testing set (no refitting)
X_test = np.array([
    standardize(X_raw_test[idx, :], fitted)
    for idx, fitted in enumerate(X_scalers)
])
y_test = np.array([
    standardize(y_raw_test[idx, :], fitted)
    for idx, fitted in enumerate(y_scalers)
])

# Sanity check on the standardized training rows
print([scaled_row.mean() for scaled_row in X_train])  # should be close to zero
print([scaled_row.std() for scaled_row in X_train])  # should be close to one
print([scaled_row.mean() for scaled_row in y_train])  # should be close to zero
print([scaled_row.std() for scaled_row in y_train])  # should be close to one
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment