Last active
June 13, 2017 15:13
-
-
Save arnawldo/91612e80a3fb297b6c834d98f7f8b448 to your computer and use it in GitHub Desktop.
Skeleton for hw2
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# uses python3
import sys | |
import numpy as np | |
# helper functions
def est_class_mean(x, y, label):
    """Return the empirical mean of every predictor within one class.

    Input:  x = n * d array of predictors
            y = length-n array of class labels (an n * 1 column also works)
            label = class label of interest
    Output: length-d array holding the column means of the rows of x
            whose label equals `label`
    """
    # Flatten the labels so that an n * 1 column still produces a 1-D row mask.
    in_class = np.asarray(y).reshape(-1) == label
    subset_x = np.asarray(x)[in_class]
    # axis=0 averages over observations, leaving one mean per predictor.
    col_means = subset_x.mean(axis=0)
    return col_means
def est_class_cov(x, y, label):
    """Return the empirical covariance of the predictors within one class.

    The maximum-likelihood estimate is used (sum of outer products divided
    by the class size n_k, not n_k - 1), i.e. the plug-in estimate for a
    Gaussian class-conditional density.

    Input:  x = n * d array of predictors
            y = length-n array of class labels (an n * 1 column also works)
            label = class label of interest
    Output: d * d array
    """
    # Flatten the labels so that an n * 1 column still produces a 1-D row mask.
    in_class = np.asarray(y).reshape(-1) == label
    subset_x = np.asarray(x)[in_class]
    # rowvar=False: rows are observations and columns are variables.
    # bias=True: normalise by n_k (maximum likelihood) instead of n_k - 1.
    col_cov = np.cov(subset_x, rowvar=False, bias=True)
    # np.cov returns a 0-d array when d == 1; always hand back a d * d matrix.
    return np.atleast_2d(col_cov)
def est_class_prior(y, label):
    """Return the estimated prior probability of a particular label.

    The prior is the fraction of training labels equal to `label`.

    Input:  y = array of all training labels (any shape)
            label = class label of interest
    Output: float in [0, 1]
    Raises: ValueError if y contains no labels
    """
    labels = np.asarray(y)
    if labels.size == 0:
        # The mean of an empty array would silently be NaN.
        raise ValueError("cannot estimate a class prior from zero labels")
    # The mean of a boolean mask is the relative frequency of the label.
    prior = float(np.mean(labels == label))
    return prior
def est_class_prob(x, prior, cov, mean):
    """Return the unnormalised class probability of one test observation.

    Computes prior * N(x | mean, cov), where N is the multivariate Gaussian
    density. The value is proportional to the class posterior. The
    (2 * pi) ** (-d / 2) factor is the same for every class and therefore
    cancels once the values for all classes are normalised to sum to 1.

    Input:  x = length-d (or d * 1) array, a single observation
            prior = float, estimated class prior
            cov = d * d array, estimated class covariance
            mean = length-d (or d * 1) array, estimated class mean
    Output: prob = float
    Raises: numpy.linalg.LinAlgError if cov is singular
    """
    # x - mu, flattened so that d * 1 columns and length-d vectors both work
    diff = (np.asarray(x, dtype=float).reshape(-1)
            - np.asarray(mean, dtype=float).reshape(-1))
    sigma = np.atleast_2d(np.asarray(cov, dtype=float))
    n_dims = diff.shape[0]
    # |Sigma| in the Gaussian density is the determinant of the covariance,
    # not a matrix norm.
    det_sigma = np.linalg.det(sigma)
    # term in exponent: (x - mu)^T Sigma^-1 (x - mu); solving the linear
    # system avoids forming the explicit inverse.
    exp_term = np.dot(diff, np.linalg.solve(sigma, diff))
    # probability
    norm_const = np.sqrt((2.0 * np.pi) ** n_dims * det_sigma)
    prob = float(prior * np.exp(-0.5 * exp_term) / norm_const)
    return prob
def calc_all_prob(x_train, y_train, x_test):
    """Return normalised class probabilities for every test observation.

    For each class, the prior, predictor means and predictor covariance are
    estimated from the training data. Each test observation is then scored
    against every class with est_class_prob, and each row of scores is
    divided by its sum so that it adds up to 1.

    Input:  x_train = n * d array
            y_train = n * 1 array
            x_test = (n_test_cases) * d array
    Output: (n_test_cases) * (n_classes) array; columns follow the sorted
            order of the unique labels in y_train
    """
    # all unique class labels (np.unique already returns them sorted)
    class_labels = np.unique(y_train)

    # per-class (prior, covariance, mean), in est_class_prob argument order
    class_params = [
        (
            est_class_prior(y_train, cls),
            est_class_cov(x_train, y_train, cls),
            est_class_mean(x_train, y_train, cls),
        )
        for cls in class_labels
    ]

    # calculate class probabilities for each test observation
    n_test_obs = x_test.shape[0]  # number of test observations
    class_prob_predictions = np.array(
        [
            [
                est_class_prob(x_test[i, ].reshape(-1, ), prior, cov, mean)
                for prior, cov, mean in class_params
            ]
            for i in range(n_test_obs)
        ],
        dtype=float,
    # keep the result 2-D even when there are no test observations
    ).reshape(n_test_obs, len(class_labels))

    # Normalize each row so it sums to 1. A row whose scores all underflow
    # to 0 has a zero sum and comes out as NaN.
    row_sums = class_prob_predictions.sum(axis=1, keepdims=True)
    return class_prob_predictions / row_sums
if __name__ == "__main__":
    # command line args: three CSV paths, in this order
    if len(sys.argv) != 4:
        sys.exit(
            "usage: python3 {} X_train.csv y_train.csv X_test.csv".format(
                sys.argv[0]))
    X_train_filename, y_train_filename, X_test_filename = sys.argv[1:]
    # read in files
    # ndmin=2 keeps the predictors as a (rows, columns) matrix even when a
    # file holds a single row or a single column; without it a one-row test
    # file loads as a 1-D array and is treated as d separate observations.
    X_train = np.loadtxt(X_train_filename, delimiter=',', ndmin=2)
    # ndmin=1 keeps the labels 1-D even when the file holds a single value.
    y_train = np.loadtxt(y_train_filename, delimiter=',', ndmin=1)
    X_test = np.loadtxt(X_test_filename, delimiter=',', ndmin=2)
    # calculate probabilities
    predictions = calc_all_prob(X_train, y_train, X_test)
    # save results as csv
    np.savetxt("probs_test.csv", predictions, delimiter=',')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment