Last active
April 4, 2016 21:02
-
-
Save mattiasarro/6eb2b76d65a5768a026f0f3b196c230b to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Authors: Daniyal Shahrokhian, Mattias Arro | |
import numpy as np | |
from scipy.stats import bernoulli | |
# Generates one label per value in x_values according to the distribution of the exercise
def gen_points(x_values):
    """Draw one random label for every entry of ``x_values``.

    Each entry is handed to ``gen_point`` and the resulting labels are
    returned as a list, in the same order as the inputs.
    """
    return [gen_point(value) for value in x_values]
def gen_point(x):
    """Sample a single Bernoulli label for the position ``x``.

    The success probability is ``(x + 2/3) / 2``, i.e. ``x / 2 + 1 / 3``,
    which equals 1/2 at ``x = 1/3``.
    """
    success_probability = (x + 2.0 / 3) / 2
    label = bernoulli.rvs(success_probability)
    return label
# Returns the boundary that minimizes the empirical risk. For doing so,
# it calculates the risk of setting the decision boundary (t) to each value in
# boundaries and, given the labels, keeps the boundary with the lowest loss over
# all x_values
def gen_emprisk_minimizer(boundaries, x_values, labels):
    """Return the candidate boundary with the lowest empirical risk.

    For a boundary ``t`` the classifier predicts 0 when ``x < t`` and 1
    otherwise. The empirical risk of ``t`` is the number of samples whose
    prediction differs from the observed label (the count is not divided
    by the sample size, which does not change the minimizer).

    Args:
        boundaries: iterable of candidate decision boundaries.
        x_values: sample positions.
        labels: observed 0/1 labels, expected to be as long as ``x_values``
            (the two are walked in lockstep).

    Returns:
        The first boundary that attains the minimal error count, or ``0``
        when ``boundaries`` is empty.
    """
    emp_risk_minimizer = 0
    min_emp_risk = float('inf')
    for boundary in boundaries:
        # Plain comparisons instead of indexing a tuple with a boolean:
        # with NumPy inputs the comparison yields np.bool_, which should
        # not be used as a sequence index.
        emp_risk = sum(
            1
            for x, real_label in zip(x_values, labels)
            if (0 if x < boundary else 1) != real_label
        )
        # Strict '<' keeps the earliest boundary on ties.
        if emp_risk < min_emp_risk:
            emp_risk_minimizer = boundary
            min_emp_risk = emp_risk
    return emp_risk_minimizer
def excess_risk_from_formula(t):
    """Return the closed-form excess risk ``(t - 1/3)**2 / 2`` of boundary ``t``."""
    # Distance of the boundary from 1/3, the point where the formula vanishes.
    offset = float(t) - (float(1) / float(3))
    return (offset ** 2) / 2
def excess_risk_discrete(x_values, t):
    """Return the fraction of ``x_values`` lying between ``t`` and 1/3.

    Every point carries the weight ``1 / len(x_values)``. A point is counted
    when it falls in ``[t, 1/3)`` (boundary below 1/3) or in ``(1/3, t)``
    (boundary above 1/3). A boundary exactly equal to 1/3 yields 0.
    """
    bayes_boundary = float(1) / float(3)
    if t == bayes_boundary:
        return 0

    weight = float(1) / float(len(x_values))
    total = 0
    for raw in x_values:
        value = float(raw)
        # At most one of the two intervals can contain the point.
        if t <= value < bayes_boundary or bayes_boundary < value < t:
            total += weight
    return total
if __name__ == "__main__":
    # Sample: n evenly spaced positions on [0, 1], one random label each.
    n = 100
    # linspace states the number of points directly; arange with a float
    # step and a padded stop value is sensitive to rounding.
    x_values = np.linspace(0.0, 1.0, n)
    labels = gen_points(x_values)

    # Each experiment is (banner, candidate boundaries). "F" tries every
    # sample position; "F_Q(Q)" tries the Q + 1 multiples of 1/Q in [0, 1].
    experiments = [("######### F ##########", x_values)]
    for Q in (3, 20, 100):
        boundaries = np.linspace(0.0, 1.0, Q + 1)
        experiments.append(("###### F_Q(%d) #######" % Q, boundaries))

    # print(...) with a single argument behaves the same on Python 2 and 3.
    for banner, boundaries in experiments:
        print("\n")
        print(banner)
        emp_risk_minimizer = gen_emprisk_minimizer(boundaries, x_values, labels)
        print("t: " + str(emp_risk_minimizer))
        print("excess risk (formula): %.3f" % excess_risk_from_formula(emp_risk_minimizer))
        print("excess risk (discrete):" + str(excess_risk_discrete(x_values, emp_risk_minimizer)))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment