Skip to content

Instantly share code, notes, and snippets.

@mh-github
Created January 8, 2016 10:41
Show Gist options
  • Select an option

  • Save mh-github/2d7dd736a2605685573a to your computer and use it in GitHub Desktop.

Select an option

Save mh-github/2d7dd736a2605685573a to your computer and use it in GitHub Desktop.
# Standardizes feature columns 1..n of +data+ in place so that each column has
# zero mean and unit (population) standard deviation. Column 0 is never
# touched; main stores the constant 1 intercept term there.
#
# A column whose values are all identical has a standard deviation of zero.
# Dividing by it would fill the column with NaN (0.0 / 0.0), so such a column
# is divided by 1.0 instead and ends up as all zeros.
#
# @param data [Array<Array<Numeric>>] training rows; modified in place
# @param m [Integer] number of rows to process
# @param n [Integer] index of the last feature column to scale
# @return [Array<Array<Numeric>>] the same +data+ object, now scaled
def scalefeatures(data, m, n)
  1.upto(n) do |j|
    # Snapshot the raw column so the statistics are taken from unscaled values.
    column = (0...m).map { |i| data[i][j] }

    mean = column.reduce(0.0) { |acc, value| acc + value } / m
    variance = column.reduce(0.0) { |acc, value| acc + (value - mean)**2 } / m

    stddeviation = Math.sqrt(variance)
    # Constant feature: avoid 0.0 / 0.0 => NaN.
    stddeviation = 1.0 if stddeviation.zero?

    (0...m).each { |i| data[i][j] = (column[i] - mean) / stddeviation }
  end
  data
end
# Evaluates the logistic-regression hypothesis: the sigmoid of the dot product
# of theta[0..n] and x[0..n].
#
# The result is kept inside [0.00001, 0.99999] so that both Math.log(h) and
# Math.log(1 - h), as used by the cost function, stay finite. Math.exp does
# not raise on overflow (it returns Infinity), so no rescue is needed: a very
# negative dot product simply yields 0.0 before clamping, a very positive one
# yields 1.0.
#
# @param theta [Array<Numeric>] model parameters, indices 0..n are read
# @param x [Array<Numeric>] one training row, indices 0..n are read
# @param n [Integer] highest index to include in the dot product
# @return [Float] hypothesis value, clamped away from 0 and 1
def h_logistic_regression(theta, x, n)
  theta_t_x = (0..n).reduce(0) { |acc, i| acc + theta[i] * x[i] }
  k = 1.0 / (1 + Math.exp(-theta_t_x))

  # Plain comparisons (rather than Comparable#clamp) let a NaN pass through
  # unchanged instead of raising.
  return 0.99999 if k > 0.99999
  return 0.00001 if k < 0.00001

  k
end
# Runs batch gradient descent for logistic regression and returns the fitted
# parameters. The +theta+ array passed in is not modified; every sweep works
# on a copy and all components are updated simultaneously from the same
# starting parameters.
#
# @param theta [Array<Numeric>] starting parameters, indices 0..n are updated
# @param x [Array<Array<Numeric>>] training rows (index 0..n read per row)
# @param y [Array<Numeric>] training labels, one per row
# @param m [Integer] number of training examples
# @param n [Integer] highest parameter/feature index
# @param alpha [Numeric] learning rate
# @param iterations [Integer] number of full sweeps to perform
# @return [Array<Numeric>] parameters after the final sweep
def gradientdescent_logistic(theta, x, y, m, n, alpha, iterations)
  iterations.times do
    # The residual of each example depends only on the current theta, not on
    # the feature index, so evaluate the hypothesis once per example per sweep
    # instead of once per (feature, example) pair.
    errors = (0...m).map { |k| h_logistic_regression(theta, x[k], n) - y[k] }

    thetatemp = theta.clone
    0.upto(n) do |j|
      summation = 0.0
      m.times { |k| summation += errors[k] * x[k][j] }
      thetatemp[j] = theta[j] - alpha * summation / m
    end
    theta = thetatemp
  end
  theta
end
# Computes the logistic-regression cost (mean negative log-likelihood) of
# +theta+ over the first m training examples:
#
#   -(1/m) * sum( y*log(h) + (1 - y)*log(1 - h) )
#
# where h is h_logistic_regression(theta, x[i], n).
#
# @param theta [Array<Numeric>] model parameters
# @param x [Array<Array<Numeric>>] training rows
# @param y [Array<Numeric>] training labels, one per row
# @param m [Integer] number of training examples
# @param n [Integer] highest parameter/feature index
# @return [Float] the average cost
def cost_logistic_regression(theta, x, y, m, n)
  summation = 0.0
  m.times do |i|
    # Evaluate the hypothesis once and reuse it for both log terms.
    h = h_logistic_regression(theta, x[i], n)
    summation += y[i] * Math.log(h) + (1 - y[i]) * Math.log(1 - h)
  end
  -summation / m
end
# Script entry point. Reads comma-separated training examples from standard
# input (every field but the last is a feature, the last field is the label),
# prepends a constant 1 to each row, scales the features, fits the parameters
# with gradient descent, and prints the cost before and after training.
#
# Blank lines are skipped. If no examples are read, a message is written to
# standard error and the method returns without training.
#
# @return [nil]
def main
  x = [] # List of training example parameters
  y = [] # List of training example results
  $stdin.each_line do |line|
    # A blank line would otherwise add an empty row and a nil label.
    next if line.strip.empty?

    data = line.chomp.split(',').map(&:to_f)
    x << data[0..-2]
    y << data[-1]
  end

  if x.empty?
    warn 'No training examples found on standard input.'
    return
  end

  m = x.length # Number of training examples
  n = x[0].length # Number of features (measured before the 1's column is added)

  # Prepend a column of 1's to x so theta[0] acts as the intercept
  x.each { |row| row.unshift(1) }

  # Initialize theta's
  initialtheta = [0.0] * (n + 1)
  learningrate = 0.001
  iterations = 4000

  x = scalefeatures(x, m, n)

  # Run gradient descent to get our guessed hypothesis
  finaltheta = gradientdescent_logistic(initialtheta,
                                        x, y, m, n,
                                        learningrate, iterations)

  # Evaluate our hypothesis accuracy
  puts "Initial cost: #{cost_logistic_regression(initialtheta, x, y, m, n)}"
  puts "Final cost: #{cost_logistic_regression(finaltheta, x, y, m, n)}"
end
# Run the training script only when this file is executed directly, so the
# functions above can be loaded from another file without consuming stdin.
main if __FILE__ == $PROGRAM_NAME
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment