mh-github · January 8, 2016 10:41
diff --git a/mh-logistic-regression.rb b/mh-logistic-regression.rb
 # Standardizes feature columns 1..n of +data+ in place: each column is shifted
 # to zero mean and divided by its population standard deviation. Column 0 is
 # never read or written.
 #
 # data - Array of m rows; each row must be indexable from 0 to n.
 # m    - number of rows to process.
 # n    - number of feature columns (columns 1..n are scaled).
 #
 # Returns the same +data+ object, mutated.
 def scalefeatures(data, m, n)
     1.upto n do |j|
         total = 0.0
         0.upto(m - 1) { |i| total += data[i][j] }
         mean = total / m

         squares = 0.0
         0.upto(m - 1) { |i| squares += (data[i][j] - mean) ** 2 }
         stddeviation = Math.sqrt(squares / m)

         0.upto m - 1 do |i|
             centred = data[i][j] - mean
             # A constant column has zero spread; dividing by it would turn
             # every entry into NaN, so such a column is simply zeroed.
             data[i][j] = stddeviation.zero? ? 0.0 : centred / stddeviation
         end
     end

     data
 end

 # Logistic-regression hypothesis: the sigmoid of the dot product of
 # theta[0..n] and x[0..n].
 #
 # theta - parameter vector, indexable from 0 to n.
 # x     - one example, indexable from 0 to n.
 # n     - highest index included in the dot product.
 #
 # Returns a Float strictly between 0 and 1. Results that round to exactly
 # 1.0 or 0.0 are replaced by 0.99999 / 0.00001 so that Math.log of the
 # value (or of one minus the value) stays finite.
 def h_logistic_regression(theta, x, n)
     theta_t_x = (0..n).reduce(0) { |acc, i| acc + theta[i] * x[i] }

     # Math.exp does not raise on overflow in Ruby: it returns Infinity
     # (making k 0.0) or underflows to 0.0 (making k 1.0), so both extremes
     # are handled by the clamps below rather than by a rescue.
     k = 1.0 / (1 + Math.exp(-theta_t_x))

     return 0.99999 if k == 1.0
     return 0.00001 if k.zero?

     k
 end

 # Runs batch gradient descent for logistic regression.
 #
 # theta      - starting parameters (indices 0..n are updated); not mutated.
 # x          - Array of m examples, each indexable from 0 to n.
 # y          - Array of m targets.
 # m          - number of examples.
 # n          - highest parameter / feature index.
 # alpha      - learning rate.
 # iterations - number of full passes to perform.
 #
 # Returns the parameter vector after the last pass (the +theta+ argument
 # itself when no pass is performed).
 def gradientdescent_logistic(theta, x, y, m, n, alpha, iterations)
     1.upto iterations do
         # The prediction error of each example depends only on the theta of
         # the current pass, so it is computed once here instead of once per
         # parameter.
         errors = (0...m).map { |k| h_logistic_regression(theta, x[k], n) - y[k] }

         updated = theta.clone
         0.upto n do |j|
             summation = 0.0
             0.upto(m - 1) { |k| summation += errors[k] * x[k][j] }
             updated[j] = theta[j] - alpha * summation / m
         end
         # All parameters are replaced together (simultaneous update).
         theta = updated
     end

     theta
 end

 # Average cross-entropy cost of the hypothesis over m examples.
 #
 # theta - parameter vector passed through to h_logistic_regression.
 # x     - Array of m examples.
 # y     - Array of m targets.
 # m     - number of examples.
 # n     - highest parameter / feature index.
 #
 # Returns the negated mean of y*log(h) + (1 - y)*log(1 - h).
 def cost_logistic_regression(theta, x, y, m, n)
     summation = 0.0
     0.upto m - 1 do |i|
         # Evaluate the hypothesis once and reuse it for both log terms.
         h = h_logistic_regression(theta, x[i], n)
         positive = y[i]
         negative = 1 - y[i]

         # A term whose weight is zero is skipped: 0 * log(0) would be
         # 0 * -Infinity, i.e. NaN, and would poison the whole sum.
         term = 0.0
         term += positive * Math.log(h) unless positive.zero?
         term += negative * Math.log(1 - h) unless negative.zero?
         summation += term
     end

     -summation / m
 end

 # Reads comma-separated training examples from standard input (the last
 # field of each line is the target, the preceding fields are features),
 # scales the features, runs gradient descent and prints the cost for the
 # initial and the final parameters.
 #
 # Blank lines are ignored. Exits with an error message when no example
 # was read.
 def main
     x = []  # List of training example parameters
     y = []  # List of training example results

     $stdin.each_line do |line|
         # A blank line would otherwise add an empty row and a nil target
         next if line.strip.empty?

         data = line.chomp.split(',').map(&:to_f)
         x << data[0..-2]
         y << data[-1]
     end

     # Without this guard x[0] is nil and the feature count lookup crashes
     abort 'No training examples were read from standard input' if x.empty?

     m = x.length      # Number of training examples
     n = x[0].length   # Number of features

     # Prepend a column of 1's to x (the intercept term)
     x.each { |row| row.unshift(1) }

     # Initialize theta's
     initialtheta = [0.0] * (n + 1)
     learningrate = 0.001
     iterations   = 4000

     x = scalefeatures(x, m, n)

     # Run gradient descent to get our guessed hypothesis
     finaltheta = gradientdescent_logistic(initialtheta,
                                           x, y, m, n,
                                           learningrate, iterations)

     # Evaluate our hypothesis accuracy
     puts "Initial cost: #{cost_logistic_regression(initialtheta, x, y, m, n)}"
     puts "Final cost: #{cost_logistic_regression(finaltheta, x, y, m, n)}"
 end

 # Script entry point: read the training set from standard input, train, and
 # print the initial and final cost.
 main()
	# Standardizes feature columns 1..n of +data+ in place: each column is shifted
	# to zero mean and divided by its population standard deviation. Column 0 is
	# never read or written.
	#
	# data - Array of m rows; each row must be indexable from 0 to n.
	# m    - number of rows to process.
	# n    - number of feature columns (columns 1..n are scaled).
	#
	# Returns the same +data+ object, mutated.
	def scalefeatures(data, m, n)
	    1.upto n do |j|
	        total = 0.0
	        0.upto(m - 1) { |i| total += data[i][j] }
	        mean = total / m

	        squares = 0.0
	        0.upto(m - 1) { |i| squares += (data[i][j] - mean) ** 2 }
	        stddeviation = Math.sqrt(squares / m)

	        0.upto m - 1 do |i|
	            centred = data[i][j] - mean
	            # A constant column has zero spread; dividing by it would turn
	            # every entry into NaN, so such a column is simply zeroed.
	            data[i][j] = stddeviation.zero? ? 0.0 : centred / stddeviation
	        end
	    end

	    data
	end

	# Logistic-regression hypothesis: the sigmoid of the dot product of
	# theta[0..n] and x[0..n].
	#
	# theta - parameter vector, indexable from 0 to n.
	# x     - one example, indexable from 0 to n.
	# n     - highest index included in the dot product.
	#
	# Returns a Float strictly between 0 and 1. Results that round to exactly
	# 1.0 or 0.0 are replaced by 0.99999 / 0.00001 so that Math.log of the
	# value (or of one minus the value) stays finite.
	def h_logistic_regression(theta, x, n)
	    theta_t_x = (0..n).reduce(0) { |acc, i| acc + theta[i] * x[i] }

	    # Math.exp does not raise on overflow in Ruby: it returns Infinity
	    # (making k 0.0) or underflows to 0.0 (making k 1.0), so both extremes
	    # are handled by the clamps below rather than by a rescue.
	    k = 1.0 / (1 + Math.exp(-theta_t_x))

	    return 0.99999 if k == 1.0
	    return 0.00001 if k.zero?

	    k
	end

	# Runs batch gradient descent for logistic regression.
	#
	# theta      - starting parameters (indices 0..n are updated); not mutated.
	# x          - Array of m examples, each indexable from 0 to n.
	# y          - Array of m targets.
	# m          - number of examples.
	# n          - highest parameter / feature index.
	# alpha      - learning rate.
	# iterations - number of full passes to perform.
	#
	# Returns the parameter vector after the last pass (the +theta+ argument
	# itself when no pass is performed).
	def gradientdescent_logistic(theta, x, y, m, n, alpha, iterations)
	    1.upto iterations do
	        # The prediction error of each example depends only on the theta of
	        # the current pass, so it is computed once here instead of once per
	        # parameter.
	        errors = (0...m).map { |k| h_logistic_regression(theta, x[k], n) - y[k] }

	        updated = theta.clone
	        0.upto n do |j|
	            summation = 0.0
	            0.upto(m - 1) { |k| summation += errors[k] * x[k][j] }
	            updated[j] = theta[j] - alpha * summation / m
	        end
	        # All parameters are replaced together (simultaneous update).
	        theta = updated
	    end

	    theta
	end

	# Average cross-entropy cost of the hypothesis over m examples.
	#
	# theta - parameter vector passed through to h_logistic_regression.
	# x     - Array of m examples.
	# y     - Array of m targets.
	# m     - number of examples.
	# n     - highest parameter / feature index.
	#
	# Returns the negated mean of y*log(h) + (1 - y)*log(1 - h).
	def cost_logistic_regression(theta, x, y, m, n)
	    summation = 0.0
	    0.upto m - 1 do |i|
	        # Evaluate the hypothesis once and reuse it for both log terms.
	        h = h_logistic_regression(theta, x[i], n)
	        positive = y[i]
	        negative = 1 - y[i]

	        # A term whose weight is zero is skipped: 0 * log(0) would be
	        # 0 * -Infinity, i.e. NaN, and would poison the whole sum.
	        term = 0.0
	        term += positive * Math.log(h) unless positive.zero?
	        term += negative * Math.log(1 - h) unless negative.zero?
	        summation += term
	    end

	    -summation / m
	end

	# Reads comma-separated training examples from standard input (the last
	# field of each line is the target, the preceding fields are features),
	# scales the features, runs gradient descent and prints the cost for the
	# initial and the final parameters.
	#
	# Blank lines are ignored. Exits with an error message when no example
	# was read.
	def main
	    x = [] # List of training example parameters
	    y = [] # List of training example results

	    $stdin.each_line do |line|
	        # A blank line would otherwise add an empty row and a nil target
	        next if line.strip.empty?

	        data = line.chomp.split(',').map(&:to_f)
	        x << data[0..-2]
	        y << data[-1]
	    end

	    # Without this guard x[0] is nil and the feature count lookup crashes
	    abort 'No training examples were read from standard input' if x.empty?

	    m = x.length # Number of training examples
	    n = x[0].length # Number of features

	    # Prepend a column of 1's to x (the intercept term)
	    x.each { |row| row.unshift(1) }

	    # Initialize theta's
	    initialtheta = [0.0] * (n + 1)
	    learningrate = 0.001
	    iterations = 4000

	    x = scalefeatures(x, m, n)

	    # Run gradient descent to get our guessed hypothesis
	    finaltheta = gradientdescent_logistic(initialtheta,
	                                          x, y, m, n,
	                                          learningrate, iterations)

	    # Evaluate our hypothesis accuracy
	    puts "Initial cost: #{cost_logistic_regression(initialtheta, x, y, m, n)}"
	    puts "Final cost: #{cost_logistic_regression(finaltheta, x, y, m, n)}"
	end

	# Script entry point: read the training set from standard input, train, and
	# print the initial and final cost.
	main()
No results found