Run the example:
$ ruby houses.rb
The example uses gnuplot, which may need to be installed on your system. See the gnuplot gem's documentation for more details.
| 3,10 | |
| 15,4 | |
| 30,35 | |
| 10,12 | |
| 7,5 | |
| 35,30 | |
| 25,25 | |
| 20,23 | |
| 30,27 | |
| 17,13 | |
| 15,20 |
# Gems needed to run houses.rb.
source "https://rubygems.org"

# csv is required by the example; it is a bundled (no longer default) gem from
# Ruby 3.4 on, so it has to be declared for `require "csv"` to work under Bundler.
gem "csv"
gem "gnuplot"
gem "highline"
# A single observation: a house's size and its value.
#
# Both attributes are stored as Floats; whatever is passed in is converted
# with #to_f.
class House
  # @param size [#to_f] size of the house
  # @param value [#to_f] value of the house
  def initialize(size:, value:)
    @size, @value = [size, value].map(&:to_f)
  end

  # @return [Float] the size and value given at construction, as Floats
  attr_reader :size, :value
end
| require "./house" | |
# CSV is used directly below, so load it here rather than relying on the
# caller having required it first.
require "csv"

# CSV-backed store of house observations, one "size,value" row per line.
#
# The file is opened for both reading and appending and is created if it does
# not exist yet.
class HouseData
  # File used when no path is given.
  DEFAULT_PATH = "data.txt"

  # @param path [String] location of the CSV file (defaults to DEFAULT_PATH)
  def initialize(path = DEFAULT_PATH)
    # skip_blanks keeps empty lines from turning into rows with nil fields,
    # which would otherwise be read back as houses of size 0 and value 0.
    @csv = CSV.open(path, "a+b", skip_blanks: true)
  end

  # Re-reads the whole file from the start.
  #
  # @return [Array<House>] one House per row, built from the row's first
  #   field (size) and last field (value)
  def houses
    @csv.rewind
    @csv.map { |row| House.new(size: row.first, value: row.last) }
  end

  # Appends one row to the file and flushes it so the data is on disk
  # immediately instead of sitting in the write buffer.
  #
  # @param row [Array] the fields to write, e.g. [size, value]
  # @return [CSV] the underlying CSV object (as returned by CSV#<<)
  def add(row)
    @csv << row
    @csv.flush
    @csv
  end

  # Closes the underlying file handle.
  #
  # @return [void]
  def close
    @csv.close
  end
end
| require "highline" | |
| require "csv" | |
| require "gnuplot" | |
| require "./house_data" | |
| require "./plot" | |
| require "./learn" | |
# Interactive entry point: shows a one-shot menu to add a data point, plot the
# stored data, or predict a value from a size.
data = HouseData.new
cli = HighLine.new

cli.choose do |menu|
  menu.prompt = "What would you like to do?"

  menu.choice("Add Data") do
    # Asking for a Float makes HighLine reject non-numeric answers and ask
    # again, so malformed text never reaches the data file.
    size = cli.ask("Size (in 100s sq. ft.):", Float)
    value = cli.ask("Value (in $10,000s):", Float)
    data.add([size, value])
  end

  menu.choice("View Data") do
    # Read the file once and derive both axes from the same snapshot.
    houses = data.houses
    Plot.new(houses.map(&:size), houses.map(&:value)).draw
  end

  menu.choice("Predict Value") do
    raw_data = data.houses.map { |h| [h.size, h.value] }

    if raw_data.empty?
      cli.say("There is no data to learn from yet. Add some data first.")
    else
      learn = Learn.new(raw_data)
      size = cli.ask("Size (in 100s sq. ft.):", Float)
      # Values are in $10,000s; multiplying by 10 expresses them in $1,000s.
      predicted_value = (learn.predict(size) * 10).round
      cli.say("Such a house is worth about $#{predicted_value}k")
    end
  end
end
# Fits a line through the origin (y = slope * x) to [x, y] pairs with a simple
# sign-driven iterative search, and predicts y for new x values.
class Learn
  # Curried hypothesis: LINE_THROUGH_ORIGIN.(slope) is a predictor x -> slope * x.
  LINE_THROUGH_ORIGIN = ->(slope, x) { slope * x }.curry

  # Curried cost: the mean signed error (prediction minus actual) of
  # +hypothesis+ over +data+ for a given +slope+. Negative means the
  # predictions are too low on average, positive means too high.
  MEAN_ERRORS = ->(data, hypothesis, slope) {
    errors = data.map { |(x, y)| hypothesis.(slope, x) - y }
    errors.sum.fdiv(errors.size)
  }.curry

  # Searches for the slope at which +calculate_cost+ is zero, starting at 0.0.
  #
  # Each iteration moves the slope up when the cost is negative, down when it
  # is positive, and leaves it alone when the cost is exactly zero. The step
  # size shrinks as learning_rate / iteration.
  #
  # NOTE: because the steps shrink harmonically, the slope can travel at most
  # learning_rate * (1 + 1/2 + ... + 1/iterations) from 0.0 -- about 1.96 with
  # the defaults -- so steeper relationships are out of reach unless the
  # keyword arguments are raised.
  #
  # An optional block is called with the updated slope after every iteration.
  # Returns the final slope (Float).
  MINIMIZE = ->(calculate_cost, learning_rate: 0.2, iterations: 10_000, &callback) {
    (1..iterations).reduce(0.0) do |slope, iteration|
      cost = calculate_cost.(slope)
      # A zero cost means the slope is already optimal: do not step away from it.
      direction = if cost.zero?
                    0
                  elsif cost < 0
                    1
                  else
                    -1
                  end
      next_slope = slope + direction * learning_rate / iteration.to_f
      callback&.call(next_slope)
      next_slope
    end
  }

  # @param data [Array<Array(Numeric, Numeric)>] training pairs of [x, y]
  # @raise [ArgumentError] if +data+ is empty (there is nothing to fit)
  def initialize(data)
    raise ArgumentError, "data must contain at least one [x, y] pair" if data.empty?

    slope = MINIMIZE.(MEAN_ERRORS.(data, LINE_THROUGH_ORIGIN))
    @predictor = LINE_THROUGH_ORIGIN.(slope)
  end

  # @param input [Numeric] the x value to predict for
  # @return [Float] the fitted slope multiplied by +input+
  def predict(input)
    @predictor.(input)
  end
end
| require "gnuplot" | |
# Draws house sizes against house values with gnuplot.
class Plot
  # @param sizes [Array] x values (house sizes)
  # @param values [Array] y values (house values)
  def initialize(sizes, values)
    @sizes, @values = sizes, values
  end

  # @return [Array] the sizes and values given at construction
  attr_reader :sizes, :values

  # Opens a gnuplot session and plots the data on fixed 0..50 axes.
  def draw
    Gnuplot.open do |gnuplot|
      Gnuplot::Plot.new(gnuplot) { |plot| configure(plot) }
    end
  end

  private

  # Sets the axis labels and ranges on +plot+, then attaches the data set.
  def configure(plot)
    plot.xlabel("Size in 100s sq. ft.")
    plot.ylabel("Value in $10,000s")
    plot.xrange("[0:50]")
    plot.yrange("[0:50]")
    plot.data << Gnuplot::DataSet.new([sizes, values])
  end
end