Created
October 16, 2009 01:55
-
-
Save davidrichards/211471 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This is used when the data set's true max and min can't be calculated.
# It provides approximate values for normalization.
#
# A normalizer is built either from an explicit :max and :min, or from a
# :sample of values, in which case the range is taken to be three standard
# deviations either side of the sample mean. With :learn => true the range
# widens whenever a value outside it is seen.
#
# Integer inputs are divided exactly: results are Rationals (or Integers when
# the quotient is whole), never truncated. Float inputs produce Floats.
class PseudoNormalize
  # mathn used to make Integer#/ exact globally, but it was removed from the
  # standard library in Ruby 2.5. Load it when it is available so existing
  # callers keep the global behaviour they had; nothing below relies on it.
  begin
    require 'mathn'
  rescue LoadError
    nil
  end

  class << self
    # One-shot helper: builds a normalizer and normalizes opts[:sample].
    #
    # opts - the same options as #initialize. When both :max and :min are
    #        given they define the range; otherwise the range is estimated
    #        from :sample. The caller's hash is not modified.
    #
    # Returns whatever #process returns for the sample values.
    def process(opts = {})
      opts = opts.dup
      sample = opts.delete(:sample)
      opts[:sample] = sample unless opts[:max] && opts[:min]
      new(opts).process(*sample)
    end
  end

  attr_reader :max, :min, :learn
  attr_accessor :learning_flag

  # opts - Hash of options:
  #        :max    - upper bound of the range
  #        :min    - lower bound of the range
  #        :sample - Array of values used to estimate the range; when given
  #                  it replaces any :max / :min
  #        :learn  - when true, out-of-range values widen the range
  #                  (default false)
  #
  # Raises ArgumentError when neither a sample nor both bounds are supplied.
  def initialize(opts = {})
    @max = opts[:max]
    @min = opts[:min]
    @learn = opts.fetch(:learn, false)
    set_max_min(*opts[:sample]) if opts[:sample]
    raise ArgumentError, "Must provide a sample or a max and a min" unless max && min
  end

  # Normalizes one or more values.
  #
  # Returns a single normalized value when exactly one value is given,
  # otherwise an Array of normalized values in the same order.
  def process(*values)
    results = values.map { |v| normalize(v) }

    # If learning widened the range part-way through, the earlier results
    # were computed against a stale range, so run the whole batch again.
    if learning_flag
      self.learning_flag = false
      results = values.map { |v| normalize(v) }
    end

    values.size == 1 ? results.first : results
  end

  # Estimates the range as mean +/- 3 standard deviations of the samples.
  # I don't know, 3 standard deviations ought to do it...
  #
  # samples - the sample values, either splatted or as a single Array.
  #
  # Raises ArgumentError with fewer than two samples, because the sample
  # variance divides by (n - 1).
  def set_max_min(*samples)
    samples = samples.flatten
    raise ArgumentError, "Need at least two samples to estimate a range" if samples.size < 2

    center = mean(*samples)
    spread = standard_deviation(*samples) * 3
    @diff = nil # the memoized width no longer matches the new bounds
    @max = center + spread
    @min = center - spread
  end

  # Normalizes a single value against the current range.
  #
  # A value outside the range is treated as the new bound for this call, so
  # the result always lies between 0 and 1. The stored range only changes
  # when learning is enabled.
  #
  # Note: a zero-width range (max == min == v) still divides by zero.
  def normalize(v)
    # The effective bounds for this value
    hi = v > max ? v : max
    lo = v < min ? v : min
    span = hi - lo

    if learn
      # Flag that the range moved so #process knows to recalculate
      self.learning_flag = true if hi != max || lo != min
      @max, @min, @diff = hi, lo, span
    end

    exact_divide(v - lo, span)
  end

  protected

  # Width of the stored range, memoized. Pass true to force a recalculation.
  def diff(reset = false)
    @diff = nil if reset
    @diff ||= max - min
  end

  # Probably shouldn't use this
  def sigmoid(v)
    1 / (1 + Math::E ** -v)
  end

  def mean(*samples)
    exact_divide(sum(*samples), samples.size)
  end

  # The additive identity matching the samples: 0.0 if any is a Float, else 0.
  def zero(*samples)
    samples.any? { |e| e.is_a?(Float) } ? 0.0 : 0
  end

  def sum(*samples)
    samples.inject(zero(*samples)) { |total, e| total + e }
  end

  # Sample variance (divides by n - 1).
  def variance(*samples)
    m = mean(*samples)
    sum_of_differences = samples.inject(zero(*samples)) { |total, e| total + (m - e) ** 2 }
    exact_divide(sum_of_differences, samples.size - 1)
  end

  def standard_deviation(*samples)
    Math.sqrt(variance(*samples))
  end

  # Division that never truncates: Integer operands give a Rational, which is
  # collapsed to an Integer when the quotient is whole; Floats stay Floats.
  def exact_divide(numerator, denominator)
    quotient = numerator.quo(denominator)
    if quotient.is_a?(Rational) && quotient.denominator == 1
      quotient.numerator
    else
      quotient
    end
  end
end
# Specs for PseudoNormalize.
#
# Expected values are written as explicit Rationals and compared with ==,
# because a bare `3/10` only means "three tenths" when mathn has redefined
# Integer#/; in plain Ruby it is integer division and evaluates to 0.
# Only operator matchers are used so the spec does not depend on matchers
# that exist in some RSpec versions but not others.
describe PseudoNormalize do
  before do
    @pn = PseudoNormalize.new(:max => 10, :min => 0)
  end

  it "should be able to normalize values with a known max and min" do
    @pn.normalize(3).should == Rational(3, 10)
    @pn.normalize(6).should == Rational(6, 10)
    @pn.normalize(9).should == Rational(9, 10)
  end

  it "should be able to normalize a value higher than the max" do
    # A value above the max becomes the max for that call, so it maps to 1
    @pn.normalize(10).should == 1
    @pn.normalize(11).should == 1
    @pn.normalize(15).should == 1
  end

  it "should be able to normalize a value lower than the min" do
    # A value below the min becomes the min for that call, so it maps to 0
    @pn.normalize(-1).should == 0
    @pn.normalize(-4).should == 0
  end

  it "should be able to learn the max and the min" do
    @pn = PseudoNormalize.new(:max => 10, :min => 0, :learn => true)
    @pn.learn.should == true
    @pn.normalize(10).should == 1
    @pn.normalize(11).should == 1
    # The range learned from 11 is kept, so 10 is no longer the top
    @pn.normalize(10).should == Rational(10, 11)
  end

  it "should be able to process a value" do
    @pn.process(5).should == Rational(5, 10)
  end

  it "should be able to process more than one value" do
    @pn.process(2, 4, 6).should == [Rational(2, 10), Rational(4, 10), Rational(6, 10)]
  end

  it "should be able to accurately process more than one value when the range changes" do
    @pn = PseudoNormalize.new(:max => 10, :min => 0, :learn => true)
    @pn.process(2, 4, 6, 11).should == [Rational(2, 11), Rational(4, 11), Rational(6, 11), 1]
  end

  it "should be able to set the range based on 3 standard deviations from a mean" do
    # Uniform samples on [0, 1): mean ~0.5 and standard deviation ~0.29
    @pn.set_max_min(*(1..10_000).map { rand })
    (@pn.max - 1.37).abs.should < 0.1
    (@pn.min - -0.38).abs.should < 0.1
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment