Skip to content

Instantly share code, notes, and snippets.

@happyrobots
Created April 25, 2011 13:37
Show Gist options
  • Save happyrobots/940523 to your computer and use it in GitHub Desktop.
Save happyrobots/940523 to your computer and use it in GitHub Desktop.
stuff
class Array
# Largest range with different subsequence,
# without any kind of clever optimization.
def range_with_diff_subseq(other)
self_size = self.size
other_size = other.size
low = 0
max_pos = self_size < other_size ? self_size : other_size
# trim beginning with the same value
while low < max_pos and self[low] == other[low]
low += 1
end
high = -1
max_pos = -max_pos
# trim end with the same value
while high > max_pos and self[high] == other[high]
high -= 1
end
# the two arrays within this range contain different values
(low..high)
end
#
# Diff with another array (both array of integers).
# self and other must be already sorted.
# Worst case complexity is O(n).
# Inspired by Knuth-Morris-Pratt algorithm
# http://en.wikipedia.org/wiki/Knuth%E2%80%93Morris%E2%80%93Pratt_algorithm
# and Longest Common Subsequence problem
# http://en.wikipedia.org/wiki/Longest_common_subsequence_problem
#
# And I think this is more like solving Longest Increasing Subsequence
# problem but with unique values in both arrays
# http://en.wikipedia.org/wiki/Longest_increasing_subsequence
#
# Examples:
# a = [1,2,3,5,8,9]
# b = [9]
# p b.sorted_diff(a) # {:removed=>[1, 2, 3, 5, 8], :added=>[]}
# p a.sorted_diff(b) # {:removed=>[], :added=>[1, 2, 3, 5, 8]}
# p a.sorted_diff(a) # {:removed=>[], :added=>[]}
#
def sorted_diff(other)
trimmed_range = self.range_with_diff_subseq other
trimmed_self = self[trimmed_range]
trimmed_other = other[trimmed_range]
self_size = trimmed_self.size
other_size = trimmed_other.size
self_i = 0; other_i = 0
added = []; removed = []
while self_i < self_size and other_i < other_size
if trimmed_self[self_i] == trimmed_other[other_i]
self_i += 1; other_i += 1
elsif trimmed_self[self_i] < trimmed_other[other_i]
added << trimmed_self[self_i]
self_i += 1
else
removed << trimmed_other[other_i]
other_i += 1
end
end
if other_i == other_size
sub_self = trimmed_self[self_i...self_size]
added.concat sub_self unless sub_self.empty?
end
if self_i == self_size
sub_other = trimmed_other[other_i...other_size]
removed.concat sub_other unless sub_other.empty?
end
{ :added => added, :removed => removed }
end
#
# Standard algorithm for solving Longest Increasing Subsequence
# problem http://en.wikipedia.org/wiki/Longest_increasing_subsequence
# Does exactly the same as above method but with
# worst case complexity O(n log n).
# This is ok for n < 10
#
def small_sorted_diff(other)
added = []; removed = []
self.each { |el| added << el unless other.binary_search(el) }
other.each { |el| removed << el unless self.binary_search(el) }
{ :added => added, :removed => removed }
end
# http://snippets.dzone.com/posts/show/898
#
# Chooses a random array element from the receiver based on the weights
# provided. If _weights_ is nil, then each element is weighed equally.
#
# [1,2,3].random #=> 2
# [1,2,3].random #=> 1
# [1,2,3].random #=> 3
#
# If _weights_ is an array, then each element of the receiver gets its
# weight from the corresponding element of _weights_. Notice that it
# favors the element with the highest weight.
#
# [1,2,3].random([1,4,1]) #=> 2
# [1,2,3].random([1,4,1]) #=> 1
# [1,2,3].random([1,4,1]) #=> 2
# [1,2,3].random([1,4,1]) #=> 2
# [1,2,3].random([1,4,1]) #=> 3
#
# If _weights_ is a symbol, the weight array is constructed by calling
# the appropriate method on each array element in turn. Notice that
# it favors the longer word when using :length.
#
# ['dog', 'cat', 'hippopotamus'].random(:length) #=> "hippopotamus"
# ['dog', 'cat', 'hippopotamus'].random(:length) #=> "dog"
# ['dog', 'cat', 'hippopotamus'].random(:length) #=> "hippopotamus"
# ['dog', 'cat', 'hippopotamus'].random(:length) #=> "hippopotamus"
# ['dog', 'cat', 'hippopotamus'].random(:length) #=> "cat"
def random(weights=nil)
return random(map {|n| n.send(weights)}) if weights.is_a? Symbol
weights ||= Array.new(length, 1.0)
total = weights.inject(0.0) {|t,w| t+w}
point = rand * total
zip(weights).each do |n,w|
return n if w >= point
point -= w
end
end
# Example usage:
# array = [1, 2, 3, 4, 5, 6, 7, 8, 9]
# puts array.binary_search(9) #=> 8
def binary_search(x)
Array.binary_search_within_interval(self, x, 0, self.length)
end
def self.binary_search_within_interval(arr, elem, low, high)
return nil if low > high
mid = low+((high-low)/2).to_i
if !arr[mid]
return nil
elsif elem < arr[mid]
return Array.binary_search_within_interval(arr, elem, low, mid-1)
elsif elem > arr[mid]
return Array.binary_search_within_interval(arr, elem, mid+1, high)
else
return mid
end
end
def sum_elements
self.inject(0) { |tmp, memo| memo += tmp }
end
def average
self.sum_elements / self.size
end
def standard_deviation(unbiased=true)
Math.sqrt self.variance(unbiased)
end
def variance(unbiased=true)
# self.map { |i| i**2 }.average - self.average**2
avg = self.average
sigma = self.inject(0) { |n, memo| memo += (n - avg)**2 }
denom = unbiased ? self.size - 1 : self.size
sigma / denom.to_f
end
def difference_with(other)
self.each_with_index.map { |el, i| el - other[i] }
end
def t_test(alpha, mu)
t_n = (self.average - mu) / Math.sqrt(self.variance(false) / self.size)
t_n > alpha ? "Reject H0" : "Accept H0" # FIXME: use table
end
def confidence_interval(alpha)
avg = self.average
# FIXME: use table
half_length = alpha * self.standard_deviation / Math.sqrt(self.size)
min = avg - half_length
max = avg + half_length
{ :average => avg, :half_length => half_length, :range => (min..max) }
end
# For output comparison of a pair
def paired_t(other, alpha)
z_r = self.difference_with other
self_size = self.size
from_t_table = (self_size - 1) * alpha/2 # FIXME: use table
z_r.confidence_interval alpha
end
# For output comparison of multisystems (need to check again)
def self.bonferroni(replicas_x, alpha, multipair=false)
avgs = replicas_x.map { |x| x.average }
if multipair
total = avgs.size
result = []
avgs.each_with_index do |avg, i|
result << []
for j in i...total
result[i][j] = avg.paired_t(avgs[j])
end
end
result
else
only_first = [avgs[0]]
avgs[1..-1].map do { |avg| only_first.paired_t(avg, alpha) }
end
end
# For input modelling
def kolmogorof_smirnov_test(alpha)
self.sort!
self_size = self.size
d_plus = self.each_with_index.map { |r, i| i/self_size - r }.sort!
d_minus = self.each_with_index.map { |r, i| r - (i-1)/self_size }.sort!
d = d_plus[-1] > d_minus[-1] ? d_plus[-1] : d_minus[-1]
d_alpha = 10 * alpha # FIXME: use table
d > d_alpha ? "Rejected" : "No difference"
end
end
[Benchmarking for n=3]
small_sorted_diff with O(n log n)
user system total real
first 0.000000 0.000000 0.000000 ( 0.000062)
second 0.000000 0.000000 0.000000 ( 0.000037)
third 0.000000 0.000000 0.000000 ( 0.000037)
sorted_diff with O(n)
user system total real
first 0.000000 0.000000 0.000000 ( 0.000050)
second 0.000000 0.000000 0.000000 ( 0.000029)
third 0.000000 0.000000 0.000000 ( 0.000069)
Just for checking: Both results are equivalent? true
[Finished benchmarking for n=3]
================================================================
[Benchmarking for n=5]
small_sorted_diff with O(n log n)
user system total real
first 0.000000 0.000000 0.000000 ( 0.000059)
second 0.000000 0.000000 0.000000 ( 0.000058)
third 0.000000 0.000000 0.000000 ( 0.000171)
sorted_diff with O(n)
user system total real
first 0.000000 0.000000 0.000000 ( 0.000038)
second 0.000000 0.000000 0.000000 ( 0.000035)
third 0.000000 0.000000 0.000000 ( 0.000034)
Just for checking: Both results are equivalent? true
[Finished benchmarking for n=5]
================================================================
[Benchmarking for n=10]
small_sorted_diff with O(n log n)
user system total real
first 0.000000 0.000000 0.000000 ( 0.000168)
second 0.000000 0.000000 0.000000 ( 0.000137)
third 0.000000 0.000000 0.000000 ( 0.000169)
sorted_diff with O(n)
user system total real
first 0.000000 0.000000 0.000000 ( 0.000062)
second 0.000000 0.000000 0.000000 ( 0.000048)
third 0.000000 0.000000 0.000000 ( 0.000056)
Just for checking: Both results are equivalent? true
[Finished benchmarking for n=10]
================================================================
[Benchmarking for n=100]
small_sorted_diff with O(n log n)
user system total real
first 0.000000 0.000000 0.000000 ( 0.004804)
second 0.010000 0.000000 0.010000 ( 0.002245)
third 0.000000 0.000000 0.000000 ( 0.002212)
sorted_diff with O(n)
user system total real
first 0.000000 0.000000 0.000000 ( 0.000281)
second 0.000000 0.000000 0.000000 ( 0.000405)
third 0.000000 0.000000 0.000000 ( 0.000269)
Just for checking: Both results are equivalent? true
[Finished benchmarking for n=100]
================================================================
[Benchmarking for n=1000]
small_sorted_diff with O(n log n)
user system total real
first 0.030000 0.000000 0.030000 ( 0.037513)
second 0.040000 0.000000 0.040000 ( 0.033168)
third 0.030000 0.000000 0.030000 ( 0.039381)
sorted_diff with O(n)
user system total real
first 0.000000 0.000000 0.000000 ( 0.002537)
second 0.000000 0.000000 0.000000 ( 0.002503)
third 0.010000 0.000000 0.010000 ( 0.002499)
Just for checking: Both results are equivalent? true
[Finished benchmarking for n=1000]
================================================================
[Benchmarking for n=2000]
small_sorted_diff with O(n log n)
user system total real
first 0.070000 0.000000 0.070000 ( 0.072813)
second 0.070000 0.000000 0.070000 ( 0.071853)
third 0.070000 0.000000 0.070000 ( 0.072195)
sorted_diff with O(n)
user system total real
first 0.010000 0.000000 0.010000 ( 0.004950)
second 0.000000 0.000000 0.000000 ( 0.004924)
third 0.010000 0.000000 0.010000 ( 0.004967)
Just for checking: Both results are equivalent? true
[Finished benchmarking for n=2000]
================================================================
[Benchmarking for n=5000]
small_sorted_diff with O(n log n)
user system total real
first 0.200000 0.000000 0.200000 ( 0.204547)
second 0.200000 0.000000 0.200000 ( 0.203259)
third 0.200000 0.000000 0.200000 ( 0.203219)
sorted_diff with O(n)
user system total real
first 0.010000 0.000000 0.010000 ( 0.012261)
second 0.020000 0.000000 0.020000 ( 0.012820)
third 0.010000 0.000000 0.010000 ( 0.014017)
Just for checking: Both results are equivalent? true
[Finished benchmarking for n=5000]
================================================================
[Benchmarking for n=10000]
small_sorted_diff with O(n log n)
user system total real
first 0.440000 0.000000 0.440000 ( 0.438518)
second 0.430000 0.000000 0.430000 ( 0.439827)
third 0.440000 0.000000 0.440000 ( 0.446619)
sorted_diff with O(n)
user system total real
first 0.020000 0.000000 0.020000 ( 0.025081)
second 0.030000 0.000000 0.030000 ( 0.024960)
third 0.020000 0.000000 0.020000 ( 0.027038)
Just for checking: Both results are equivalent? true
[Finished benchmarking for n=10000]
================================================================
class Array
#
# Standard algorithm for solving Longest Increasing Subsequence
# problem http://en.wikipedia.org/wiki/Longest_increasing_subsequence
# Worst case complexity is O(n log n).
# This is ok for n < 10
#
def small_sorted_diff(other)
added = []; removed = []
self.each { |el| added << el unless other.binary_search(el) }
other.each { |el| removed << el unless self.binary_search(el) }
{ :added => added, :removed => removed }
end
end
class Array
# Largest range with different subsequence,
# without any kind of clever optimization.
def range_with_diff_subseq(other)
self_size = self.size
other_size = other.size
low = 0
max_pos = self_size < other_size ? self_size : other_size
# trim beginning with the same value
while low < max_pos and self[low] == other[low]
low += 1
end
high = -1
max_pos = -max_pos
# trim end with the same value
while high > max_pos and self[high] == other[high]
high -= 1
end
# the two arrays within this range contain different values
(low..high)
end
#
# Diff with another array (both array of integers).
# self and other must be already sorted.
# Worst case complexity is O(n).
# Inspired by Knuth-Morris-Pratt algorithm
# http://en.wikipedia.org/wiki/Knuth%E2%80%93Morris%E2%80%93Pratt_algorithm
# and Longest Common Subsequence problem
# http://en.wikipedia.org/wiki/Longest_common_subsequence_problem
#
# And I think this is more like solving Longest Increasing Subsequence
# problem but with unique values in both arrays
# http://en.wikipedia.org/wiki/Longest_increasing_subsequence
#
# Examples:
# a = [1,2,3,5,8,9]
# b = [9]
# p b.sorted_diff(a) # {:removed=>[1, 2, 3, 5, 8], :added=>[]}
# p a.sorted_diff(b) # {:removed=>[], :added=>[1, 2, 3, 5, 8]}
# p a.sorted_diff(a) # {:removed=>[], :added=>[]}
#
def sorted_diff(other)
# Longest Common Subsequence Optimization
trimmed_range = self.range_with_diff_subseq other
trimmed_self = self[trimmed_range]
trimmed_other = other[trimmed_range]
self_size = trimmed_self.size
other_size = trimmed_other.size
self_i = 0; other_i = 0
added = []; removed = []
# This looks similar to Knuth-Morris-Pratt algorithm
while self_i < self_size and other_i < other_size
if trimmed_self[self_i] == trimmed_other[other_i]
self_i += 1; other_i += 1
# Immediately decide if any of the compared elements
# are to be removed or added.
elsif trimmed_self[self_i] < trimmed_other[other_i]
added << trimmed_self[self_i]
self_i += 1
else
removed << trimmed_other[other_i]
other_i += 1
end
end
# Add the rest
if other_i == other_size
sub_self = trimmed_self[self_i...self_size]
added.concat sub_self unless sub_self.empty?
end
# Remove the rest
if self_i == self_size
sub_other = trimmed_other[other_i...other_size]
removed.concat sub_other unless sub_other.empty?
end
{ :added => added, :removed => removed }
end
end
require 'benchmark'
def do_benchmark(n)
puts "[Benchmarking for n=#{n}]"
a = []; b = []
for i in 1..n
if i.even?
a << i
else
b << i
end
end
result_1 = nil
puts "small_sorted_diff with O(n log n)"
Benchmark.bm(7) do |x|
x.report('first') { result_1 = a.small_sorted_diff b }
x.report('second') { result_1 = a.small_sorted_diff b }
x.report('third') { result_1 = a.small_sorted_diff b }
end
result_2 = nil
puts "sorted_diff with O(n)"
Benchmark.bm(7) do |x|
x.report('first') { result_2 = a.sorted_diff b }
x.report('second') { result_2 = a.sorted_diff b }
x.report('third') { result_2 = a.sorted_diff b }
end
puts "Just for checking: Both results are equivalent? #{result_1 and result_2 and result_1 == result_2}"
puts "[Finished benchmarking for n=#{n}]"
puts "================================================================"
puts " "
end
do_benchmark 3
do_benchmark 5
do_benchmark 10
do_benchmark 100
do_benchmark 1000
do_benchmark 2000
do_benchmark 5000
do_benchmark 10000
time_t timer = time(NULL);
printf("Local time is %s\n", ctime(&timer));
printf("GMT time is: %s\n", asctime(gmtime(&timer)));
char buffer[80];
strftime(buffer, 80, "%Y-%m-%d %H:%M:%S", gmtime(&timer));
puts(buffer);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment