Created
April 25, 2011 13:37
-
-
Save happyrobots/940523 to your computer and use it in GitHub Desktop.
stuff
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class Array | |
# Largest range with different subsequence, | |
# without any kind of clever optimization. | |
def range_with_diff_subseq(other) | |
self_size = self.size | |
other_size = other.size | |
low = 0 | |
max_pos = self_size < other_size ? self_size : other_size | |
# trim beginning with the same value | |
while low < max_pos and self[low] == other[low] | |
low += 1 | |
end | |
high = -1 | |
max_pos = -max_pos | |
# trim end with the same value | |
while high > max_pos and self[high] == other[high] | |
high -= 1 | |
end | |
# the two arrays within this range contain different values | |
(low..high) | |
end | |
# | |
# Diff with another array (both array of integers). | |
# self and other must be already sorted. | |
# Worst case complexity is O(n). | |
# Inspired by Knuth-Morris-Pratt algorithm | |
# http://en.wikipedia.org/wiki/Knuth%E2%80%93Morris%E2%80%93Pratt_algorithm | |
# and Longest Common Subsequence problem | |
# http://en.wikipedia.org/wiki/Longest_common_subsequence_problem | |
# | |
# And I think this is more like solving Longest Increasing Subsequence | |
# problem but with unique values in both arrays | |
# http://en.wikipedia.org/wiki/Longest_increasing_subsequence | |
# | |
# Examples: | |
# a = [1,2,3,5,8,9] | |
# b = [9] | |
# p b.sorted_diff(a) # {:removed=>[1, 2, 3, 5, 8], :added=>[]} | |
# p a.sorted_diff(b) # {:removed=>[], :added=>[1, 2, 3, 5, 8]} | |
# p a.sorted_diff(a) # {:removed=>[], :added=>[]} | |
# | |
def sorted_diff(other) | |
trimmed_range = self.range_with_diff_subseq other | |
trimmed_self = self[trimmed_range] | |
trimmed_other = other[trimmed_range] | |
self_size = trimmed_self.size | |
other_size = trimmed_other.size | |
self_i = 0; other_i = 0 | |
added = []; removed = [] | |
while self_i < self_size and other_i < other_size | |
if trimmed_self[self_i] == trimmed_other[other_i] | |
self_i += 1; other_i += 1 | |
elsif trimmed_self[self_i] < trimmed_other[other_i] | |
added << trimmed_self[self_i] | |
self_i += 1 | |
else | |
removed << trimmed_other[other_i] | |
other_i += 1 | |
end | |
end | |
if other_i == other_size | |
sub_self = trimmed_self[self_i...self_size] | |
added.concat sub_self unless sub_self.empty? | |
end | |
if self_i == self_size | |
sub_other = trimmed_other[other_i...other_size] | |
removed.concat sub_other unless sub_other.empty? | |
end | |
{ :added => added, :removed => removed } | |
end | |
# | |
# Standard algorithm for solving Longest Increasing Subsequence | |
# problem http://en.wikipedia.org/wiki/Longest_increasing_subsequence | |
# Does exactly the same as above method but with | |
# worst case complexity O(n log n). | |
# This is ok for n < 10 | |
# | |
def small_sorted_diff(other) | |
added = []; removed = [] | |
self.each { |el| added << el unless other.binary_search(el) } | |
other.each { |el| removed << el unless self.binary_search(el) } | |
{ :added => added, :removed => removed } | |
end | |
# http://snippets.dzone.com/posts/show/898 | |
# | |
# Chooses a random array element from the receiver based on the weights | |
# provided. If _weights_ is nil, then each element is weighed equally. | |
# | |
# [1,2,3].random #=> 2 | |
# [1,2,3].random #=> 1 | |
# [1,2,3].random #=> 3 | |
# | |
# If _weights_ is an array, then each element of the receiver gets its | |
# weight from the corresponding element of _weights_. Notice that it | |
# favors the element with the highest weight. | |
# | |
# [1,2,3].random([1,4,1]) #=> 2 | |
# [1,2,3].random([1,4,1]) #=> 1 | |
# [1,2,3].random([1,4,1]) #=> 2 | |
# [1,2,3].random([1,4,1]) #=> 2 | |
# [1,2,3].random([1,4,1]) #=> 3 | |
# | |
# If _weights_ is a symbol, the weight array is constructed by calling | |
# the appropriate method on each array element in turn. Notice that | |
# it favors the longer word when using :length. | |
# | |
# ['dog', 'cat', 'hippopotamus'].random(:length) #=> "hippopotamus" | |
# ['dog', 'cat', 'hippopotamus'].random(:length) #=> "dog" | |
# ['dog', 'cat', 'hippopotamus'].random(:length) #=> "hippopotamus" | |
# ['dog', 'cat', 'hippopotamus'].random(:length) #=> "hippopotamus" | |
# ['dog', 'cat', 'hippopotamus'].random(:length) #=> "cat" | |
def random(weights=nil) | |
return random(map {|n| n.send(weights)}) if weights.is_a? Symbol | |
weights ||= Array.new(length, 1.0) | |
total = weights.inject(0.0) {|t,w| t+w} | |
point = rand * total | |
zip(weights).each do |n,w| | |
return n if w >= point | |
point -= w | |
end | |
end | |
# Example usage: | |
# array = [1, 2, 3, 4, 5, 6, 7, 8, 9] | |
# puts array.binary_search(9) #=> 8 | |
def binary_search(x) | |
Array.binary_search_within_interval(self, x, 0, self.length) | |
end | |
def self.binary_search_within_interval(arr, elem, low, high) | |
return nil if low > high | |
mid = low+((high-low)/2).to_i | |
if !arr[mid] | |
return nil | |
elsif elem < arr[mid] | |
return Array.binary_search_within_interval(arr, elem, low, mid-1) | |
elsif elem > arr[mid] | |
return Array.binary_search_within_interval(arr, elem, mid+1, high) | |
else | |
return mid | |
end | |
end | |
def sum_elements | |
self.inject(0) { |tmp, memo| memo += tmp } | |
end | |
def average | |
self.sum_elements / self.size | |
end | |
def standard_deviation(unbiased=true) | |
Math.sqrt self.variance(unbiased) | |
end | |
def variance(unbiased=true) | |
# self.map { |i| i**2 }.average - self.average**2 | |
avg = self.average | |
sigma = self.inject(0) { |n, memo| memo += (n - avg)**2 } | |
denom = unbiased ? self.size - 1 : self.size | |
sigma / denom.to_f | |
end | |
def difference_with(other) | |
self.each_with_index.map { |el, i| el - other[i] } | |
end | |
def t_test(alpha, mu) | |
t_n = (self.average - mu) / Math.sqrt(self.variance(false) / self.size) | |
t_n > alpha ? "Reject H0" : "Accept H0" # FIXME: use table | |
end | |
def confidence_interval(alpha) | |
avg = self.average | |
# FIXME: use table | |
half_length = alpha * self.standard_deviation / Math.sqrt(self.size) | |
min = avg - half_length | |
max = avg + half_length | |
{ :average => avg, :half_length => half_length, :range => (min..max) } | |
end | |
# For output comparison of a pair | |
def paired_t(other, alpha) | |
z_r = self.difference_with other | |
self_size = self.size | |
from_t_table = (self_size - 1) * alpha/2 # FIXME: use table | |
z_r.confidence_interval alpha | |
end | |
# For output comparison of multisystems (need to check again) | |
def self.bonferroni(replicas_x, alpha, multipair=false) | |
avgs = replicas_x.map { |x| x.average } | |
if multipair | |
total = avgs.size | |
result = [] | |
avgs.each_with_index do |avg, i| | |
result << [] | |
for j in i...total | |
result[i][j] = avg.paired_t(avgs[j]) | |
end | |
end | |
result | |
else | |
only_first = [avgs[0]] | |
avgs[1..-1].map do { |avg| only_first.paired_t(avg, alpha) } | |
end | |
end | |
# For input modelling | |
def kolmogorof_smirnov_test(alpha) | |
self.sort! | |
self_size = self.size | |
d_plus = self.each_with_index.map { |r, i| i/self_size - r }.sort! | |
d_minus = self.each_with_index.map { |r, i| r - (i-1)/self_size }.sort! | |
d = d_plus[-1] > d_minus[-1] ? d_plus[-1] : d_minus[-1] | |
d_alpha = 10 * alpha # FIXME: use table | |
d > d_alpha ? "Rejected" : "No difference" | |
end | |
end |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[Benchmarking for n=3] | |
small_sorted_diff with O(n log n) | |
user system total real | |
first 0.000000 0.000000 0.000000 ( 0.000062) | |
second 0.000000 0.000000 0.000000 ( 0.000037) | |
third 0.000000 0.000000 0.000000 ( 0.000037) | |
sorted_diff with O(n) | |
user system total real | |
first 0.000000 0.000000 0.000000 ( 0.000050) | |
second 0.000000 0.000000 0.000000 ( 0.000029) | |
third 0.000000 0.000000 0.000000 ( 0.000069) | |
Just for checking: Both results are equivalent? true | |
[Finished benchmarking for n=3] | |
================================================================ | |
[Benchmarking for n=5] | |
small_sorted_diff with O(n log n) | |
user system total real | |
first 0.000000 0.000000 0.000000 ( 0.000059) | |
second 0.000000 0.000000 0.000000 ( 0.000058) | |
third 0.000000 0.000000 0.000000 ( 0.000171) | |
sorted_diff with O(n) | |
user system total real | |
first 0.000000 0.000000 0.000000 ( 0.000038) | |
second 0.000000 0.000000 0.000000 ( 0.000035) | |
third 0.000000 0.000000 0.000000 ( 0.000034) | |
Just for checking: Both results are equivalent? true | |
[Finished benchmarking for n=5] | |
================================================================ | |
[Benchmarking for n=10] | |
small_sorted_diff with O(n log n) | |
user system total real | |
first 0.000000 0.000000 0.000000 ( 0.000168) | |
second 0.000000 0.000000 0.000000 ( 0.000137) | |
third 0.000000 0.000000 0.000000 ( 0.000169) | |
sorted_diff with O(n) | |
user system total real | |
first 0.000000 0.000000 0.000000 ( 0.000062) | |
second 0.000000 0.000000 0.000000 ( 0.000048) | |
third 0.000000 0.000000 0.000000 ( 0.000056) | |
Just for checking: Both results are equivalent? true | |
[Finished benchmarking for n=10] | |
================================================================ | |
[Benchmarking for n=100] | |
small_sorted_diff with O(n log n) | |
user system total real | |
first 0.000000 0.000000 0.000000 ( 0.004804) | |
second 0.010000 0.000000 0.010000 ( 0.002245) | |
third 0.000000 0.000000 0.000000 ( 0.002212) | |
sorted_diff with O(n) | |
user system total real | |
first 0.000000 0.000000 0.000000 ( 0.000281) | |
second 0.000000 0.000000 0.000000 ( 0.000405) | |
third 0.000000 0.000000 0.000000 ( 0.000269) | |
Just for checking: Both results are equivalent? true | |
[Finished benchmarking for n=100] | |
================================================================ | |
[Benchmarking for n=1000] | |
small_sorted_diff with O(n log n) | |
user system total real | |
first 0.030000 0.000000 0.030000 ( 0.037513) | |
second 0.040000 0.000000 0.040000 ( 0.033168) | |
third 0.030000 0.000000 0.030000 ( 0.039381) | |
sorted_diff with O(n) | |
user system total real | |
first 0.000000 0.000000 0.000000 ( 0.002537) | |
second 0.000000 0.000000 0.000000 ( 0.002503) | |
third 0.010000 0.000000 0.010000 ( 0.002499) | |
Just for checking: Both results are equivalent? true | |
[Finished benchmarking for n=1000] | |
================================================================ | |
[Benchmarking for n=2000] | |
small_sorted_diff with O(n log n) | |
user system total real | |
first 0.070000 0.000000 0.070000 ( 0.072813) | |
second 0.070000 0.000000 0.070000 ( 0.071853) | |
third 0.070000 0.000000 0.070000 ( 0.072195) | |
sorted_diff with O(n) | |
user system total real | |
first 0.010000 0.000000 0.010000 ( 0.004950) | |
second 0.000000 0.000000 0.000000 ( 0.004924) | |
third 0.010000 0.000000 0.010000 ( 0.004967) | |
Just for checking: Both results are equivalent? true | |
[Finished benchmarking for n=2000] | |
================================================================ | |
[Benchmarking for n=5000] | |
small_sorted_diff with O(n log n) | |
user system total real | |
first 0.200000 0.000000 0.200000 ( 0.204547) | |
second 0.200000 0.000000 0.200000 ( 0.203259) | |
third 0.200000 0.000000 0.200000 ( 0.203219) | |
sorted_diff with O(n) | |
user system total real | |
first 0.010000 0.000000 0.010000 ( 0.012261) | |
second 0.020000 0.000000 0.020000 ( 0.012820) | |
third 0.010000 0.000000 0.010000 ( 0.014017) | |
Just for checking: Both results are equivalent? true | |
[Finished benchmarking for n=5000] | |
================================================================ | |
[Benchmarking for n=10000] | |
small_sorted_diff with O(n log n) | |
user system total real | |
first 0.440000 0.000000 0.440000 ( 0.438518) | |
second 0.430000 0.000000 0.430000 ( 0.439827) | |
third 0.440000 0.000000 0.440000 ( 0.446619) | |
sorted_diff with O(n) | |
user system total real | |
first 0.020000 0.000000 0.020000 ( 0.025081) | |
second 0.030000 0.000000 0.030000 ( 0.024960) | |
third 0.020000 0.000000 0.020000 ( 0.027038) | |
Just for checking: Both results are equivalent? true | |
[Finished benchmarking for n=10000] | |
================================================================ |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class Array | |
# | |
# Standard algorithm for solving Longest Increasing Subsequence | |
# problem http://en.wikipedia.org/wiki/Longest_increasing_subsequence | |
# Worst case complexity is O(n log n). | |
# This is ok for n < 10 | |
# | |
def small_sorted_diff(other) | |
added = []; removed = [] | |
self.each { |el| added << el unless other.binary_search(el) } | |
other.each { |el| removed << el unless self.binary_search(el) } | |
{ :added => added, :removed => removed } | |
end | |
end |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class Array | |
# Largest range with different subsequence, | |
# without any kind of clever optimization. | |
def range_with_diff_subseq(other) | |
self_size = self.size | |
other_size = other.size | |
low = 0 | |
max_pos = self_size < other_size ? self_size : other_size | |
# trim beginning with the same value | |
while low < max_pos and self[low] == other[low] | |
low += 1 | |
end | |
high = -1 | |
max_pos = -max_pos | |
# trim end with the same value | |
while high > max_pos and self[high] == other[high] | |
high -= 1 | |
end | |
# the two arrays within this range contain different values | |
(low..high) | |
end | |
# | |
# Diff with another array (both array of integers). | |
# self and other must be already sorted. | |
# Worst case complexity is O(n). | |
# Inspired by Knuth-Morris-Pratt algorithm | |
# http://en.wikipedia.org/wiki/Knuth%E2%80%93Morris%E2%80%93Pratt_algorithm | |
# and Longest Common Subsequence problem | |
# http://en.wikipedia.org/wiki/Longest_common_subsequence_problem | |
# | |
# And I think this is more like solving Longest Increasing Subsequence | |
# problem but with unique values in both arrays | |
# http://en.wikipedia.org/wiki/Longest_increasing_subsequence | |
# | |
# Examples: | |
# a = [1,2,3,5,8,9] | |
# b = [9] | |
# p b.sorted_diff(a) # {:removed=>[1, 2, 3, 5, 8], :added=>[]} | |
# p a.sorted_diff(b) # {:removed=>[], :added=>[1, 2, 3, 5, 8]} | |
# p a.sorted_diff(a) # {:removed=>[], :added=>[]} | |
# | |
def sorted_diff(other) | |
# Longest Common Subsequence Optimization | |
trimmed_range = self.range_with_diff_subseq other | |
trimmed_self = self[trimmed_range] | |
trimmed_other = other[trimmed_range] | |
self_size = trimmed_self.size | |
other_size = trimmed_other.size | |
self_i = 0; other_i = 0 | |
added = []; removed = [] | |
# This looks similar to Knuth-Morris-Pratt algorithm | |
while self_i < self_size and other_i < other_size | |
if trimmed_self[self_i] == trimmed_other[other_i] | |
self_i += 1; other_i += 1 | |
# Immediately decide if any of the compared elements | |
# are to be removed or added. | |
elsif trimmed_self[self_i] < trimmed_other[other_i] | |
added << trimmed_self[self_i] | |
self_i += 1 | |
else | |
removed << trimmed_other[other_i] | |
other_i += 1 | |
end | |
end | |
# Add the rest | |
if other_i == other_size | |
sub_self = trimmed_self[self_i...self_size] | |
added.concat sub_self unless sub_self.empty? | |
end | |
# Remove the rest | |
if self_i == self_size | |
sub_other = trimmed_other[other_i...other_size] | |
removed.concat sub_other unless sub_other.empty? | |
end | |
{ :added => added, :removed => removed } | |
end | |
end |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'benchmark' | |
def do_benchmark(n) | |
puts "[Benchmarking for n=#{n}]" | |
a = []; b = [] | |
for i in 1..n | |
if i.even? | |
a << i | |
else | |
b << i | |
end | |
end | |
result_1 = nil | |
puts "small_sorted_diff with O(n log n)" | |
Benchmark.bm(7) do |x| | |
x.report('first') { result_1 = a.small_sorted_diff b } | |
x.report('second') { result_1 = a.small_sorted_diff b } | |
x.report('third') { result_1 = a.small_sorted_diff b } | |
end | |
result_2 = nil | |
puts "sorted_diff with O(n)" | |
Benchmark.bm(7) do |x| | |
x.report('first') { result_2 = a.sorted_diff b } | |
x.report('second') { result_2 = a.sorted_diff b } | |
x.report('third') { result_2 = a.sorted_diff b } | |
end | |
puts "Just for checking: Both results are equivalent? #{result_1 and result_2 and result_1 == result_2}" | |
puts "[Finished benchmarking for n=#{n}]" | |
puts "================================================================" | |
puts " " | |
end | |
do_benchmark 3 | |
do_benchmark 5 | |
do_benchmark 10 | |
do_benchmark 100 | |
do_benchmark 1000 | |
do_benchmark 2000 | |
do_benchmark 5000 | |
do_benchmark 10000 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
time_t timer = time(NULL); | |
printf("Local time is %s\n", ctime(&timer)); | |
printf("GMT time is: %s\n", asctime(gmtime(&timer))); | |
char buffer[80]; | |
strftime(buffer, 80, "%Y-%m-%d %H:%M:%S", gmtime(&timer)); | |
puts(buffer); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment