Skip to content

Instantly share code, notes, and snippets.

@avescodes
Created November 11, 2009 04:17
Show Gist options
  • Save avescodes/231552 to your computer and use it in GitHub Desktop.
Save avescodes/231552 to your computer and use it in GitHub Desktop.
A B C D E
A
B 16
C 15 4
D 8 7 8
E 10 3 6 11
# Necessary because Ruby doesn't play nice and deep copy when using Hash#dup
class Object; def deep_dup; Marshal.load( Marshal.dump( self ) ); end; end
# Import a matrix (i.e. example.txt) and turn it into a sparse matrix impl. with a hash
def process_matrix(input)
m = {}
input = input.lines.to_a
keys = input.shift.strip.split(' ')
input.map! {|line| line.gsub(/^./,'').strip.split(' ').map(&:to_i) }
input.each_with_index do |line,row|
(0...(keys.length - (keys.length - row))).each do |col|
m[keys[row]] ||= {}
m[keys[row]][keys[col]] = line[col]
end
end
return m
end
def sparse_delete(m,key)
m = m.deep_dup
m.delete(key)
m.each_value {|sub_m| sub_m.delete(key) }
return m
end
def sparse_minimum_pair(m)
min, min_val = ["",""], (1.0/0)
m.each do |row,sub_m|
sub_m.each do |col,val|
min,min_val = [row,col], val if min_val > val
end
end
min
end
def sparse_access(m,a,b)
(m[a] ? m[a][b] : nil) || (m[b] ? m[b][a] : nil)
end
# Calculate the WPGMA of a given matrix
def wpgma(old)
min_pair = sparse_minimum_pair(old)
m = sparse_delete(old,min_pair[-1])
m.each {|k,v| m.delete(k) if v == {} }
# Fix all m[other][new_pair] to be proper
m.each do |row,sub_m|
sub_m.each do |col,val|
if col == min_pair[0]
m[row][min_pair] = ( sparse_access(old,min_pair[0],row) +
sparse_access(old,min_pair[-1],row) ) / 2.0
m[row].delete(col)
end
end
end
if(m.size > 1)
# Fix all m[new_pair] to be proper
m[min_pair] = m.delete(min_pair.first).inject({}) do |h,pair|
k,v = pair
# puts "#{min_pair.inspect}, #{k.inspect}"
h[k] = ( sparse_access(old,min_pair[0],k) +
sparse_access(old,min_pair[-1],k) ) / 2.0
h
end
# and recurse
return wpgma(m)
else # else we are done, extract the final tree
first_part = m.keys.first
second_part = m[first_part].keys.first
return [first_part,second_part]
end
end
if __FILE__ == $0
input = File.read(ARGV.shift)
matrix = process_matrix(input)
puts wpgma(matrix).inspect
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment