Created
January 18, 2012 13:03
-
-
Save uiur/1632915 to your computer and use it in GitHub Desktop.
カニバリズム山脈クラスタリング
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class Cluster | |
attr_accessor :left, :right, :vec, :id, :distance | |
def initialize(args) | |
@vec = args[:vec] | |
@left = args[:left] | |
@right = args[:right] | |
@id = args[:id] | |
@distance = args[:distance] | |
end | |
def leaf? | |
!self.left && !self.right | |
end | |
def self.tanimoto(c1,c2) | |
n = c1.vec.size | |
n1, n2 = 0, 0 | |
cross = 0 | |
v1, v2 = c1.vec, c2.vec | |
n.times do |i| | |
n1 += 1 unless v1[i] == 0 | |
n2 += 1 unless v2[i] == 0 | |
cross += 1 if v1[i] != 0 && v2[i] != 0 | |
end | |
1.0 - cross.to_f / (n1 + n2 - cross) | |
end | |
end | |
def hclusters(rows) | |
currentid = -1 | |
distances = {} | |
clust = rows.map {|k,v| Cluster.new(vec: v, id: k) } | |
n = clust[0].vec.size | |
while clust.size > 1 | |
p clust.size | |
lowestpair = [0,1] | |
closest = Cluster.tanimoto(clust[0], clust[1]) | |
clust.size.times do |i| | |
(i+1...clust.size).each do |j| | |
distances[[clust[i].id,clust[j].id]] ||= Cluster.tanimoto(clust[i], clust[j]) | |
d = distances[[clust[i].id, clust[j].id]] | |
if d < closest | |
closest = d | |
lowestpair = [i,j] | |
end | |
end | |
end | |
mergevec = [] | |
n.times do |i| | |
mergevec << (clust[lowestpair[0]].vec[i] + clust[lowestpair[1]].vec[i]) / 2.0 | |
end | |
newclust = Cluster.new(vec: mergevec, id: currentid, left: clust[lowestpair[0]], right: clust[lowestpair[1]],distance: closest) | |
clust.delete_at(lowestpair[0]) | |
clust.delete_at(lowestpair[1]-1) | |
clust << newclust | |
currentid -= 1 | |
end | |
return clust[0] | |
end | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'cairo' | |
def tanimoto(user1, user2) | |
ids1 = user1[:subject_ids] | |
ids2 = user2[:subject_ids] | |
1.0 - ((ids1 & ids2).size.to_f / (ids1 | ids2).size) | |
end | |
def scaledown(data,m=1000,rate=0.001) | |
n = data.size | |
dist = Array.new(n) { Array.new(n, nil) } | |
fakedist = Array.new(n) { Array.new(n, nil) } | |
n.times do |i| | |
n.times do |j| | |
if i == j | |
dist[i][j] = 0.0 | |
else | |
dist[i][j] = tanimoto(data[i], data[j]) | |
end | |
end | |
end | |
lasterror = nil | |
pos = Array.new(n) { [rand, rand] } | |
m.times do |m| | |
n.times do |i| | |
n.times do |j| | |
fakedist[i][j] = Math.sqrt((pos[i][0] - pos[j][0]) ** 2 + (pos[i][1] - pos[j][1]) ** 2) | |
end | |
end | |
grad = Array.new(n) { [0.0,0.0] } | |
n.times do |i| | |
n.times do |j| | |
next if i == j | |
errorterm = (fakedist[i][j] - dist[i][j]) | |
grad[i][0] += ((pos[i][0] - pos[j][0]) / fakedist[i][j])*errorterm | |
grad[i][1] += ((pos[i][1] - pos[j][1]) / fakedist[i][j])*errorterm | |
end | |
end | |
n.times do |i| | |
pos[i][0] -= rate*grad[i][0] | |
pos[i][1] -= rate*grad[i][1] | |
end | |
p grad[0] | |
p pos[0] | |
end | |
pos | |
end | |
def draw(data, labels=$labels, png='kanisan_cluster.png') | |
w = 2000 | |
h = 2000 | |
surface = Cairo::ImageSurface.new(w, h) | |
context = Cairo::Context.new(surface) | |
context.set_source_rgb(255, 255, 255) | |
context.rectangle(0, 0, w, h) | |
context.fill | |
context.set_source_rgb(0, 0, 0) | |
context.font_size = 25 | |
data.size.times do |i| | |
x = (data[i][0] + 0.5)*1000 | |
y = (data[i][1] + 0.5)*1000 | |
context.move_to(x,y) | |
context.show_text(labels[i]) | |
end | |
surface.write_to_png(png) | |
end |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment