Toy example for learning the task similarity (MT-MKL)
#!/usr/bin/env python2.6
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Written (W) 2013 Christian Widmer
# Copyright (C) 2013 Max-Planck-Society, MSKCC, TU-Berlin
"""
Created on 04.03.2013
@author: Christian Widmer
@summary: Toy experiment for MT-MKL
"""

import copy
import random

import numpy as np
import pylab
import scipy.cluster.hierarchy as sch

import dcd


# leaf vectors of the task tree, filled by mutate_edge()
leaf_vectors = []


def mutate_vector(vec, num_mut):
    """
    flip the sign of num_mut randomly chosen positions of vec (in place)
    """
    if num_mut > 0:
        positions = random.sample(range(len(vec)), num_mut)
        for pos in positions:
            vec[pos] = vec[pos] * (-1)
    return vec
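
# e.g. mutate_vector(np.ones(4), 2) could return array([ 1., -1., -1.,  1.]):
# two randomly chosen entries have their sign flipped, the rest stay untouched.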


def mutate_edge(vec, depth, degree):
    """
    recursively mutate vec along the edges of a tree with the given
    branching degree; vectors reached at depth 5 are collected as
    leaves in the global list leaf_vectors
    """
    #mutation_rate = 8
    # number of sign flips along this edge, drawn uniformly from 1..14
    mutation_rate, = random.sample(range(1, 15), 1)
    vec_copy = copy.copy(vec)
    vec_copy = mutate_vector(vec_copy, mutation_rate)
    if depth == 5:
        leaf_vectors.append(vec_copy)
        return
    else:
        # recursion
        for d in xrange(degree):
            mutate_edge(vec_copy, depth + 1, degree)


def compute_sim(vectors):
    """
    compute similarity matrix from vectors
    """
    num_vec = len(vectors)
    sim = np.zeros((num_vec, num_vec))
    for i, v_lhs in enumerate(vectors):
        for j, v_rhs in enumerate(vectors):
            sim[i, j] = np.dot(v_lhs, v_rhs)
    return sim
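
# Note (follows from the construction above): the leaf vectors live in
# {-1, +1}^num_dim, so sim[i, j] = num_dim - 2 * hamming(v_i, v_j) and the
# diagonal equals num_dim; closely related tasks get large entries.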


def sample_points(diff_vec, num_points):
    """
    sample points from multidimensional isotropic gaussians,
    given their difference vector
    """
    sigma = 20.0
    points_per_class = num_points / 2
    num_dim = len(diff_vec)
    mean_pos = diff_vec * 0.5
    mean_neg = diff_vec * -0.5
    xt_pos = np.random.multivariate_normal(mean_pos, sigma*np.eye(num_dim), points_per_class)
    xt_neg = np.random.multivariate_normal(mean_neg, sigma*np.eye(num_dim), points_per_class)
    xt = np.vstack([xt_pos, xt_neg])
    lt = np.array([1]*points_per_class + [-1]*points_per_class)
    ret = {"xt": xt, "lt": lt}
    return ret
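
# The two class means are +/- diff_vec / 2, so the Bayes-optimal separating
# hyperplane of a task has normal vector diff_vec; tasks whose leaf vectors
# are similar therefore share similar optimal classifiers, which is the
# structure the multi-task regularizer is meant to exploit.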


def generate_data(num_dim, xt_per_task_train, xt_per_task_test):
    """
    generate toy data set
    """
    # clear list
    del leaf_vectors[:]
    start_vec = np.ones(num_dim)
    mutate_edge(start_vec, 0, 2)
    sim = compute_sim(leaf_vectors)
    #Y = sch.linkage(sim, method='centroid')
    #print Y
    #plot_dendrogram(sim, Y)
    # 32 tasks = 2**5 leaves of the depth-5 binary tree built by mutate_edge
    Y = create_linkage_matrix(32)
    plot_dendrogram(sim, Y)
    pylab.figure()
    Z1 = sch.dendrogram(Y, orientation='right')
    pylab.imshow(sim, interpolation="nearest")
    pylab.show()
    train_data = {}
    test_data = {}
    for task_num, v in enumerate(leaf_vectors):
        train_data[task_num] = sample_points(v, xt_per_task_train)
        test_data[task_num] = sample_points(v, xt_per_task_test)
    return train_data, test_data, sim


def create_mask_stack_level(num_tasks):
    """
    create stack of matrices for
    each inner node of binary tree
    """
    num_levels = int(np.log2(num_tasks))
    stack = np.zeros((num_levels, num_tasks, num_tasks))
    for i in range(num_levels):
        block_size = 2 ** i
        for j in range(0, num_tasks, block_size):
            stack[i, j:j+block_size, j:j+block_size] = 1.0 / float(block_size)
    return stack
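
# Each slice of this stack scales its blocks by 1/block_size, so a block acts
# roughly like an averaging operator over the tasks it covers (one slice per
# tree level). The experiment below uses create_mask_stack instead, which
# builds unscaled 0/1 block masks, one per inner node.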


def create_mask_stack(num_tasks):
    """
    create stack of matrices for
    each inner node of binary tree
    """
    num_levels = int(np.log2(num_tasks))
    stack = []
    # don't use leaves
    for i in range(1, num_levels):
        block_size = 2 ** i
        #num_blocks = int(num_tasks / block_size)
        for j in range(0, num_tasks, block_size):
            mask = np.zeros((num_tasks, num_tasks))
            mask[j:j+block_size, j:j+block_size] = 1.0
            stack.append(mask)
    # add all ones
    stack.append(np.ones((num_tasks, num_tasks)))
    # make sure we have zeros on main diagonal
    for i in xrange(len(stack)):
        for j in xrange(num_tasks):
            stack[i][j, j] = 0.0
    # add eye
    stack.append(np.eye(num_tasks, num_tasks))
    stack = np.array(stack)
    print "stack size", len(stack)
    return stack
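
# Quick sanity check of the stack size (assuming num_tasks is a power of two):
# for num_tasks = 8, levels 1 and 2 contribute 4 + 2 block masks, plus the
# all-ones matrix and the identity, giving len(stack) == 8; for the 32 tasks
# used below the stack holds 16 + 8 + 4 + 2 + 1 + 1 = 32 matrices.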


def create_linkage_matrix(num_tasks):
    """
    create a scipy-style linkage matrix for a balanced binary
    tree over num_tasks leaves
    """
    num_levels = int(np.log2(num_tasks))
    stack = []
    cumulative = 0
    for i in range(num_levels):
        block_size = 2 ** i
        num_blocks = int(num_tasks / block_size)
        for j in range(0, num_blocks, 2):
            row = [cumulative+j, cumulative+j+1, i+1, block_size*2]
            print row
            stack.append(row)
        cumulative += num_blocks
    stack = np.array(stack, dtype=np.float64)
    return stack
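
# The rows above follow scipy's linkage convention [idx_left, idx_right,
# distance, count]; here the merge distance is simply level + 1, which is
# enough for sch.dendrogram (used in plot_dendrogram below) to draw the
# balanced binary task tree.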


def plot_dendrogram(data, Y):
    """
    http://stackoverflow.com/questions/2982929/plotting-results-of-hierarchical-clustering-ontop-of-a-matrix-of-data-in-python
    """
    D = copy.copy(data)
    D /= D[0, 0]
    # Compute and plot first dendrogram.
    fig = pylab.figure(figsize=(8, 8))
    ax1 = fig.add_axes([0.09, 0.1, 0.2, 0.6])
    ax1.set_axis_off()
    Z1 = sch.dendrogram(Y, orientation='right', link_color_func=lambda k: "black")
    ax1.set_xticks([])
    ax1.set_yticks([])
    # Compute and plot second dendrogram.
    ax2 = fig.add_axes([0.3, 0.71, 0.6, 0.2])
    ax2.set_axis_off()
    Z2 = sch.dendrogram(Y, link_color_func=lambda k: "black")
    ax2.set_xticks([])
    ax2.set_yticks([])
    # Plot distance matrix.
    axmatrix = fig.add_axes([0.3, 0.1, 0.6, 0.6])
    idx1 = Z1['leaves']
    idx2 = Z2['leaves']
    D = D[idx1, :]
    D = D[:, idx2]
    im = axmatrix.matshow(D, aspect='auto', origin='lower')  #, cmap=pylab.cm.YlGnBu)
    axmatrix.set_xticks([])
    axmatrix.set_yticks([])
    # Plot colorbar.
    axcolor = fig.add_axes([0.91, 0.1, 0.02, 0.6])
    pylab.colorbar(im, cax=axcolor)
    fig.show()
    pylab.show()


def hierarchical_mask():
    """
    run the toy experiment: MT-MKL over a stack of hierarchy-derived
    task-similarity masks, compared against vanilla MTL, individually
    trained tasks and a union (pooled) model
    """
    #solver = "dcd"
    solver = "dcd_shogun"
    epsilon = 1e-3
    record_interval = 0
    min_interval = 0
    cost = 0.00001
    # define task similarity matrix
    num_tasks = 32
    num_dim = 100
    num_train = 10
    num_test = 1000
    train_data, test_data, _ = generate_data(num_dim, num_train, num_test)
    task_sim = create_mask_stack(num_tasks)
    normalize = True
    ridge = 0.01
    # MT-MKL
    for p_norm in [1.1, 2.0, 3.0]:
        trained_solver = dcd.train_mtl_svm(train_data, task_sim, solver, cost, epsilon, True, p_norm, record_interval, min_interval, normalize, ridge)
        trained_solver.predict(test_data, text="MTMKL p-norm = %.2f" % p_norm)
    # vanilla
    trained_solver = dcd.train_mtl_svm(train_data, task_sim, solver, cost, epsilon, False, p_norm, record_interval, min_interval, normalize, ridge)
    trained_solver.predict(test_data, text="Vanilla")
    # individual
    task_sim = np.zeros((1, num_tasks, num_tasks))
    task_sim[0] = np.eye(num_tasks)
    individual_solver = dcd.train_mtl_svm(train_data, task_sim, solver, cost, epsilon, False, p_norm, record_interval, min_interval, normalize, ridge)
    individual_solver.predict(test_data, text="Individual")
    # union
    task_sim = np.zeros((1, num_tasks, num_tasks))
    task_sim[0] = np.ones(num_tasks) - np.eye(num_tasks)
    union_solver = dcd.train_mtl_svm(train_data, task_sim, solver, cost, epsilon, False, p_norm, record_interval, min_interval, normalize, ridge)
    union_solver.predict(test_data, text="Union")
    pylab.grid(True)
    pylab.show()


if __name__ == "__main__" or __name__ == "pyreport.main":
    hierarchical_mask()
    #create_mask_stack(8)
    #test_mtl()
    #run_experiment()
    #generate_data(100, 10, 100)