Created
February 11, 2016 21:17
-
-
Save tatome/90d78e5d8dec44bfe8cb to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding=utf-8
#
#!/usr/bin/python2
#
# Copyright 2013 Johannes Bauer, Universitaet Hamburg
#
# This file is free software. Do with it whatever you like.
# It comes with no warranty, explicit or implicit, whatsoever.
#
# This python script implements a simple SOM in arbitrary dimensions.
#
# If you find it useful or if you have any questions, do not
# hesitate to contact me at
# bauer at informatik dot uni dash hamburg dot de.
#
import logging

import numpy

# Module-level logger; handlers and levels are left to the importing application.
logger = logging.getLogger(__name__)
class SOM(object):
    """ A simple self-organizing map (Kohonen map) on a rectangular grid.

    The grid may have any number of dimensions. The unit weights are kept in
    ``self.weights``, an array of shape ``tuple(dimensions) + (datalength,)``.
    """

    def __init__(self, dimensions, datalength):
        """
        Parameters:
        dimensions : the dimensions of the SOM (sequence of integers).
        datalength : length of input vectors.
        """
        # Weights start as small uniform random values in [0, 0.1).
        self.weights = numpy.random.rand(*(tuple(dimensions) + (datalength,))) * .1
        self.dimensions = dimensions

    def response(self, datapoint):
        """ Compute the response of the SOM to a given input.
        Parameters:
        datapoint : an input data point.
        Returns:
        activation of the network --- an array with the same dimensionality as the SOM with the squared
        euclidean distance of each unit from the datapoint.
        """
        logger.debug('computing SOM response.')
        # The last axis of the weights holds the data components; summing over
        # it leaves one squared distance per grid unit.
        return numpy.sum(numpy.square(self.weights - datapoint), axis=-1)

    def _h(self, sigma, center):
        """ Compute the Gaussian neighborhood function around a grid position.

        Grid coordinates are divided by (axis length - 1), or by 1 for axes of
        length 1, before distances are taken, so sigma is measured relative to
        the extent of the grid rather than in units.
        Parameters:
        sigma : the width of the Gaussian.
        center : grid index (one integer per grid axis) at which the function peaks.
        Returns:
        an array with the same shape as self.weights whose values are
        exp(-d**2 / sigma**2), d being the normalized grid distance of each
        unit from center; the values are constant along the last (data) axis.
        """
        normalizers = [max(1, d - 1) for d in self.dimensions]

        def f(*vectors):
            # numpy.fromfunction passes one index array per axis of
            # self.weights, including the trailing data axis. zip() stops after
            # the grid axes, so the data-axis index is ignored.
            sqdiff = [(v / float(s) - float(c) / s) ** 2
                      for v, c, s in zip(vectors, center, normalizers)]
            return numpy.exp(-numpy.sum(sqdiff, axis=0) / sigma ** 2)

        return numpy.fromfunction(f, self.weights.shape)

    def bmuIndex(self, response):
        """ A convenience method for computing the index of the BMU for some response.
        Parameters:
        response : this SOM's response to some input.
        Returns:
        the index of the BMU (a tuple whose length is the number of dimensions of the SOM grid.)
        """
        # The BMU is the unit with the smallest squared distance. Arguments are
        # passed positionally because the keyword for the shape argument of
        # numpy.unravel_index was renamed from 'dims' to 'shape' between NumPy
        # releases; the positional form works with both.
        return numpy.unravel_index(response.argmin(), response.shape)

    def update(self, datapoint, sigma, strength):
        """ Update the SOM units' weights with the given datapoint.
        Parameters:
        datapoint : the datapoint with which to update.
        sigma : the width of the Gaussian neighborhood interaction function h for the update.
        strength : the strength of the update.
        Returns:
        The response of the network *before the update*. See response()
        """
        logger.debug("Updating. Datapoint: %s.", datapoint)
        response = self.response(datapoint)
        maxindex = self.bmuIndex(response)
        logger.debug("BMU index: %s", maxindex)
        logger.debug("BMU weights before: %s", self.weights[maxindex])
        # Every unit moves towards the datapoint by a fraction given by the
        # neighborhood function scaled by the update strength.
        h = strength * self._h(sigma, maxindex)
        self.weights = self.weights + (datapoint - self.weights) * h
        logger.debug("BMU weights after: %s", self.weights[maxindex])
        return response
if __name__ == '__main__':
    # Demo: train a 100x100 SOM on uniformly random 2-D points and display it.
    logging.basicConfig(level=logging.INFO)
    logging.info("Just a second...")
    # matplotlib is only needed for the demo, so it is imported here rather
    # than at module level.
    from matplotlib import pyplot as plt

    def visualize(som):
        """ Show the weights of a 100x100 SOM with 2-D data as an RGB image.

        The two weight components fill the red and green channels; the blue
        channel stays zero.
        """
        canvas = numpy.zeros((100, 100, 3))
        canvas[:, :, :2] = som.weights
        plt.imshow(canvas)
        plt.show()

    logging.info("Training an example 100x100-unit SOM with random data. Please wait.")
    mysom = SOM((100, 100), 2)
    steps = 1000
    # Neighborhood width and update strength both shrink linearly over training.
    widths = numpy.linspace(1.14, .01, steps)
    strengths = numpy.linspace(.3, .001, steps)
    for width, strength in zip(widths, strengths):
        sample = (numpy.random.random(), numpy.random.random())
        mysom.update(sample, width, strength)
    logging.info("Done. Showing a visualization of the SOM units' weights.")
    visualize(mysom)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I've implemented a simple, bog-standard Self-Organizing Map (a.k.a. Kohonen map) as a benchmark against which to compare my algorithms. It uses numpy and is compatible with Python 2.7.
It does not (currently) have a sophisticated topology, such as a toroidal or hexagonal grid.