skyl · November 12, 2011 03:15
diff --git a/gistfile1.py b/gistfile1.py
 import math
 import random


 class ClusterCenters(object):
    """
    Locate ``k`` cluster centers in a set of points with a basic k-means loop.

    Constructing an instance does all the work: ``k`` random starting
    centers are drawn inside the bounding box of the data, then points are
    assigned to their nearest center and every center is moved to the mean
    of its points, until the assignment has been seen unchanged more than
    10 times.  The final centers are printed and remain available as
    ``self.guesses``.

    >> d = ((0,0,0), (0,1,0), (0, 2, 0), (1,2,0), (55, 55,50), (50,50,50), (-100,-100,-100), (-80,-80,-100), (60,60,50))
    >> cc = ClusterCenters(d, 3)
    [[55.0, 55.0, 50.0], [0.25, 1.25, 0.0], [-90.0, -90.0, -100.0]]
    >> cc = ClusterCenters(d, 2)
    [[55.0, 55.0, 50.0], [-29.83..., -29.16..., -33.33...]]
    >> cc = ClusterCenters(d, 2)
    [[-90.0, -90.0, -100.0], [23.71..., 24.28..., 21.42...]]

    Note the order can be random and there are a few solutions, so this is
    not a doctest.
    """

    def __init__(self, data, k):
        """
        Cluster ``data`` around ``k`` centers and print the result.

        data is an iterable of equal-length tuples of numbers; it must
        contain at least one point (an empty input raises IndexError).
        k is the number of centers to look for.
        """
        # Number of times the assignment has been observed unchanged.
        self.found = 0
        self.k = k
        # Materialize so that any iterable works, not only sequences.
        self.data = list(data)
        self.dim = len(self.data[0])

        # The bounding box is scanned once; the random draws still happen
        # guess by guess, coordinate by coordinate.
        bounds = self._bounds(self.data)
        self.guesses = [self._random_guess(bounds) for _ in range(k)]

        self.initial_convert_and_sort()

    def _bounds(self, points):
        """Return one ``(min, max)`` pair per dimension for ``points``."""
        return [
            (min(p[i] for p in points), max(p[i] for p in points))
            for i in range(self.dim)
        ]

    def _random_guess(self, bounds):
        """Draw a random center inside the box described by ``bounds``."""
        return [random.triangular(low, high) for low, high in bounds]

    def initial_convert_and_sort(self):
        """Label the raw points with their nearest guess, then iterate."""
        self.data = self.get_data(self.data)
        self.unpack_new_centers()

    def get_data(self, points):
        """
        Return ``[{"ki": index, "point": point}, ...]`` for ``points``.

        ``ki`` is the index in ``self.guesses`` of the center with the
        smallest Euclidean distance to the point; on a tie the lowest
        index wins.
        """
        dims = range(self.dim)
        new_data = []

        for point in points:
            distances = [
                math.sqrt(sum((point[i] - guess[i]) ** 2 for i in dims))
                for guess in self.guesses
            ]
            new_data.append({
                "ki": distances.index(min(distances)),
                "point": point,
            })

        return new_data

    def _update_centers(self):
        """Move each center to the mean of its points; reseed empty ones."""
        members = [[] for _ in self.guesses]
        for d in self.data:
            members[d["ki"]].append(d["point"])

        bounds = None
        for ki, points in enumerate(members):
            if points:
                # float() forces true division on Python 2 as well, so
                # integer coordinates are not floored.
                count = float(len(points))
                self.guesses[ki] = [
                    sum(p[i] for p in points) / count
                    for i in range(self.dim)
                ]
            else:
                # A center that attracted no point gets a fresh random
                # position inside the bounding box of the data.
                if bounds is None:
                    bounds = self._bounds([d["point"] for d in self.data])
                self.guesses[ki] = self._random_guess(bounds)

    def _reassign(self):
        """
        Re-label every point with its nearest center.

        Returns True once the labels have been found unchanged more than
        10 times (the centers are printed at that moment), else False.
        """
        new_data = self.get_data([d["point"] for d in self.data])

        if new_data != self.data:
            self.data = new_data
            return False

        self.found += 1
        if self.found > 10:
            print(self.guesses)
            return True
        return False

    def unpack_new_centers(self):
        """Alternate center updates and re-labelling until finished."""
        # A loop instead of mutual recursion with correspond_nearest, so the
        # call stack does not grow with the number of iterations.
        while True:
            self._update_centers()
            if self._reassign():
                return

    def correspond_nearest(self):
        """Re-label the points; keep iterating unless that finished the run."""
        if not self._reassign():
            self.unpack_new_centers()
	import math
	import random


	class ClusterCenters(object):
	    """
	    Locate ``k`` cluster centers in a set of points with a basic k-means loop.

	    Constructing an instance does all the work: ``k`` random starting
	    centers are drawn inside the bounding box of the data, then points are
	    assigned to their nearest center and every center is moved to the mean
	    of its points, until the assignment has been seen unchanged more than
	    10 times.  The final centers are printed and remain available as
	    ``self.guesses``.

	    >> d = ((0,0,0), (0,1,0), (0, 2, 0), (1,2,0), (55, 55,50), (50,50,50), (-100,-100,-100), (-80,-80,-100), (60,60,50))
	    >> cc = ClusterCenters(d, 3)
	    [[55.0, 55.0, 50.0], [0.25, 1.25, 0.0], [-90.0, -90.0, -100.0]]
	    >> cc = ClusterCenters(d, 2)
	    [[55.0, 55.0, 50.0], [-29.83..., -29.16..., -33.33...]]
	    >> cc = ClusterCenters(d, 2)
	    [[-90.0, -90.0, -100.0], [23.71..., 24.28..., 21.42...]]

	    Note the order can be random and there are a few solutions, so this is
	    not a doctest.
	    """

	    def __init__(self, data, k):
	        """
	        Cluster ``data`` around ``k`` centers and print the result.

	        data is an iterable of equal-length tuples of numbers; it must
	        contain at least one point (an empty input raises IndexError).
	        k is the number of centers to look for.
	        """
	        # Number of times the assignment has been observed unchanged.
	        self.found = 0
	        self.k = k
	        # Materialize so that any iterable works, not only sequences.
	        self.data = list(data)
	        self.dim = len(self.data[0])

	        # The bounding box is scanned once; the random draws still happen
	        # guess by guess, coordinate by coordinate.
	        bounds = self._bounds(self.data)
	        self.guesses = [self._random_guess(bounds) for _ in range(k)]

	        self.initial_convert_and_sort()

	    def _bounds(self, points):
	        """Return one ``(min, max)`` pair per dimension for ``points``."""
	        return [
	            (min(p[i] for p in points), max(p[i] for p in points))
	            for i in range(self.dim)
	        ]

	    def _random_guess(self, bounds):
	        """Draw a random center inside the box described by ``bounds``."""
	        return [random.triangular(low, high) for low, high in bounds]

	    def initial_convert_and_sort(self):
	        """Label the raw points with their nearest guess, then iterate."""
	        self.data = self.get_data(self.data)
	        self.unpack_new_centers()

	    def get_data(self, points):
	        """
	        Return ``[{"ki": index, "point": point}, ...]`` for ``points``.

	        ``ki`` is the index in ``self.guesses`` of the center with the
	        smallest Euclidean distance to the point; on a tie the lowest
	        index wins.
	        """
	        dims = range(self.dim)
	        new_data = []

	        for point in points:
	            distances = [
	                math.sqrt(sum((point[i] - guess[i]) ** 2 for i in dims))
	                for guess in self.guesses
	            ]
	            new_data.append({
	                "ki": distances.index(min(distances)),
	                "point": point,
	            })

	        return new_data

	    def _update_centers(self):
	        """Move each center to the mean of its points; reseed empty ones."""
	        members = [[] for _ in self.guesses]
	        for d in self.data:
	            members[d["ki"]].append(d["point"])

	        bounds = None
	        for ki, points in enumerate(members):
	            if points:
	                # float() forces true division on Python 2 as well, so
	                # integer coordinates are not floored.
	                count = float(len(points))
	                self.guesses[ki] = [
	                    sum(p[i] for p in points) / count
	                    for i in range(self.dim)
	                ]
	            else:
	                # A center that attracted no point gets a fresh random
	                # position inside the bounding box of the data.
	                if bounds is None:
	                    bounds = self._bounds([d["point"] for d in self.data])
	                self.guesses[ki] = self._random_guess(bounds)

	    def _reassign(self):
	        """
	        Re-label every point with its nearest center.

	        Returns True once the labels have been found unchanged more than
	        10 times (the centers are printed at that moment), else False.
	        """
	        new_data = self.get_data([d["point"] for d in self.data])

	        if new_data != self.data:
	            self.data = new_data
	            return False

	        self.found += 1
	        if self.found > 10:
	            print(self.guesses)
	            return True
	        return False

	    def unpack_new_centers(self):
	        """Alternate center updates and re-labelling until finished."""
	        # A loop instead of mutual recursion with correspond_nearest, so the
	        # call stack does not grow with the number of iterations.
	        while True:
	            self._update_centers()
	            if self._reassign():
	                return

	    def correspond_nearest(self):
	        """Re-label the points; keep iterating unless that finished the run."""
	        if not self._reassign():
	            self.unpack_new_centers()