ialexpovad · February 17, 2023 07:08
diff --git a/fcm.py b/fcm.py
 # %% [markdown]
 # # Fuzzy C-Means Clustering

 # %%
 import re
 import sys
 import datetime
 import os
 import math
 # Exception types caught by the `except exceptions` handlers in this file.
 # `Exception` is one of the entries, so the tuple matches every ordinary error.
 exceptions = (
     TypeError,
     SyntaxError,
     re.error,
     AttributeError,
     ValueError,
     NotImplementedError,
     Exception,
     RuntimeError,
     ImportError,
 )

 # https://medium.com/analytics-vidhya/fuzzy-sets-fuzzy-c-means-clustering-algorithm-ac5c4386396b
 # https://towardsdatascience.com/fuzzy-c-means-clustering-with-python-f4908c714081

 # %%
 def ExceptionOutput(exc_info = None, extraInfo = True):
     '''
     Console output for exceptions.
     Use in `except:`: Error = ExceptionOutput(sys.exc_info()).
     Prints the time, the exception type, the file name and line taken from the
     traceback and (if extraInfo is not False) the exception description.
     :param exc_info: (type, value, traceback) triple as returned by sys.exc_info();
                      when None, sys.exc_info() is queried here.
     :param extraInfo: when true, the exception description is printed as well.
     :return: the string "<exception type>: <description>", or None when the
              report itself fails (for example when there is no traceback).
     '''
     try:
         print(datetime.datetime.now().strftime('%H:%M:%S'), ":")
         # Identity test: None is a singleton, and `==` may be overridden.
         if exc_info is None:
             exc_info = sys.exc_info()
         exc_type, exc_obj, exc_tb = exc_info
         fName = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
         if extraInfo:
             print(exc_type, " in", fName, " line", exc_tb.tb_lineno, ": ", exc_obj)
         else:
             print(exc_type, " in", fName, " line", exc_tb.tb_lineno)
         return str(exc_type) + ": " + str(exc_obj)
     except exceptions as inst:
         # Reporting must never raise on its own: fall back to a plain message.
         print("An exception occurred while trying to print an exception!")
         print(inst)
         return None

 # %% [markdown]
 # Reads a data set from a csv-file and returns it as an array of tuples. The file must be in csv format with tab, comma or space delimiters. Each line of the file is one data vector, so the vectors lie along axis 1 of the data array.

 # %%
 def readData(file):
     '''
     Read numeric data from a csv-file.
     Comma, tab and space are tried in turn as the column delimiter; the first
     one for which every field parses as a float is used. Blank lines are skipped.
     :param file: path of the file without its ".csv" extension.
     :return: list of tuples of floats (one tuple per line), or None when the
              file cannot be read or no delimiter fits.
     '''
     try:
         with open(f"{file}.csv", "r") as f:
             # Blank lines (e.g. an extra newline at the end of the file) carry no data.
             rows = [line.rstrip('\n') for line in f if line.strip()]
     except (OSError, UnicodeError):
         ExceptionOutput(sys.exc_info())
         return None
     failure = None
     for delimiter in (',', '\t', ' '):
         try:
             return [tuple(float(cell) for cell in row.split(delimiter)) for row in rows]
         except ValueError:
             # Wrong delimiter for this file: remember the error and try the next one.
             failure = sys.exc_info()
     # Report only after every delimiter has been ruled out, so a valid
     # tab- or space-delimited file loads without a misleading error message.
     ExceptionOutput(failure)
     return None

 # %%
 def compute_EuclideanDistance(x, c):
     '''
     Euclidean distance between the vectors x and c.
     The squared differences of the components at the same index are
     accumulated over every index of x, and the square root is returned.
     :param x: first vector (a data point in the formula).
     :param c: second vector (a cluster center in the formula).
     :return: distance between x and c; when the computation raises, the error
              is printed through ExceptionOutput and None is returned.
     '''
     try:
         squared_total = 0
         for position in range(len(x)):
             squared_total += (x[position] - c[position]) ** 2
         return math.sqrt(squared_total)
     except exceptions:
         ExceptionOutput(sys.exc_info())

 # %%
 def checkConvergence(previous: list, next: list, error: float) -> bool:
     """
     Determine whether the centroids have converged between two iterations.
     :param previous: centroids before the update.
     :param next:     centroids after the update, in the same order.
     :param error:    largest centroid displacement still counted as converged.
     :return: 'True' if no centroid moved farther than `error`, 'False' otherwise.
     """
     # Every pair has to be inspected: answering after the first pair would
     # report convergence while other centroids are still moving.
     return not any(
         compute_EuclideanDistance(before, after) > error
         for before, after in zip(previous, next)
     )

 # %% [markdown]
 # 

 # %%
 def FCM(x: list, c: list, m: float, it: int, error: float = 0.001):
     '''
     Perform Fuzzy C-Means Clustering (FCM) algorithm.
     :param x:     all imported data points, each a sequence of coordinates.
     :param c:     initial cluster centers array.
     :param m:     hyper-parameter that controls how fuzzy the clusters will be;
                   must be greater than 1. The higher it is, the fuzzier the
                   clusters will be in the end.
     :param it:    maximum number of iterations (termination condition).
     :param error: the run stops early once no center moved farther than this
                   distance during one iteration.
     :return:      tuple (centers, u): the cluster centers and the membership
                   matrix, u[i][j] being the membership of point j in cluster i.
     :raises ValueError: if m is not greater than 1.
     '''
     if m <= 1:
         # The membership exponent 2 / (m - 1) is undefined for m == 1.
         raise ValueError("m must be greater than 1")
     exponent = 2 / (m - 1)
     u = [[0.0 for _ in range(len(x))] for _ in range(len(c))]
     new_c = c
     for _ in range(it):
         old_c = new_c
         # Calculate u_ij
         for j, point in enumerate(x):
             # The distances of one point to all centers are shared by every u_ij of that point.
             distances = [compute_EuclideanDistance(point, center) for center in old_c]
             coincident = [i for i, distance in enumerate(distances) if distance == 0]
             if coincident:
                 # The point sits exactly on one or more centers: the ratio formula
                 # would divide by zero, so those centers share the full membership.
                 share = 1 / len(coincident)
                 for i in range(len(old_c)):
                     u[i][j] = share if i in coincident else 0.0
                 continue
             for i, own_distance in enumerate(distances):
                 divider = 0
                 for other_distance in distances:
                     divider += pow(own_distance / other_distance, exponent)
                 u[i][j] = 1 / divider
         # Calculate c_i
         new_c = []
         for i, center in enumerate(old_c):
             weights = [pow(membership, m) for membership in u[i]]
             weight_total = 0
             for weight in weights:
                 weight_total += weight
             if weight_total == 0:
                 # No point contributes to this cluster: keep its center where it is.
                 new_c.append(tuple(center))
                 continue
             coordinates = [0] * len(center)
             for weight, point in zip(weights, x):
                 for k in range(len(coordinates)):
                     coordinates[k] += weight * point[k]
             new_c.append(tuple(value / weight_total for value in coordinates))
         if checkConvergence(old_c, new_c, error):
             break
     return new_c, u

 # TODO: add subtractive clustering algorithm

 # %%
	# %% [markdown]
	# # Fuzzy C-Means Clustering

	# %%
	import re
	import sys
	import datetime
	import os
	import math
	# Exception types caught by the `except exceptions` handlers in this file.
	# `Exception` is one of the entries, so the tuple matches every ordinary error.
	exceptions = (
	    TypeError,
	    SyntaxError,
	    re.error,
	    AttributeError,
	    ValueError,
	    NotImplementedError,
	    Exception,
	    RuntimeError,
	    ImportError,
	)

	# https://medium.com/analytics-vidhya/fuzzy-sets-fuzzy-c-means-clustering-algorithm-ac5c4386396b
	# https://towardsdatascience.com/fuzzy-c-means-clustering-with-python-f4908c714081

	# %%
	def ExceptionOutput(exc_info = None, extraInfo = True):
	    '''
	    Console output for exceptions.
	    Use in `except:`: Error = ExceptionOutput(sys.exc_info()).
	    Prints the time, the exception type, the file name and line taken from the
	    traceback and (if extraInfo is not False) the exception description.
	    :param exc_info: (type, value, traceback) triple as returned by sys.exc_info();
	                     when None, sys.exc_info() is queried here.
	    :param extraInfo: when true, the exception description is printed as well.
	    :return: the string "<exception type>: <description>", or None when the
	             report itself fails (for example when there is no traceback).
	    '''
	    try:
	        print(datetime.datetime.now().strftime('%H:%M:%S'), ":")
	        # Identity test: None is a singleton, and `==` may be overridden.
	        if exc_info is None:
	            exc_info = sys.exc_info()
	        exc_type, exc_obj, exc_tb = exc_info
	        fName = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
	        if extraInfo:
	            print(exc_type, " in", fName, " line", exc_tb.tb_lineno, ": ", exc_obj)
	        else:
	            print(exc_type, " in", fName, " line", exc_tb.tb_lineno)
	        return str(exc_type) + ": " + str(exc_obj)
	    except exceptions as inst:
	        # Reporting must never raise on its own: fall back to a plain message.
	        print("An exception occurred while trying to print an exception!")
	        print(inst)
	        return None

	# %% [markdown]
	# Reads a data set from a csv-file and returns it as an array of tuples. The file must be in csv format with tab, comma or space delimiters. Each line of the file is one data vector, so the vectors lie along axis 1 of the data array.

	# %%
	def readData(file):
	    '''
	    Read numeric data from a csv-file.
	    Comma, tab and space are tried in turn as the column delimiter; the first
	    one for which every field parses as a float is used. Blank lines are skipped.
	    :param file: path of the file without its ".csv" extension.
	    :return: list of tuples of floats (one tuple per line), or None when the
	             file cannot be read or no delimiter fits.
	    '''
	    try:
	        with open(f"{file}.csv", "r") as f:
	            # Blank lines (e.g. an extra newline at the end of the file) carry no data.
	            rows = [line.rstrip('\n') for line in f if line.strip()]
	    except (OSError, UnicodeError):
	        ExceptionOutput(sys.exc_info())
	        return None
	    failure = None
	    for delimiter in (',', '\t', ' '):
	        try:
	            return [tuple(float(cell) for cell in row.split(delimiter)) for row in rows]
	        except ValueError:
	            # Wrong delimiter for this file: remember the error and try the next one.
	            failure = sys.exc_info()
	    # Report only after every delimiter has been ruled out, so a valid
	    # tab- or space-delimited file loads without a misleading error message.
	    ExceptionOutput(failure)
	    return None

	# %%
	def compute_EuclideanDistance(x, c):
	    '''
	    Euclidean distance between the vectors x and c.
	    The squared differences of the components at the same index are
	    accumulated over every index of x, and the square root is returned.
	    :param x: first vector (a data point in the formula).
	    :param c: second vector (a cluster center in the formula).
	    :return: distance between x and c; when the computation raises, the error
	             is printed through ExceptionOutput and None is returned.
	    '''
	    try:
	        # Named so that the builtin `sum` is not shadowed.
	        squared_total = 0
	        for position in range(len(x)):
	            squared_total += (x[position] - c[position]) ** 2
	        return math.sqrt(squared_total)
	    except exceptions:
	        ExceptionOutput(sys.exc_info())

	# %%
	def checkConvergence(previous: list, next: list, error: float) -> bool:
	    """
	    Determine whether the centroids have converged between two iterations.
	    :param previous: centroids before the update.
	    :param next:     centroids after the update, in the same order.
	    :param error:    largest centroid displacement still counted as converged.
	    :return: 'True' if no centroid moved farther than `error`, 'False' otherwise.
	    """
	    # Every pair has to be inspected: answering after the first pair would
	    # report convergence while other centroids are still moving.
	    return not any(
	        compute_EuclideanDistance(before, after) > error
	        for before, after in zip(previous, next)
	    )

	# %% [markdown]
	#

	# %%
	def FCM(x: list, c: list, m: float, it: int, error: float = 0.001):
	    '''
	    Perform Fuzzy C-Means Clustering (FCM) algorithm.
	    :param x:     all imported data points, each a sequence of coordinates.
	    :param c:     initial cluster centers array.
	    :param m:     hyper-parameter that controls how fuzzy the clusters will be;
	                  must be greater than 1. The higher it is, the fuzzier the
	                  clusters will be in the end.
	    :param it:    maximum number of iterations (termination condition).
	    :param error: the run stops early once no center moved farther than this
	                  distance during one iteration.
	    :return:      tuple (centers, u): the cluster centers and the membership
	                  matrix, u[i][j] being the membership of point j in cluster i.
	    :raises ValueError: if m is not greater than 1.
	    '''
	    if m <= 1:
	        # The membership exponent 2 / (m - 1) is undefined for m == 1.
	        raise ValueError("m must be greater than 1")
	    exponent = 2 / (m - 1)
	    u = [[0.0 for _ in range(len(x))] for _ in range(len(c))]
	    new_c = c
	    for _ in range(it):
	        old_c = new_c
	        # Calculate u_ij
	        for j, point in enumerate(x):
	            # The distances of one point to all centers are shared by every u_ij of that point.
	            distances = [compute_EuclideanDistance(point, center) for center in old_c]
	            coincident = [i for i, distance in enumerate(distances) if distance == 0]
	            if coincident:
	                # The point sits exactly on one or more centers: the ratio formula
	                # would divide by zero, so those centers share the full membership.
	                share = 1 / len(coincident)
	                for i in range(len(old_c)):
	                    u[i][j] = share if i in coincident else 0.0
	                continue
	            for i, own_distance in enumerate(distances):
	                divider = 0
	                for other_distance in distances:
	                    divider += pow(own_distance / other_distance, exponent)
	                u[i][j] = 1 / divider
	        # Calculate c_i
	        new_c = []
	        for i, center in enumerate(old_c):
	            weights = [pow(membership, m) for membership in u[i]]
	            weight_total = 0
	            for weight in weights:
	                weight_total += weight
	            if weight_total == 0:
	                # No point contributes to this cluster: keep its center where it is.
	                new_c.append(tuple(center))
	                continue
	            coordinates = [0] * len(center)
	            for weight, point in zip(weights, x):
	                for k in range(len(coordinates)):
	                    coordinates[k] += weight * point[k]
	            new_c.append(tuple(value / weight_total for value in coordinates))
	        if checkConvergence(old_c, new_c, error):
	            break
	    return new_c, u

	# TODO: add subtractive clustering algorithm

	# %%