# Implementation Fuzzy C-Means Algorithm
# GitHub gist ialexpovad/379185ea889fd63a17814098d40b71d8, created February 17, 2023 07:08.
# Note: GitHub flagged this file as containing bidirectional Unicode text that may be
# interpreted or compiled differently than it appears. To review, open the file in an
# editor that reveals hidden Unicode characters.
# %% [markdown] | |
# # Fuzzy C-Means Clustering | |
# %% | |
import re | |
import sys | |
import datetime | |
import os | |
import math | |
# Exception types caught by the helpers in this file.
# NOTE: every entry is a subclass of ``Exception``, which is itself listed, so
# the tuple as a whole catches any ordinary (non-system-exiting) error.
exceptions = (
    TypeError,
    SyntaxError,
    re.error,
    AttributeError,
    ValueError,
    NotImplementedError,
    Exception,
    RuntimeError,
    ImportError,
)
# https://medium.com/analytics-vidhya/fuzzy-sets-fuzzy-c-means-clustering-algorithm-ac5c4386396b | |
# https://towardsdatascience.com/fuzzy-c-means-clustering-with-python-f4908c714081 | |
# %% | |
def ExceptionOutput(exc_info=None, extraInfo=True):
    """
    Print a short report about an exception to the console.

    Intended for use inside an ``except`` block:
    ``Error = ExceptionOutput(sys.exc_info())``.
    Prints the current time, then the exception type, the file name and the
    line number taken from the traceback and, unless ``extraInfo`` is false,
    the exception object itself.

    :param exc_info: ``(type, value, traceback)`` triple as returned by
        ``sys.exc_info()``. When ``None``, ``sys.exc_info()`` is called here.
    :param extraInfo: when true, the exception object is appended to the
        printed line.
    :return: the string ``"<type>: <value>"``; ``None`` if producing the
        report itself raised one of ``exceptions`` (for example when there
        is no traceback to inspect).
    """
    try:
        print(datetime.datetime.now().strftime('%H:%M:%S'), ":")
        if exc_info is None:
            exc_type, exc_obj, exc_tb = sys.exc_info()
        else:
            exc_type, exc_obj, exc_tb = exc_info
        # Only the file's base name is shown, not its full path.
        fName = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        if extraInfo:
            print(exc_type, " in", fName, " line", exc_tb.tb_lineno, ": ", exc_obj)
        else:
            print(exc_type, " in", fName, " line", exc_tb.tb_lineno)
        return str(exc_type) + ": " + str(exc_obj)
    except exceptions as inst:
        # Reporting must never raise into the caller's error handling.
        print("An exception occurred while trying to print an exception!")
        print(inst)
        return None
# %% [markdown] | |
# Reads a data set from a CSV file and returns it as a list of tuples. The file must use comma, tab, or space delimiters. Each line of the file becomes one vector (tuple) in the returned list.
# %% | |
def readData(file):
    """
    Read numeric rows from ``<file>.csv``.

    The delimiter is found by trial: comma first, then tab, then a single
    space. The first delimiter for which every field of every line converts
    to ``float`` is used. Lines that are empty or contain only whitespace
    are skipped. Each failed attempt is reported through ``ExceptionOutput``.

    :param file: path of the file without the ``.csv`` extension.
    :return: list of tuples of floats, one tuple per non-blank line;
        ``None`` if the file cannot be read or no delimiter parses it.
    """
    # Read the file once; the delimiter trials below work on the cached lines.
    try:
        with open(f"{file}.csv", "r") as f:
            lines = [line.rstrip('\n') for line in f]
    except exceptions:
        ExceptionOutput(sys.exc_info())
        return None

    # A blank line would make float('') fail for every delimiter.
    lines = [line for line in lines if line.strip()]

    for delimiter in (',', '\t', ' '):
        try:
            return [
                tuple(float(field) for field in line.split(delimiter))
                for line in lines
            ]
        except exceptions:
            # Wrong delimiter (or bad data): report it and try the next one.
            ExceptionOutput(sys.exc_info())
    return None
# %% | |
def compute_EuclideanDistance(x, c):
    """
    Compute the Euclidean distance between ``x`` and ``c``.

    The components of ``c`` are read at the indices of ``x``, so ``c`` must
    have at least as many components as ``x``; extra components of ``c``
    are ignored.

    :param x: component x from matrix X in formula (a sequence of numbers).
    :param c: component c from matrix C in formula (a sequence of numbers).
    :return: distance of x and c in their vector space as a float; ``None``
        if the computation raised one of ``exceptions`` (the error is
        reported through ``ExceptionOutput``).
    """
    try:
        squared = sum((x_i - c[i]) ** 2 for i, x_i in enumerate(x))
        return math.sqrt(squared)
    except exceptions:
        ExceptionOutput(sys.exc_info())
        return None
# %% | |
def checkConvergence(previous: list, next: list, error: float) -> bool:
    """
    Decide whether the centroids have stopped moving between two iterations.

    Every centroid in ``previous`` is compared with the centroid at the same
    index in ``next`` using ``compute_EuclideanDistance``.

    :param previous: centroids from the previous iteration.
    :param next: centroids from the current iteration; must have at least
        as many entries as ``previous``.
    :param error: largest distance a centroid may move and still count as
        converged.
    :return: 'True' if no centroid moved by more than ``error`` (also for an
        empty ``previous``), 'False' otherwise.
    """
    for i, old_center in enumerate(previous):
        if compute_EuclideanDistance(old_center, next[i]) > error:
            return False
    # Reached only after every centroid has been checked.
    return True
# %% [markdown] | |
# | |
# %% | |
def FCM(x: list, c: list, m: float, it: int, error: float = 0.001):
    """
    Perform Fuzzy C-Means Clustering (FCM) algorithm.

    Each iteration first recomputes the membership ``u[i][j]`` of data point
    ``j`` in cluster ``i`` from the distances to the current centers, then
    recomputes every center as the mean of the data weighted by
    ``u[i][j] ** m``. Iteration stops early once ``checkConvergence`` reports
    that no center moved by more than ``error``.

    :param x: imported all data (sequence of numeric vectors).
    :param c: initial cluster centers; ``len(c[0])`` is taken as the number
        of dimensions.
    :param m: hyper-parameter that controls how fuzzy the cluster will be.
        The higher it is, the fuzzier the cluster will be in the end.
        Must be greater than 1.
    :param it: maximum number of iterations (termination condition).
    :param error: convergence tolerance passed to ``checkConvergence``.
    :return: tuple ``(centers, u)``: the list of cluster centers (tuples)
        and the membership matrix indexed as ``u[cluster][point]``. With
        ``it == 0`` the input ``c`` is returned unchanged and ``u`` is all
        zeros.
    :raises ValueError: if ``m`` is not greater than 1.
    """
    if m <= 1:
        # The exponent 2 / (m - 1) is undefined at m == 1.
        raise ValueError("m must be greater than 1")

    n_clusters = len(c)
    dims = len(c[0]) if c else 0
    exponent = 2 / (m - 1)

    u = [[0 for _ in range(len(x))] for _ in range(n_clusters)]
    new_c = c
    for _ in range(it):
        old_c = new_c

        # Calculate u_ij
        for j, point in enumerate(x):
            # Distances from this point to every center, computed once.
            distances = [
                compute_EuclideanDistance(point, center) for center in old_c
            ]
            coincident = [i for i, d in enumerate(distances) if d == 0]
            if coincident:
                # The point sits exactly on one or more centers: the ratio
                # formula would divide by zero, so share the full membership
                # among those centers and give none to the others.
                share = 1 / len(coincident)
                for i, d in enumerate(distances):
                    u[i][j] = share if d == 0 else 0.0
                continue
            for i, d_i in enumerate(distances):
                divider = sum(pow(d_i / d_l, exponent) for d_l in distances)
                u[i][j] = 1 / divider

        # Calculate c_i
        new_c = []
        for i in range(n_clusters):
            weights = [pow(q, m) for q in u[i]]
            total = sum(weights)
            if total == 0:
                # No point belongs to this cluster (or there is no data):
                # keep the previous center instead of dividing by zero.
                new_c.append(tuple(old_c[i]))
                continue
            center = [
                sum(w * point[l] for w, point in zip(weights, x)) / total
                for l in range(dims)
            ]
            new_c.append(tuple(center))

        if checkConvergence(old_c, new_c, error):
            break
    return new_c, u
# TODO: add subtractive clustering algorithm | |
# %% | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment