Skip to content

Instantly share code, notes, and snippets.

@ialexpovad
Created February 17, 2023 07:08
Show Gist options
  • Save ialexpovad/379185ea889fd63a17814098d40b71d8 to your computer and use it in GitHub Desktop.
Save ialexpovad/379185ea889fd63a17814098d40b71d8 to your computer and use it in GitHub Desktop.
Implementation of the Fuzzy C-Means Algorithm
# %% [markdown]
# # Fuzzy C-Means Clustering
# %%
import re
import sys
import datetime
import os
import math
# Exception classes caught by the helpers in this file. `Exception` is itself
# a member, so the tuple effectively matches every ordinary (non-system-exiting)
# exception; the remaining entries are all subclasses of it.
exceptions = (
    TypeError,
    SyntaxError,
    re.error,
    AttributeError,
    ValueError,
    NotImplementedError,
    Exception,
    RuntimeError,
    ImportError,
)
# https://medium.com/analytics-vidhya/fuzzy-sets-fuzzy-c-means-clustering-algorithm-ac5c4386396b
# https://towardsdatascience.com/fuzzy-c-means-clustering-with-python-f4908c714081
# %%
def ExceptionOutput(exc_info=None, extraInfo=True):
    '''
    Console output for exceptions.
    Use in `except:`: Error = ExceptionOutput(sys.exc_info()).
    Prints the current time, the exception type, the file name and line number
    of the traceback and (if extraInfo is not False) the exception description.
    :param exc_info: (type, value, traceback) triple as returned by
                     sys.exc_info(); when None, sys.exc_info() is queried here.
    :param extraInfo: when true, the exception description is printed as well.
    :return: the string "<exception type>: <exception description>", or None
             if the report itself could not be produced (for example when no
             traceback is available because no exception is being handled).
    '''
    try:
        print(datetime.datetime.now().strftime('%H:%M:%S'), ":")
        # Identity test: an arbitrary object's __eq__ must not decide this.
        if exc_info is None:
            exc_info = sys.exc_info()
        exc_type, exc_obj, exc_tb = exc_info
        # Only the base name of the file is shown to keep the line short.
        fName = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        if extraInfo:
            print(exc_type, " in", fName, " line", exc_tb.tb_lineno, ": ", exc_obj)
        else:
            print(exc_type, " in", fName, " line", exc_tb.tb_lineno)
        return str(exc_type) + ": " + str(exc_obj)
    except exceptions as inst:
        # Reporting must never raise on its own; fall back to a plain message.
        print("An exception occurred while trying to print an exception!")
        print(inst)
        return None
# %% [markdown]
# Read a data set from a CSV file and return it as a list of tuples. The file must be in CSV format with comma, tab or space delimiters. The data array is represented as row vectors: one vector per line, with its components along axis 1.
# %%
def readData(file):
    '''
    Read numeric data from a csv-file.
    The file is read once; its lines are then parsed with a comma delimiter,
    and, if that fails, with a tab delimiter and finally with any run of
    whitespace as delimiter. Blank lines are ignored.
    :param file: file name WITHOUT the ".csv" extension (it is appended here);
                 resolved relative to the current directory unless it is a path.
    :return: list of tuples of floats, one tuple per non-blank line, or None
             if the file could not be read or no delimiter parses every line
             (the failure is reported through ExceptionOutput).
    '''
    try:
        with open(f"{file}.csv", "r") as f:
            # Blank lines (e.g. a trailing empty line) carry no data.
            lines = [line.rstrip('\n') for line in f if line.strip()]
    except exceptions:
        ExceptionOutput(sys.exc_info())
        return None

    # Tried in order; None makes str.split use any run of whitespace.
    delimiters = (',', '\t', None)
    failure = None
    for delimiter in delimiters:
        try:
            return [
                tuple(float(field) for field in line.split(delimiter))
                for line in lines
            ]
        except ValueError:
            # Expected while probing delimiters; only the last one is reported.
            failure = sys.exc_info()
    ExceptionOutput(failure)
    return None
# %%
def compute_EuclideanDistance(x, c):
    '''
    Euclidean distance between the vectors x and c.
    The components are paired by index over the length of x, so c must have
    at least as many components as x.
    :param x: a data vector (a row of matrix X in the formula).
    :param c: a centre vector (a row of matrix C in the formula).
    :return: the distance between x and c; if the computation raises, the
             error is reported through ExceptionOutput and None is returned.
    '''
    try:
        squared_total = 0
        # Accumulate term by term, in index order.
        for idx in range(len(x)):
            delta = x[idx] - c[idx]
            squared_total += delta ** 2
        return math.sqrt(squared_total)
    except exceptions:
        ExceptionOutput(sys.exc_info())
# %%
def checkConvergence(previous: list, next: list, error: float) -> bool:
    """
    Determine whether the centroids have converged between two iterations.
    Every centroid in `previous` is compared with the centroid at the same
    index in `next`; convergence requires that none of them moved farther
    than `error`.
    :param previous: centroids from the previous iteration.
    :param next: centroids from the current iteration (the name shadows the
                 builtin, but is kept so keyword callers keep working).
    :param error: largest centroid displacement still counted as converged.
    :return: 'True' if convergence has been reached (also for an empty
             `previous`), 'False' otherwise.
    """
    for i in range(len(previous)):
        if compute_EuclideanDistance(previous[i], next[i]) > error:
            # One centroid still moving is enough to keep iterating.
            return False
    # Only reached once every pair has been checked.
    return True
# %% [markdown]
#
# %%
def FCM(x: list, c: list, m: float, it: int, error: float = 0.001):
    '''
    Perform Fuzzy C-Means Clustering (FCM) algorithm.
    Each iteration first recomputes the membership degrees u_ij from the
    current cluster centers and then recomputes the centers from those
    memberships. Iteration stops early once checkConvergence reports that no
    center moved farther than `error`.
    :param x: all data points, each a sequence of numbers.
    :param c: initial cluster centers, each with the same number of
              components as the data points.
    :param m: hyper-parameter that controls how fuzzy the clusters will be.
              The higher it is, the fuzzier the clusters will be in the end.
              Must be greater than 1 (the exponent 2 / (m - 1) is undefined
              for m == 1).
    :param it: maximum number of iterations (termination condition).
    :param error: largest center displacement still counted as converged.
    :return: tuple (centers, u): the list of cluster centers (tuples) and the
             membership matrix with u[i][j] = degree of point j in cluster i.
             u is the matrix the returned centers were computed from, i.e. it
             was evaluated against the centers of the preceding step.
    '''
    n_clusters = len(c)
    n_points = len(x)
    dims = len(c[0]) if c else 0
    u = [[0 for _ in range(n_points)] for _ in range(n_clusters)]
    new_c = c
    for _ in range(it):
        old_c = new_c
        exponent = 2 / (m - 1)

        # Calculate u_ij. The distances of a point to all centers are computed
        # once and reused for every cluster.
        for j in range(n_points):
            dists = [compute_EuclideanDistance(x[j], centre) for centre in old_c]
            coincident = sum(1 for d in dists if d == 0)
            if coincident:
                # The point sits exactly on one or more centers: the general
                # formula would divide by zero, so membership is shared
                # equally among those centers and is zero elsewhere.
                share = 1 / coincident
                for i in range(n_clusters):
                    u[i][j] = share if dists[i] == 0 else 0.0
                continue
            for i in range(n_clusters):
                divider = 0
                for d in dists:
                    divider += pow(dists[i] / d, exponent)
                u[i][j] = 1 / divider

        # Calculate c_i as the mean of the points weighted by u_ij ** m.
        new_c = []
        for i in range(n_clusters):
            weighted = [0] * dims
            weight_total = 0
            for j in range(n_points):
                degree = pow(u[i][j], m)
                weight_total += degree
                for l in range(dims):
                    weighted[l] += degree * x[j][l]
            if weight_total == 0:
                # No point carries any weight for this cluster; keep its
                # previous center instead of dividing by zero.
                new_c.append(tuple(old_c[i]))
                continue
            new_c.append(tuple(component / weight_total for component in weighted))

        if checkConvergence(old_c, new_c, error):
            return new_c, u
    return new_c, u
# TODO: add subtractive clustering algorithm
# %%
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment