@KalimAmzad
Last active March 14, 2023 11:28
KNN Implementation
import numpy as np
import pandas as pd
from sklearn.model_selection import KFold, train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics.pairwise import cosine_similarity
from collections import Counter
def euclidean_distance(x1, x2):
    return np.sqrt(np.sum((x1 - x2)**2))

def manhattan_distance(x1, x2):
    return np.sum(np.abs(x1 - x2))

def minkowski_distance(x1, x2, p=3):
    return np.power(np.sum(np.power(np.abs(x1 - x2), p)), 1/p)

def cosine_distance(x1, x2):
    return 1 - cosine_similarity([x1], [x2])[0][0]
def knn(x_train, y_train, x_val, k, distance_measure='euclidean'):
    # Select the distance function to use
    if distance_measure == 'euclidean':
        distance_fn = euclidean_distance
    elif distance_measure == 'manhattan':
        distance_fn = manhattan_distance
    elif distance_measure == 'minkowski':
        distance_fn = lambda x1, x2: minkowski_distance(x1, x2, p=3)
    elif distance_measure == 'cosine':
        distance_fn = cosine_distance
    else:
        raise ValueError("Invalid distance measure")
    # Compute the distance from the query point to every training point
    distances = []
    for i in range(len(x_train)):
        distance = distance_fn(x_train[i], x_val)
        distances.append((distance, y_train[i]))
    # Sort by distance and keep the labels of the k nearest neighbours
    distances = sorted(distances, key=lambda d: d[0])
    targets = [y for _, y in distances[:k]]
    # Return the majority label among the k neighbours
    return Counter(targets).most_common(1)[0][0]
# Load the iris dataset
data = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data', header=None)
# Assign the features and labels to X and y respectively
X = data.iloc[:, :-1].values
y = data.iloc[:, -1].values
# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Set the number of neighbors to consider (k)
k = 3
# Set the number of folds for k-fold cross-validation
k_folds = 5
# Set the distance measure to use
distance_measure = 'euclidean'
# Initialize the cross-validation folds
kfold = KFold(n_splits=k_folds, shuffle=True, random_state=42)
# Initialize the accuracy scores
val_accuracies = []
# Perform k-fold cross-validation
for train_index, val_index in kfold.split(X_train):
    X_cv_train, X_cv_val = X_train[train_index], X_train[val_index]
    y_cv_train, y_cv_val = y_train[train_index], y_train[val_index]
    # Predict every validation sample of the fold with the KNN classifier defined above
    val_predictions = []
    for x in X_cv_val:
        val_predictions.append(knn(X_cv_train, y_cv_train, x, k, distance_measure))
    val_accuracy = np.mean(np.array(val_predictions) == y_cv_val)
    val_accuracies.append(val_accuracy)
# Calculate the average validation accuracy score
avg_val_accuracy = np.mean(val_accuracies)
print("Average Validation Accuracy: {:.2f}%".format(avg_val_accuracy * 100))
# Calculate the test accuracy
test_predictions = []
for x in X_test:
    test_predictions.append(knn(X_train, y_train, x, k, distance_measure))
test_accuracy = np.mean(np.array(test_predictions) == y_test)
print("Test Accuracy: {:.2f}%".format(test_accuracy * 100))
# Calculate the confusion matrix on the test data
cm = confusion_matrix(y_test, test_predictions)
print("Confusion Matrix:")
print(cm)
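# Optional extension (not part of the original gist): a minimal sketch that reuses the
# knn() function, the kfold splitter, and X_train/y_train defined above to compare the
# four supported distance measures by average cross-validation accuracy. The names
# measure, scores, and fold_preds are illustrative and introduced only for this sketch.
for measure in ['euclidean', 'manhattan', 'minkowski', 'cosine']:
    scores = []
    for tr_idx, va_idx in kfold.split(X_train):
        # Classify each validation sample of the fold with the chosen distance measure
        fold_preds = [knn(X_train[tr_idx], y_train[tr_idx], x, k, measure)
                      for x in X_train[va_idx]]
        scores.append(np.mean(np.array(fold_preds) == y_train[va_idx]))
    print("{:<10s} CV accuracy: {:.2f}%".format(measure, np.mean(scores) * 100))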