Last active
March 14, 2023 11:28
-
-
Save KalimAmzad/1f34476d44d55c892ec618f0b5f4b73c to your computer and use it in GitHub Desktop.
KNN Implementation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from collections import Counter

import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.model_selection import KFold, train_test_split
def euclidean_distance(x1, x2):
    """Return the Euclidean (L2) distance between vectors *x1* and *x2*."""
    delta = x1 - x2
    return np.sqrt((delta * delta).sum())
def manhattan_distance(x1, x2):
    """Return the Manhattan (L1) distance between vectors *x1* and *x2*."""
    return np.abs(x2 - x1).sum()
def minkowski_distance(x1, x2, p=3):
    """Return the Minkowski distance of order *p* (default 3) between *x1* and *x2*."""
    diffs = np.abs(x1 - x2)
    return (diffs ** p).sum() ** (1 / p)
def cosine_distance(x1, x2):
    """Return the cosine distance (1 - cosine similarity) between *x1* and *x2*.

    The original implementation called sklearn's ``cosine_similarity``,
    which was never imported (NameError at call time). The formula is a
    one-liner in numpy, so compute it directly.

    NOTE(review): a zero-length input vector yields a division by zero
    (nan/inf), same class of problem as before — callers must pass
    non-zero vectors.
    """
    return 1 - np.dot(x1, x2) / (np.linalg.norm(x1) * np.linalg.norm(x2))
def knn(x_train, y_train, x_val, k, distance_measure='euclidean'):
    """Predict the label of *x_val* by majority vote among its *k* nearest
    training samples.

    Parameters
    ----------
    x_train : sequence of feature vectors
    y_train : sequence of labels aligned with ``x_train``
    x_val : single feature vector to classify
    k : number of neighbours to vote
    distance_measure : one of 'euclidean', 'manhattan', 'minkowski', 'cosine'

    Returns
    -------
    The most common label among the k nearest neighbours.

    Raises
    ------
    ValueError
        If *distance_measure* is not a recognised name.
    """
    # Dispatch table instead of an if/elif chain; 'minkowski' keeps the
    # original fixed order p=3.
    dispatch = {
        'euclidean': euclidean_distance,
        'manhattan': manhattan_distance,
        'minkowski': lambda a, b: minkowski_distance(a, b, p=3),
        'cosine': cosine_distance,
    }
    try:
        distance_fn = dispatch[distance_measure]
    except KeyError:
        raise ValueError("Invalid distance measure") from None

    # Pair each training sample's distance with its label.
    distances = [(distance_fn(x, x_val), y) for x, y in zip(x_train, y_train)]

    # Sort by distance ONLY: plain tuple sorting would fall back to
    # comparing labels on tied distances, which crashes for unorderable
    # label types and makes tie-breaking depend on label ordering.
    distances.sort(key=lambda pair: pair[0])

    targets = [label for _, label in distances[:k]]
    return Counter(targets).most_common(1)[0][0]
# --- Data loading -----------------------------------------------------------
# Iris dataset straight from the UCI repository; no header row in the file.
data = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data', header=None)

# Features are every column but the last; labels are the last column.
X = data.iloc[:, :-1].values
y = data.iloc[:, -1].values

# Hold out 20% of the data as a final test set.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# --- Hyperparameters --------------------------------------------------------
k = 3                          # neighbours per vote
k_folds = 5                    # cross-validation folds
distance_measure = 'euclidean'  # metric passed through to knn()

# --- K-fold cross-validation on the training split --------------------------
kfold = KFold(n_splits=k_folds, shuffle=True, random_state=42)
val_accuracies = []
for train_index, val_index in kfold.split(X_train):
    X_cv_train, X_cv_val = X_train[train_index], X_train[val_index]
    y_cv_train, y_cv_val = y_train[train_index], y_train[val_index]
    val_predictions = [
        knn(X_cv_train, y_cv_train, x, k, distance_measure) for x in X_cv_val
    ]
    val_accuracies.append(np.mean(val_predictions == y_cv_val))

avg_val_accuracy = np.mean(val_accuracies)
print("Average Validation Accuracy: {:.2f}%".format(avg_val_accuracy * 100))

# --- Final evaluation on the held-out test set ------------------------------
test_predictions = [knn(X_train, y_train, x, k, distance_measure) for x in X_test]
test_accuracy = np.mean(test_predictions == y_test)
print("Test Accuracy: {:.2f}%".format(test_accuracy * 100))

# Confusion matrix for a per-class view of the test errors.
cm = confusion_matrix(y_test, test_predictions)
print("Confusion Matrix:")
print(cm)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment