Skip to content

Instantly share code, notes, and snippets.

@saliksyed
Created August 17, 2017 07:46
Show Gist options
  • Save saliksyed/32e01d87630c6a2280b11f52b403465b to your computer and use it in GitHub Desktop.
Save saliksyed/32e01d87630c6a2280b11f52b403465b to your computer and use it in GitHub Desktop.
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
Created on Thu Aug 17 10:02:54 2017
@author: saliksyed
"""
from sklearn import datasets
import numpy as np
# Load the Iris dataset once at import time.  `features` and `labels` are
# module-level globals that the code further down this file reads.
iris = datasets.load_iris()
# Full-range slice of the data array for the features; the target array
# supplies the matching labels.
features, labels = iris.data[:, :], iris.target
def dist(pt1, pt2):
    """Return the Euclidean (L2) distance between two points.

    Both arguments may be NumPy arrays or plain sequences of numbers
    (lists, tuples).  They are converted with ``np.asarray`` before the
    subtraction so that it is always element-wise.
    """
    # Plain lists/tuples do not support ``-``; converting first lets callers
    # pass them directly.  For ndarray inputs asarray is a no-op.
    return np.linalg.norm(np.asarray(pt1) - np.asarray(pt2))
def classify_point(query_point, training_data, k=3, training_labels=None):
    """Classify ``query_point`` by majority vote among its k nearest neighbours.

    Parameters
    ----------
    query_point : array-like
        The point to classify; it must have the shape of one training sample.
    training_data : array-like
        Training samples, one per entry along the first axis.
    k : int, optional
        Number of nearest neighbours that take part in the vote (default 3).
    training_labels : sequence, optional
        Label of each training sample, in the same order as
        ``training_data``.  When omitted, the module-level ``labels`` is
        used, which keeps existing three-argument calls working.

    Returns
    -------
    The label with the most votes among the k nearest samples.  When several
    labels tie, the one belonging to the nearest neighbour among them wins.

    Raises
    ------
    ValueError
        If ``k`` is smaller than 1, ``training_data`` is empty, or there are
        no labels to vote with.
    """
    if k < 1:
        raise ValueError("k must be at least 1, got %r" % (k,))
    if training_labels is None:
        training_labels = labels

    samples = np.asarray(training_data)
    if samples.size == 0:
        raise ValueError("training_data is empty")

    # Compute every distance in one vectorized call.  Each sample is
    # flattened to a row first so that samples with more than one dimension
    # are still measured by the 2-norm of their element-wise difference.
    diffs = (samples - np.asarray(query_point)).reshape(len(samples), -1)
    dists = np.linalg.norm(diffs, axis=1)

    # sorted() is stable, so equally distant samples keep their input order.
    nearest = sorted(zip(dists, training_labels), key=lambda pair: pair[0])[:k]
    if not nearest:
        raise ValueError("no labelled training samples to vote with")

    # key = the label, value = how many of the k neighbours carry it
    votes = {}
    for _, label in nearest:
        votes[label] = votes.get(label, 0) + 1

    # Pick the label with the HIGHEST count.  Walking the neighbours from
    # nearest to farthest makes ties resolve in favour of the closest one.
    top_count = max(votes.values())
    for _, label in nearest:
        if votes[label] == top_count:
            return label
# Demo: classify one hand-picked four-value measurement against the full
# feature matrix and print the predicted label.  The parenthesised,
# single-argument form of print behaves the same on Python 2 and Python 3.
print(classify_point(np.array([6.5, 3., 5.2, 2.]), features))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment