Skip to content

Instantly share code, notes, and snippets.

@neerajvashistha
Created October 8, 2018 12:57
Show Gist options
  • Save neerajvashistha/ad5c70bce34c92748f61ae8f5d92c5e0 to your computer and use it in GitHub Desktop.
Save neerajvashistha/ad5c70bce34c92748f61ae8f5d92c5e0 to your computer and use it in GitHub Desktop.
import numpy as np
def distance(p1, p2):
    """Return the Euclidean distance between points p1 and p2.

    Parameters
    ----------
    p1, p2 : array-like
        Coordinates of the two points (NumPy arrays, lists or tuples).

    Returns
    -------
    float
        Square root of the sum of squared coordinate differences.
    """
    # Coerce to float arrays so plain sequences work and unsigned-integer
    # inputs cannot wrap around when subtracted.
    p1 = np.asarray(p1, dtype=float)
    p2 = np.asarray(p2, dtype=float)
    return np.sqrt(np.sum(np.power(p2 - p1, 2)))
def majority_vote(votes):
    """Pick the most frequent value in ``votes``.

    When several values share the highest count, one of them is chosen
    at random with ``random.choice``. (``scipy.stats.mode(votes)`` is an
    alternative way to obtain a most-common value.)
    """
    import random
    from collections import Counter

    tally = Counter(votes)
    top_count = max(tally.values())
    # Every candidate that reached the top count takes part in the tie-break.
    tied = [candidate for candidate, n_votes in tally.items() if n_votes == top_count]
    return random.choice(tied)
def find_nearest_neighbors(p, points, k=5):
    """Return the indices of the k points nearest to p.

    Parameters
    ----------
    p : array-like
        The query point.
    points : array-like
        Candidate points, one per entry along the first axis.
    k : int, optional
        Number of neighbours to return (default 5). If fewer than k
        points exist, all indices are returned.

    Returns
    -------
    numpy.ndarray
        Indices into ``points`` ordered by increasing Euclidean distance
        from ``p``, truncated to the first k.
    """
    points = np.asarray(points, dtype=float)
    p = np.asarray(p, dtype=float)
    diffs = points - p
    # Reduce over every axis except the first so each point collapses to a
    # single distance, computed for all points at once instead of in a
    # Python-level loop.
    feature_axes = tuple(range(1, diffs.ndim))
    distances = np.sqrt(np.sum(np.power(diffs, 2), axis=feature_axes))
    return np.argsort(distances)[:k]
def knn_predict(p, points, outcomes, k=5):
    """Classify point p by a majority vote among its k nearest neighbours.

    p        : the point to classify
    points   : numpy array holding the coordinates of the n known points
    outcomes : the class of each of those n points
    k        : how many nearest neighbours take part in the vote
    """
    neighbor_idx = find_nearest_neighbors(p, points, k)
    neighbor_labels = outcomes[neighbor_idx]
    return majority_vote(neighbor_labels)
def generate_synthetic_data(n=50):
    """Create two labelled sets of 2-D points drawn from normal distributions.

    Class 0 is sampled from a normal distribution with mean 0 and standard
    deviation 1 in each coordinate; class 1 uses mean 1 and standard
    deviation 1. Previously both classes were sampled from the same
    distribution, which made the labels carry no information.

    Parameters
    ----------
    n : int, optional
        Number of points per class (default 50).

    Returns
    -------
    (points, outcomes)
        ``points`` has shape (2 * n, 2) with the class-0 rows first;
        ``outcomes`` holds n zeros followed by n ones.
    """
    import scipy.stats as ss

    class_0 = ss.norm(0, 1).rvs((n, 2))
    # Shift the second class so the two clouds are actually separable.
    class_1 = ss.norm(1, 1).rvs((n, 2))
    points = np.concatenate((class_0, class_1), axis=0)
    outcomes = np.concatenate((np.repeat(0, n), np.repeat(1, n)))
    return (points, outcomes)
def make_prediction_grid(predictors, outcomes, limits, h, k):
    """Run the KNN classifier on every node of a regular 2-D grid.

    ``limits`` is ``(x_min, x_max, y_min, y_max)`` and ``h`` is the grid
    step passed to ``np.arange``. Returns ``(xx, yy, prediction_grid)``,
    where ``xx`` and ``yy`` come from ``np.meshgrid`` and
    ``prediction_grid[row, col]`` is the class predicted by
    ``knn_predict`` for the point ``(xs[col], ys[row])``.
    """
    x_min, x_max, y_min, y_max = limits
    x_values = np.arange(x_min, x_max, h)
    y_values = np.arange(y_min, y_max, h)
    xx, yy = np.meshgrid(x_values, y_values)

    prediction_grid = np.zeros(xx.shape, dtype=int)
    for col, x_coord in enumerate(x_values):
        for row, y_coord in enumerate(y_values):
            query = np.array([x_coord, y_coord])
            # meshgrid output is indexed [row (y), column (x)].
            prediction_grid[row, col] = knn_predict(query, predictors, outcomes, k)
    return (xx, yy, prediction_grid)
def plot_prediction_grid(xx, yy, prediction_grid, predictors=None, outcomes=None):
    """Plot KNN predictions for every point on the grid.

    Parameters
    ----------
    xx, yy : numpy.ndarray
        Grid coordinates used for the background mesh and axis limits.
    prediction_grid : numpy.ndarray
        Predicted class per grid node, drawn as the background colour.
    predictors : numpy.ndarray, optional
        Observed points to overlay; columns 0 and 1 are used as x and y.
    outcomes : array-like, optional
        Class of each observed point, used to colour the overlay.

    When ``predictors`` / ``outcomes`` are omitted, module-level variables
    of the same names are used if they exist (the behaviour the original
    script relied on); if either is unavailable the overlay is skipped.
    """
    # Import locally so the function does not depend on a module-level
    # ``plt`` that only exists when the file is run as a script.
    import matplotlib.pyplot as plt
    from matplotlib.colors import ListedColormap

    if predictors is None:
        predictors = globals().get("predictors")
    if outcomes is None:
        outcomes = globals().get("outcomes")

    background_colormap = ListedColormap(["hotpink", "lightskyblue", "yellowgreen"])
    observation_colormap = ListedColormap(["red", "blue", "green"])
    plt.figure(figsize=(10, 10))
    plt.pcolormesh(xx, yy, prediction_grid, cmap=background_colormap, alpha=0.5)
    if predictors is not None and outcomes is not None:
        plt.scatter(predictors[:, 0], predictors[:, 1], c=outcomes,
                    cmap=observation_colormap, s=50)
    plt.xlabel('Variable 1')
    plt.ylabel('Variable 2')
    plt.xticks(())
    plt.yticks(())
    plt.xlim(np.min(xx), np.max(xx))
    plt.ylim(np.min(yy), np.max(yy))
    plt.show()
if __name__ == '__main__':
    # Demo script: exercises each helper in turn and shows the plots.

    # Distance and majority vote on small hand-made inputs.
    p1 = np.array([1, 1])
    p2 = np.array([4, 4])
    distance(p1, p2)
    print(majority_vote(np.array([0, 1, 1, 1, 3, 4, 1, 3, 1, 4, 1])))

    # Nine fixed points plus one query point.
    points = np.array([[1, 1], [1, 2], [2, 2], [2, 1], [3, 1],
                       [3, 3], [1, 4], [4, 4], [2, 5]])
    p = np.array([2, 2.3])

    import matplotlib.pyplot as plt
    plt.plot(points[:, 0], points[:, 1], "ro")
    plt.plot(p[0], p[1], "bo")
    plt.show()

    points[find_nearest_neighbors(p, points, 2)]

    # Classify the query point using its two nearest neighbours.
    outcomes = np.array([0, 0, 0, 0, 0, 1, 1, 1, 1])
    print(knn_predict(p, points, outcomes, k=2))
    out = knn_predict(p, points, outcomes, k=2)

    # Query point in blue, class 0 in green, class 1 in red.
    plt.plot(p[0], p[1], "bo")
    class_0_rows = np.where(outcomes == 0)
    point_0 = points[class_0_rows]
    plt.plot(points[class_0_rows, 0], points[class_0_rows, 1], "go")
    point_1 = points[np.where(outcomes == 1)]
    plt.plot(point_1[:, 0], point_1[:, 1], "ro")
    plt.show()

    # Same picture, with the query point coloured by its predicted class.
    query_style = "go" if out == 0 else "ro"
    plt.plot(p[0], p[1], query_style)
    plt.plot(point_0[:, 0], point_0[:, 1], "go")
    plt.plot(point_1[:, 0], point_1[:, 1], "ro")
    plt.show()

    # Synthetic data: first n rows are class 0, last n rows are class 1.
    n = 20
    points, outcomes = generate_synthetic_data(n)
    plt.plot(points[n:, 0], points[n:, 1], "ro")
    plt.plot(points[:n, 0], points[:n, 1], "go")
    plt.show()

    # Prediction grids for a small and a large neighbourhood size, each on
    # a freshly generated data set.
    limits = (-3, 4, -3, 4)
    h = 0.1
    for k in (5, 50):
        (predictors, outcomes) = generate_synthetic_data()
        (xx, yy, prediction_grid) = make_prediction_grid(predictors, outcomes, limits, h, k)
        plot_prediction_grid(xx, yy, prediction_grid)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment