Created
October 8, 2018 12:57
-
-
Save neerajvashistha/ad5c70bce34c92748f61ae8f5d92c5e0 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
def distance(p1, p2):
    """Return the Euclidean distance between points p1 and p2.

    Parameters
    ----------
    p1, p2 : array_like
        Coordinates of the two points (numpy arrays, lists or tuples) with
        matching or broadcastable shapes.

    Returns
    -------
    numpy.float64
        Square root of the sum of squared coordinate differences.
    """
    # Converting to float lets plain sequences through and keeps
    # unsigned-integer inputs from wrapping around on subtraction.
    diff = np.asarray(p2, dtype=float) - np.asarray(p1, dtype=float)
    return np.sqrt(np.sum(np.square(diff)))
def majority_vote(votes):
    """Return the most frequent value in votes.

    Parameters
    ----------
    votes : iterable of hashable values
        The votes to count.

    Returns
    -------
    The value that occurs most often. When several values share the highest
    count, one of them is picked at random with ``random.choice``.

    Raises
    ------
    ValueError
        If votes is empty.
    """
    from collections import Counter
    import random

    vote_count = Counter(votes)
    if not vote_count:
        # Fail with a clear message instead of the opaque error max() gives
        # for an empty sequence.
        raise ValueError("majority_vote() requires at least one vote")

    max_count = max(vote_count.values())
    winners = [vote for vote, count in vote_count.items() if count == max_count]
    return random.choice(winners)
def find_nearest_neighbors(p, points, k=5):
    """Find the k points nearest to p and return their indices.

    Parameters
    ----------
    p : array_like
        The query point.
    points : array_like
        The candidate points, one per entry along the first axis.
    k : int, optional
        Number of neighbours to return (default 5). If k exceeds the number
        of points, all indices are returned.

    Returns
    -------
    numpy.ndarray
        Indices into points, ordered from nearest to farthest. Points at
        equal distance keep their original relative order.

    Raises
    ------
    ValueError
        If k is negative.
    """
    if k < 0:
        # A negative k would silently slice off the farthest points instead
        # of selecting the nearest ones.
        raise ValueError("k must be non-negative")

    points = np.asarray(points, dtype=float)
    deltas = points - np.asarray(p, dtype=float)
    # Reduce over every axis except the first so each point yields one
    # distance; for 1-D input the axis tuple is empty and nothing is summed.
    coord_axes = tuple(range(1, deltas.ndim))
    distances = np.sqrt(np.sum(np.square(deltas), axis=coord_axes))
    # A stable sort makes the ordering of equidistant points deterministic.
    return np.argsort(distances, kind="stable")[:k]
def knn_predict(p, points, outcomes, k=5):
    """
    Predict the class of p from its k nearest neighbours.

    p        : the point to classify
    points   : numpy array holding the coordinates of the n known points
    outcomes : the class of each of those n points; it is indexed with the
               neighbour indices, so it must support array indexing
    k        : how many nearest neighbours take part in the vote
    """
    neighbor_indices = find_nearest_neighbors(p, points, k)
    neighbor_classes = outcomes[neighbor_indices]
    return majority_vote(neighbor_classes)
def generate_synthetic_data(n=50):
    """Create two labelled sets of 2-D points from two normal distributions.

    Parameters
    ----------
    n : int, optional
        Number of points per class (default 50).

    Returns
    -------
    (points, outcomes) : tuple of numpy.ndarray
        points has shape (2 * n, 2); the first n rows belong to class 0 and
        the last n rows to class 1. outcomes has shape (2 * n,) and holds
        the matching 0/1 labels.
    """
    import scipy.stats as ss

    class_0 = ss.norm(0, 1).rvs((n, 2))
    # The second class is centred at 1 rather than 0: drawing both classes
    # from the same distribution would make the labels pure noise.
    class_1 = ss.norm(1, 1).rvs((n, 2))

    points = np.concatenate((class_0, class_1), axis=0)
    outcomes = np.concatenate((np.repeat(0, n), np.repeat(1, n)))
    return (points, outcomes)
def make_prediction_grid(predictors, outcomes, limits, h, k):
    """Run the k-NN classifier on every node of a regular 2-D grid.

    predictors : the known points handed to knn_predict
    outcomes   : the class of each known point
    limits     : (x_min, x_max, y_min, y_max) bounds of the grid
    h          : step between neighbouring grid nodes
    k          : number of neighbours used for each prediction

    Returns (xx, yy, prediction_grid): the meshgrid coordinate arrays and an
    integer array of the same shape holding the predicted class per node.
    """
    x_min, x_max, y_min, y_max = limits
    xs = np.arange(x_min, x_max, h)
    ys = np.arange(y_min, y_max, h)
    xx, yy = np.meshgrid(xs, ys)

    prediction_grid = np.zeros(xx.shape, dtype=int)
    # x runs along the columns and y along the rows of the grid, hence the
    # [row, col] write below.
    for col, x_val in enumerate(xs):
        for row, y_val in enumerate(ys):
            query = np.array([x_val, y_val])
            prediction_grid[row, col] = knn_predict(query, predictors, outcomes, k)
    return (xx, yy, prediction_grid)
def plot_prediction_grid(xx, yy, prediction_grid, predictors=None, outcomes=None):
    """Plot KNN predictions for every point on the grid.

    Parameters
    ----------
    xx, yy : numpy.ndarray
        Grid coordinate arrays.
    prediction_grid : numpy.ndarray
        Predicted class for each grid node, drawn as the coloured background.
    predictors : numpy.ndarray, optional
        Observed points to overlay; columns 0 and 1 are used as x and y.
    outcomes : array_like, optional
        Class of each observed point, used to colour the overlay.

    If predictors or outcomes is omitted, a module-level variable of the
    same name is used when one exists; if neither source provides both, only
    the background is drawn.
    """
    # Imported locally so the function does not depend on the caller having
    # bound ``plt`` at module level beforehand.
    import matplotlib.pyplot as plt
    from matplotlib.colors import ListedColormap

    # Backward compatibility: callers that pass only the grid rely on
    # module-level ``predictors`` / ``outcomes``.
    if predictors is None:
        predictors = globals().get("predictors")
    if outcomes is None:
        outcomes = globals().get("outcomes")

    background_colormap = ListedColormap(["hotpink", "lightskyblue", "yellowgreen"])
    observation_colormap = ListedColormap(["red", "blue", "green"])

    plt.figure(figsize=(10, 10))
    plt.pcolormesh(xx, yy, prediction_grid, cmap=background_colormap, alpha=0.5)
    if predictors is not None and outcomes is not None:
        plt.scatter(predictors[:, 0], predictors[:, 1], c=outcomes,
                    cmap=observation_colormap, s=50)
    plt.xlabel('Variable 1')
    plt.ylabel('Variable 2')
    plt.xticks(())
    plt.yticks(())
    plt.xlim(np.min(xx), np.max(xx))
    plt.ylim(np.min(yy), np.max(yy))
    plt.show()
if __name__ == '__main__':
    # Demo script: exercises each helper in turn and shows the plots.
    # ``plt`` is bound at module level here, where plot_prediction_grid can
    # find it.
    import matplotlib.pyplot as plt

    # --- distance and majority vote -------------------------------------
    p1 = np.array([1, 1])
    p2 = np.array([4, 4])
    # Printed explicitly: a bare expression shows nothing when run as a script.
    print(distance(p1, p2))
    print(majority_vote(np.array([0, 1, 1, 1, 3, 4, 1, 3, 1, 4, 1])))

    # --- nearest neighbours on a small hand-made data set ---------------
    points = np.array([[1, 1], [1, 2], [2, 2], [2, 1], [3, 1], [3, 3], [1, 4], [4, 4], [2, 5]])
    p = np.array([2, 2.3])
    plt.plot(points[:, 0], points[:, 1], "ro")
    plt.plot(p[0], p[1], "bo")
    plt.show()
    print(points[find_nearest_neighbors(p, points, 2)])

    # --- classify p ------------------------------------------------------
    outcomes = np.array([0, 0, 0, 0, 0, 1, 1, 1, 1])
    # Predict once and reuse the result, so the printed class and the colour
    # used below cannot disagree after a random tie-break.
    out = knn_predict(p, points, outcomes, k=2)
    print(out)

    point_0 = points[outcomes == 0]
    point_1 = points[outcomes == 1]

    # Unclassified query point (blue) among the two classes.
    plt.plot(p[0], p[1], "bo")
    plt.plot(point_0[:, 0], point_0[:, 1], "go")
    plt.plot(point_1[:, 0], point_1[:, 1], "ro")
    plt.show()

    # Same picture with the query point coloured by its predicted class.
    plt.plot(p[0], p[1], "go" if out == 0 else "ro")
    plt.plot(point_0[:, 0], point_0[:, 1], "go")
    plt.plot(point_1[:, 0], point_1[:, 1], "ro")
    plt.show()

    # --- synthetic data --------------------------------------------------
    n = 20
    points, outcomes = generate_synthetic_data(n)
    plt.plot(points[n:, 0], points[n:, 1], "ro")
    plt.plot(points[:n, 0], points[:n, 1], "go")
    plt.show()

    # --- prediction grids for a small and a large k ---------------------
    limits = (-3, 4, -3, 4)
    h = 0.1
    for k in (5, 50):
        # predictors / outcomes stay module-level names because
        # plot_prediction_grid is called with the grid only and can pick the
        # observations up from module scope.
        (predictors, outcomes) = generate_synthetic_data()
        (xx, yy, prediction_grid) = make_prediction_grid(predictors, outcomes, limits, h, k)
        plot_prediction_grid(xx, yy, prediction_grid)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment