Last active
November 17, 2015 08:05
-
-
Save rajarsheem/96c03a17197d656c0905 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
baseball | economics | politics | Europe | Asia | soccer | war | security | shopping | family | num-attr | User 1 | User 2 | ||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
doc1 | 1 | 0 | 1 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 5 | 1 | -1 | |
doc2 | 0 | 1 | 1 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 4 | -1 | 1 | |
doc3 | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 3 | |||
doc4 | 0 | 0 | 1 | 1 | 0 | 0 | 1 | 1 | 0 | 0 | 4 | 1 | ||
doc5 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 3 | |||
doc6 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | ||
doc7 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 2 | |||
doc8 | 0 | 0 | 1 | 1 | 0 | 0 | 1 | 0 | 0 | 1 | 4 | |||
doc9 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 2 | |||
doc10 | 0 | 1 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 3 | |||
doc11 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 3 | |||
doc12 | 1 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 3 | -1 | ||
doc13 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 1 | 0 | 0 | 4 | |||
doc14 | 0 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 4 | |||
doc15 | 0 | 0 | 0 | 1 | 0 | 1 | 1 | 1 | 0 | 0 | 4 | |||
doc16 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 3 | 1 | ||
doc17 | 0 | 1 | 1 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 4 | 1 | ||
doc18 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 2 | |||
doc19 | 0 | 1 | 1 | 0 | 1 | 0 | 1 | 0 | 0 | 1 | 5 | -1 | ||
doc20 | 0 | 0 | 1 | 1 | 0 | 0 | 1 | 0 | 1 | 0 | 4 | |||
DF | 4 | 6 | 10 | 11 | 6 | 6 | 7 | 6 | 7 | 5 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import csv
import math

import numpy as np
def col(i, lis):
    """Return column *i* of *lis* as a list, leaving out the last row.

    Args:
        i: Index of the cell to take from each row.
        lis: Sequence of indexable rows.

    Returns:
        A list holding ``row[i]`` for every row except the final one.
    """
    # Drop the final row *before* indexing: its value is discarded anyway, so
    # a short trailing row must not raise IndexError.
    return [row[i] for row in lis[:-1]]
def _as_python_number(value):
    """Return a NumPy scalar as the equivalent built-in number; pass others through."""
    return value.item() if isinstance(value, np.generic) else value


def predict(l, n, d, idf=1):
    """Print two user profiles, per-document scores and headline predictions.

    Every row of ``l`` is read as ``[label, attr_1 .. attr_k, <ignored>,
    user-1 rating, user-2 rating]`` with ``k = d - 4``.  The last row of ``l``
    is never treated as a document: it is excluded from both the profiles and
    the scores.

    A user's profile holds, per attribute, the dot product of that attribute's
    column with the user's rating column.  A document's score is the dot
    product of the profile with the document's attribute values, optionally
    weighted per attribute by ``idf``.

    Args:
        l: Table rows; the numeric cells must already be numbers.
        n: Unused.  Kept so existing callers keep working.
        d: Number of cells per row.
        idf: ``1`` (the default) scores without weighting.  Any other scalar
            is applied as the same weight to every attribute; a sequence
            supplies one weight per attribute.

    Returns:
        None.  All results are written to standard output.

    Raises:
        ValueError: If ``idf`` is a sequence whose length differs from the
            number of attribute columns.
    """
    docs = l[:-1]
    attr_idx = range(1, d - 3)

    # Resolve the weighting mode up front so bad input fails before anything
    # is printed.  ``idf == 1`` replaces the former identity test ``idf is 1``.
    if np.isscalar(idf):
        weights = None if idf == 1 else [idf] * len(attr_idx)
    else:
        weights = list(idf)
        if len(weights) != len(attr_idx):
            # zip() would silently truncate and produce wrong scores.
            raise ValueError(
                "idf has {got} weights but there are {want} attribute "
                "columns".format(got=len(weights), want=len(attr_idx))
            )

    ratings1 = [row[d - 2] for row in docs]
    ratings2 = [row[d - 1] for row in docs]

    # Results are converted to built-in numbers so the printed lists do not
    # show NumPy scalar wrappers.
    profile1, profile2 = [], []
    for i in attr_idx:
        column = [row[i] for row in docs]
        profile1.append(_as_python_number(np.dot(column, ratings1)))
        profile2.append(_as_python_number(np.dot(column, ratings2)))
    print("profiles")
    print(profile1)
    print(profile2)

    def score(profile, row):
        feats = [row[i] for i in attr_idx]
        if weights is None:
            return _as_python_number(np.dot(profile, feats))
        return sum(p * x * w for p, x, w in zip(profile, feats, weights))

    docscore1 = [score(profile1, row) for row in docs]
    docscore2 = [score(profile2, row) for row in docs]
    print("doc scores")
    print(docscore1)
    print(docscore2)

    if docs:
        # Rank document *positions* rather than looking scores up by value:
        # a value lookup names the same document twice when the top two
        # scores are tied.  sorted() is stable even with reverse=True, so the
        # earliest document still wins a tie for first place.
        ranked1 = sorted(range(len(docscore1)), key=docscore1.__getitem__, reverse=True)
        top = ranked1[0]
        print("user1 will like document {d} (score = {s}) best".format(d=top + 1, s=docscore1[top]))
        if len(ranked1) > 1:
            second = ranked1[1]
            print("user1 will like document {d} (score = {s}) 2nd best".format(d=second + 1, s=docscore1[second]))
        if len(docscore1) > 8:
            # Only report doc9 when the table actually has a ninth document.
            print("user1's prediction for doc9 is", docscore1[8])
        top2 = max(range(len(docscore2)), key=docscore2.__getitem__)
        print("user2 will like document {d} (score = {s}) best".format(d=top2 + 1, s=docscore2[top2]))
    print("No. of docs user2 will dislike is ", sum(1 for x in docscore2 if x < 0))
# Load the table.  newline='' is what the csv module asks for so that quoted
# fields containing line breaks are parsed correctly; the file is closed as
# soon as the rows are in memory.
with open('ml-latest-small/ass.csv', newline='') as f:
    data = csv.reader(f, delimiter=',')
    next(data)  # skip the header row
    l = [row for row in data if row]  # csv.reader yields [] for blank lines

n = len(l)
d = len(l[0])
for row in l:
    # Pad rows that were written without their trailing empty cells so every
    # row has d cells, then turn each cell after the label into an int
    # (an empty cell counts as 0).
    row.extend([''] * (d - len(row)))
    for j in range(1, d):
        row[j] = int(row[j]) if row[j] != '' else 0

# Pass 1: raw attribute values.
predict(l, n, d)

# Pass 2: divide each document's attribute cells by the square root of its
# third-from-last cell.  Rows where that cell is 0 are left untouched
# instead of dividing by zero.
for r in l[:-1]:
    norm = math.sqrt(r[-3])
    if norm:
        r[1:-3] = [x / norm for x in r[1:-3]]
predict(l, n, d)

# Pass 3: additionally weight each attribute by the reciprocal of the matching
# cell in the last row.  A zero there gets weight 0 rather than a crash.
idf = [1 / x if x else 0 for x in l[-1][1:-3]]
predict(l, n, d, idf)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment