Last active
March 27, 2016 23:40
-
-
Save jamesthomson/35f65a6f6e857bc54ec9 to your computer and use it in GitHub Desktop.
AWS version of the Last.fm recommendations in Spark
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# In a terminal, connect to the master node
ssh [email protected] -i ~/aws_key_pair.pem | |
#then fire up spark | |
MASTER=yarn-client /home/hadoop/spark/bin/pyspark | |
# Load the raw listen log: one tab-separated record per line. Judging by the
# file name the columns are user sha1, artist mbid, artist name, plays
# (NOTE(review): inferred from the path, confirm against the data).
lines = sc.textFile('s3n://jthomson/lastfm_listens/listens/usersha1-artmbid-artname-plays.tsv')
data = lines.map(lambda line: line.split('\t'))

# Keep (first column, third column, 1): every record counts as a single
# implicit "listened" signal; any fourth column is ignored.
ratings = data.map(lambda d: (d[0], d[2], 1))

# ALS needs numeric ids, so give every distinct user / artist string a unique
# integer. Each lookup RDD holds (original string, numeric id) pairs.
users_lkp = ratings.map(lambda s: s[0]).distinct().zipWithUniqueId()
items_lkp = ratings.map(lambda s: s[1]).distinct().zipWithUniqueId()

# Swap the artist name for its numeric id.
#   (user, artist, r) -> (artist, (user, r))
#   join items_lkp    -> (artist, ((user, r), artist_id))
#   reshape           -> (user, artist_id, r)
# Index-based lambdas are used instead of tuple-unpacking parameters
# (`lambda (u, a, r): ...`), which are a SyntaxError on Python 3.
repArtist = (
    ratings
    .map(lambda t: (t[1], (t[0], t[2])))
    .join(items_lkp)
    .map(lambda kv: (kv[1][0][0], kv[1][1], kv[1][0][1]))
)

# Swap the user hash for its numeric id in the same way.
#   (user, artist_id, r) -> (user, (artist_id, r))
#   join users_lkp       -> (user, ((artist_id, r), user_id))
#   reshape              -> (user_id, artist_id, r)
repUser = (
    repArtist
    .map(lambda t: (t[0], (t[1], t[2])))
    .join(users_lkp)
    .map(lambda kv: (kv[1][1], kv[1][0][0], kv[1][0][1]))
)
from pyspark.mllib.recommendation import ALS, MatrixFactorizationModel, Rating | |
# ALS hyper-parameters: number of latent factors and number of iterations.
rank, numIterations = 20, 10
# Fit an implicit-feedback matrix factorization on the
# (user id, artist id, rating) triples in repUser; 0.01 is the
# regularization parameter (lambda_).
model = ALS.trainImplicit(repUser, rank, iterations=numIterations, lambda_=0.01)
# --- Create recommendations for one specific user ---

# Step 1: find some listeners of Shuggie Otis (second field is the artist name).
shuggie_rows = ratings.filter(lambda row: row[1] == 'shuggie otis')
shuggie_rows.top(10)

# Step 2: pick one of them and look up the numeric id assigned to that user.
fan_sha1 = 'fd3c74ac50f8ffc0089caa3cad8bc7a5997af48e'
users_lkp.filter(lambda pair: pair[0] == fan_sha1).collect()

# Step 3: list the artists this user has listened to.
fan_rows = ratings.filter(lambda row: row[0] == fan_sha1)
fan_rows.map(lambda row: row[1]).collect()
# Generate the top 5 predictions for one user.

# Numeric id of the user to score.
# NOTE(review): presumably the id returned by the users_lkp lookup for the
# chosen fan; confirm it still matches, since ids from zipWithUniqueId are
# assigned per run.
targetUserId = 213489

# Pair the target user with every known artist id: (user_id, artist_id).
# Index-based lambdas replace the tuple-unpacking parameters
# (`lambda (a, i): ...`), which are a SyntaxError on Python 3.
userArtist = items_lkp.map(lambda kv: (targetUserId, kv[1]))

# Score every pair, then translate artist ids back to names.
#   prediction r    -> (artist_id, score)       via r[1], r[2]
#   join inverted   -> (artist_id, (score, artist_name))
#   reshape         -> (artist_name, score)
userPred = (
    model.predictAll(userArtist)
    .map(lambda r: (r[1], r[2]))
    .join(items_lkp.map(lambda kv: (kv[1], kv[0])))
    .map(lambda kv: (kv[1][1], kv[1][0]))
)

# Highest predicted score first; negate the score because takeOrdered sorts ascending.
userPred.takeOrdered(5, key=lambda x: -x[1])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment