-
-
Save joskid/da82b299a96ffa9beaa2c93e0344335d to your computer and use it in GitHub Desktop.
Recommendation using ALS for implicit data. Code for Medium Blog
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*-
"""
Created on Sun Jun 23 22:20:58 2019
@author: himansh
"""
# import libraries
import random
import sys
from datetime import datetime, timedelta

import implicit
import numpy as np
import pandas as pd
import scipy.sparse as sparse
from scipy.sparse.linalg import spsolve
from sklearn.preprocessing import MinMaxScaler
#Data Preprocessing | |
def create_data(datapath, start_date, end_date):
    """Load an events CSV and keep the interactions inside a date window.

    Parameters
    ----------
    datapath : str
        Path of the CSV file. It must provide the columns ``timestamp``
        (epoch milliseconds), ``visitorid``, ``itemid`` and ``event``.
    start_date, end_date : str
        Inclusive bounds of the window, formatted as ``'%Y-%m-%d'``
        (non zero-padded values such as ``'2015-5-3'`` are accepted).

    Returns
    -------
    pandas.DataFrame
        An independent frame holding only ``visitorid``, ``itemid`` and
        ``event``, ordered by calendar date.

    Raises
    ------
    ValueError
        If ``start_date`` or ``end_date`` does not match ``'%Y-%m-%d'``.
    """
    # Parse the bounds once instead of inside the filter expression.
    window_start = datetime.strptime(start_date, '%Y-%m-%d').date()
    window_end = datetime.strptime(end_date, '%Y-%m-%d').date()

    df = pd.read_csv(datapath)

    # NOTE: datetime.fromtimestamp() without a tz argument converts to the
    # machine's local time zone, so events close to midnight can land on a
    # different calendar day depending on where this runs.
    # Series.map keeps the frame's own index, so the new column always lines
    # up with its rows.
    df['date'] = df['timestamp'].map(
        lambda ms: datetime.fromtimestamp(ms / 1000).date()
    )

    # The raw export is not ordered by date, so sort before filtering.
    df = df.sort_values(by='date').reset_index(drop=True)

    in_window = (df['date'] >= window_start) & (df['date'] <= window_end)
    # Return a copy so callers can add columns without SettingWithCopy
    # warnings or accidental writes into an intermediate frame.
    return df.loc[in_window, ['visitorid', 'itemid', 'event']].copy()
# Download the Kaggle RetailRocket data and give the events.csv file path
datapath = 'events.csv'
data = create_data(datapath, '2015-5-3', '2015-5-18')

# Turn the raw visitor/item ids into dense 0..n-1 codes so they can be used
# directly as row/column indices of the sparse matrices below.
data['visitorid'] = data['visitorid'].astype("category")
data['itemid'] = data['itemid'].astype("category")
data['visitor_id'] = data['visitorid'].cat.codes
data['item_id'] = data['itemid'].cat.codes

# Interaction strength per event type. Category codes must NOT be used here:
# they follow the alphabetical order of the event names (addtocart=0,
# transaction=1, view=2), which gives add-to-cart zero weight and ranks a
# plain view above a purchase. The weights below are ordinal and can be tuned.
event_weights = {'view': 1, 'addtocart': 2, 'transaction': 3}
unknown_events = set(data['event'].unique()) - set(event_weights)
if unknown_events:
    # Fail loudly rather than silently writing NaN weights into the matrix.
    raise ValueError(
        "unexpected event types: %s" % sorted(map(str, unknown_events))
    )
data['event'] = data['event'].map(event_weights)

# Repeated (visitor, item) pairs are summed by the COO-style constructor, so
# several interactions with the same item accumulate into one stronger entry.
interaction_strength = data['event'].astype(float)
sparse_item_user = sparse.csr_matrix(
    (interaction_strength, (data['item_id'], data['visitor_id']))
)
sparse_user_item = sparse.csr_matrix(
    (interaction_strength, (data['visitor_id'], data['item_id']))
)
# Building the model
model = implicit.als.AlternatingLeastSquares(factors=20, regularization=0.1, iterations=20)

# Scale the raw interaction strengths into confidence values before fitting.
alpha_val = 40
data_conf = (sparse_item_user * alpha_val).astype('double')

# NOTE(review): fitting on the item x user matrix and passing the full
# user x item matrix to recommend() looks like the pre-0.5 `implicit` API;
# newer releases appear to expect the opposite orientation -- confirm against
# the installed version.
model.fit(data_conf)

### USING THE MODEL
# Get recommendations
# user_id is an internal category code (data['visitor_id']), not a raw visitorid.
user_id = 14
recommended = model.recommend(user_id, sparse_user_item)
print(recommended)

# Get similar items
# item_id is an internal category code (data['item_id']), not a raw itemid.
item_id = 7
n_similar = 3
similar = model.similar_items(item_id, n_similar)
print(similar)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment