Skip to content

Instantly share code, notes, and snippets.

@raryo
Created March 21, 2020 13:29
Show Gist options
  • Save raryo/f755b3f89eba3e084082dc9d57ebb68f to your computer and use it in GitHub Desktop.
Save raryo/f755b3f89eba3e084082dc9d57ebb68f to your computer and use it in GitHub Desktop.
MF(Matrix Factorization;行列分解)の練習。
using LinearAlgebra
"""
    main()

Read the ratings in `./ml-100k/u.data`, train the factorization with the
default settings of `fit`, and return the factor matrices `(P, Q)`.
"""
function main()
    # input data
    train_data = read_data("./ml-100k/u.data")
    # `fit` uses the user/item ids directly as row indices, so the matrices are
    # sized by the largest id. Counting distinct ids is only equivalent when the
    # ids have no gaps, and would lead to an out-of-bounds access otherwise.
    n_user = mapreduce(t -> t[1], max, train_data; init=0)
    n_item = mapreduce(t -> t[2], max, train_data; init=0)
    # parameters
    P, Q = fit(n_user, n_item, train_data)
    return P, Q
end
"""
    read_data(file_path)

Read a tab-separated ratings file and return a
`Vector{Tuple{Int,Int,Float64}}` of `(user, item, rating)` entries.

Every non-blank line must contain at least three tab-separated numeric fields
(user id, item id, rating). Additional fields, such as a timestamp column, are
ignored. Blank lines are skipped.

# Throws
- `ArgumentError` if a line has fewer than three fields or a field is not numeric.
- `InexactError` if a user or item id is not integer-valued.
"""
function read_data(file_path)
    # A concretely typed vector instead of `[]` (which is a `Vector{Any}`).
    train_data = Tuple{Int,Int,Float64}[]
    # The do-block form closes the file even when parsing throws.
    open(file_path) do io
        for line in eachline(io)
            isempty(strip(line)) && continue  # tolerate blank / trailing lines
            fields = split(line, "\t")
            if length(fields) < 3
                throw(ArgumentError("expected at least 3 tab-separated fields: $line"))
            end
            u, i, r = (parse(Float64, x) for x in fields[1:3])
            push!(train_data, (Int(u), Int(i), r))
        end
    end
    return train_data
end
"""
    fit(n_user, n_item, train_data, n_itr=50, n_fac=5, γ=0.07, λ=0.01)

Factorize the ratings with stochastic gradient descent and return `(P, Q)`,
where `P` is `n_user × n_fac` and `Q` is `n_item × n_fac`. A rating of user `u`
for item `i` is modelled by the dot product of row `u` of `P` and row `i` of `Q`.

# Arguments
- `n_user`, `n_item`: number of rows of `P` and `Q`. Every user/item id found in
  `train_data` is used directly as a row index and must not exceed these.
- `train_data`: iterable of `(user, item, rating)` tuples.
- `n_itr`: number of passes over `train_data`.
- `n_fac`: number of latent factors (columns of `P` and `Q`).
- `γ`: step size of each update.
- `λ`: weight of the squared-norm penalty on the factor rows.

The pass number and the loss accumulated during that pass are printed after
every pass.
"""
function fit(n_user, n_item, train_data, n_itr=50, n_fac=5, γ=0.07, λ=0.01)
    # init parameters
    # Float64 storage: Float16 keeps only about three significant decimal
    # digits, so small update steps were rounded away when written back.
    P = randn(Float64, n_user, n_fac)
    Q = randn(Float64, n_item, n_fac)
    # optimize: SGD
    for itr in 1:n_itr
        loss = 0.0  # Float64 from the start keeps the accumulator type-stable
        for (u, i, r) in train_data
            # Snapshot both rows so that each update sees the other row's
            # pre-update values.
            pu, qi = P[u, :], Q[i, :]
            # calc error
            e = r - pu ⋅ qi
            # In-place fused updates; no temporary row is allocated.
            @views Q[i, :] .+= γ .* (e .* pu .- λ .* qi)
            @views P[u, :] .+= γ .* (e .* qi .- λ .* pu)
            # calc loss (the penalty term is evaluated on the updated rows)
            loss += e * e + λ * (sum(abs2, view(P, u, :)) + sum(abs2, view(Q, i, :)))
        end
        println("$itr: $loss")
    end
    return P, Q
end
# Script entry point: the training run starts as soon as this file is executed.
main()
import numpy as np
import numba
from numba import njit, jit
from numba.typed import List
import sys
class MFEstimator():
    """Matrix-factorization rating estimator trained with plain SGD.

    A rating of user ``u`` for item ``i`` is modelled as ``P[u] @ Q[i]``, where
    ``P`` has shape ``(n_user, n_fac)`` and ``Q`` has shape ``(n_item, n_fac)``.
    """

    def __init__(self, n_user, n_item, n_itr=50, n_fac=5, γ=0.07, λ=0.01, met='RMSE'):
        """Store the hyper-parameters and randomly initialise ``P`` and ``Q``.

        Args:
            n_user: number of rows of ``P``.
            n_item: number of rows of ``Q``.
            n_itr: number of passes over the training data.
            n_fac: number of latent factors (columns of ``P`` and ``Q``).
            γ: step size of each SGD update.
            λ: weight of the squared-norm penalty on the factor rows.
            met: metric name; stored, but not read anywhere in this class.
        """
        self.n_user = n_user
        self.n_item = n_item
        self.n_itr = n_itr
        self.n_fac = n_fac
        self.γ = γ
        self.λ = λ
        self.met = met
        # initialize output matrix
        self.P = np.random.normal(scale=0.0001, size=(self.n_user, self.n_fac))
        self.Q = np.random.normal(scale=0.0001, size=(self.n_item, self.n_fac))

    def fit(self, tdata):
        """Train ``P`` and ``Q`` on ``tdata`` and return ``self``.

        Args:
            tdata: array-like whose rows start with ``user, item, rating``;
                the ids must be 0-based row indices into ``P`` / ``Q``.

        Raises:
            IndexError: if an id is negative or not smaller than
                ``n_user`` / ``n_item``.
        """
        # NOTE: float32 represents integers exactly only up to 2**24, so ids
        # larger than that would not survive this cast.
        tdata = np.asarray(tdata, dtype=np.float32)
        if tdata.size:
            users, items = tdata[:, 0], tdata[:, 1]
            # Negative ids would otherwise wrap around silently and train the
            # wrong row, so reject them together with too-large ids up front.
            if (users.min() < 0 or users.max() >= self.n_user
                    or items.min() < 0 or items.max() >= self.n_item):
                raise IndexError("user/item id outside the range of P/Q rows")
        self.P, self.Q = MFEstimator.sgd(tdata, self.P, self.Q, self.γ, self.λ, self.n_itr)
        return self

    @staticmethod
    def sgd(tdata, P, Q, γ, λ, n_itr):
        """Run ``n_itr`` SGD passes over ``tdata``, updating ``P`` and ``Q`` in place.

        Prints the pass index and the loss accumulated during that pass after
        every pass, and returns the (mutated) ``P`` and ``Q``.
        """
        for itr in range(n_itr):
            loss = 0.0
            for j in range(len(tdata)):
                u, i, r = int(tdata[j, 0]), int(tdata[j, 1]), tdata[j, 2]
                # calc diff; copy both rows so each update below sees the
                # other row's pre-update values
                pu, qi = P[u, :].copy(), Q[i, :].copy()
                e = r - pu @ qi
                # update
                Q[i] += γ * (e * pu - λ * qi)
                P[u] += γ * (e * qi - λ * pu)
                # calculate loss (the penalty term uses the updated rows)
                loss += e * e + λ * (np.sum(P[u] ** 2) + np.sum(Q[i] ** 2))
            print(itr, loss)
        return P, Q
def main(data_path='./ml-100k/u.data'):
    """Load the ratings file at ``data_path``, train an ``MFEstimator`` on it
    and return the trained model.
    """
    # `np.float` was only an alias of the builtin float and has been removed
    # from NumPy; `np.float64` is the same dtype and works on every version.
    uir = np.loadtxt(data_path, dtype=np.float64)
    # Shift the ids in the first two columns down by one so they can be used
    # as 0-based row indices by the estimator.
    uir[:, 0:2] -= 1
    # Size the model from the data (largest id + 1) instead of hard-coding
    # the previously fixed 943 users / 1682 items.
    n_user = int(uir[:, 0].max()) + 1
    n_item = int(uir[:, 1].max()) + 1
    model = MFEstimator(n_user, n_item)
    model.fit(uir)
    return model
# Script entry point: the training run starts as soon as this file is executed.
main()
import numpy as np
import numba
from numba import njit, jit
import sys
class MFEstimator():
    """Matrix-factorization rating estimator whose SGD loop is compiled with
    numba's ``njit``.

    A rating of user ``u`` for item ``i`` is modelled as ``P[u] @ Q[i]``, where
    ``P`` has shape ``(n_user, n_fac)`` and ``Q`` has shape ``(n_item, n_fac)``.
    """

    def __init__(self, n_user, n_item, n_itr=50, n_fac=5, γ=0.07, λ=0.01, met='RMSE'):
        """Store the hyper-parameters and randomly initialise ``P`` and ``Q``.

        Args:
            n_user: number of rows of ``P``.
            n_item: number of rows of ``Q``.
            n_itr: number of passes over the training data.
            n_fac: number of latent factors (columns of ``P`` and ``Q``).
            γ: step size of each SGD update.
            λ: weight of the squared-norm penalty on the factor rows.
            met: metric name; stored, but not read anywhere in this class.
        """
        self.n_user = n_user
        self.n_item = n_item
        self.n_itr = n_itr
        self.n_fac = n_fac
        self.γ = γ
        self.λ = λ
        self.met = met
        # initialize output matrix
        self.P = np.random.normal(scale=0.0001, size=(self.n_user, self.n_fac))
        self.Q = np.random.normal(scale=0.0001, size=(self.n_item, self.n_fac))

    def fit(self, tdata):
        """Train ``P`` and ``Q`` on ``tdata`` and return ``self``.

        Args:
            tdata: array-like whose rows start with ``user, item, rating``;
                the ids must be 0-based row indices into ``P`` / ``Q``.

        Raises:
            IndexError: if an id is negative or not smaller than
                ``n_user`` / ``n_item``.
        """
        # NOTE: float32 represents integers exactly only up to 2**24, so ids
        # larger than that would not survive this cast.
        tdata = np.asarray(tdata, dtype=np.float32)
        if tdata.size:
            users, items = tdata[:, 0], tdata[:, 1]
            # Validate here, outside the compiled loop: negative ids would
            # otherwise wrap around silently and train the wrong row.
            if (users.min() < 0 or users.max() >= self.n_user
                    or items.min() < 0 or items.max() >= self.n_item):
                raise IndexError("user/item id outside the range of P/Q rows")
        self.P, self.Q = MFEstimator.sgd(tdata, self.P, self.Q, self.γ, self.λ, self.n_itr)
        return self

    @staticmethod
    @njit
    def sgd(tdata, P, Q, γ, λ, n_itr):
        """Run ``n_itr`` SGD passes over ``tdata``, updating ``P`` and ``Q`` in place.

        Prints the pass index and the loss accumulated during that pass after
        every pass, and returns the (mutated) ``P`` and ``Q``.
        """
        for itr in range(n_itr):
            loss = 0.0
            for j in range(len(tdata)):
                u, i, r = int(tdata[j, 0]), int(tdata[j, 1]), tdata[j, 2]
                # calc diff; copy both rows so each update below sees the
                # other row's pre-update values
                pu, qi = P[u, :].copy(), Q[i, :].copy()
                e = r - pu @ qi
                # update
                Q[i] += γ * (e * pu - λ * qi)
                P[u] += γ * (e * qi - λ * pu)
                # calculate loss (the penalty term uses the updated rows)
                loss += e * e + λ * (np.sum(P[u] ** 2) + np.sum(Q[i] ** 2))
            print(itr, loss)
        return P, Q
# Default location of the ratings file read by `main`.
DATA_PATH = './ml-100k/u.data'


def main(data_path=DATA_PATH):
    """Load the ratings file at ``data_path`` (``DATA_PATH`` by default), train
    an ``MFEstimator`` on it and return the trained model.
    """
    # `np.float` was only an alias of the builtin float and has been removed
    # from NumPy; `np.float64` is the same dtype and works on every version.
    uir = np.loadtxt(data_path, dtype=np.float64)
    # Shift the ids in the first two columns down by one so they can be used
    # as 0-based row indices by the estimator.
    uir[:, 0:2] -= 1
    # Size the model from the data (largest id + 1) instead of hard-coding
    # the previously fixed 943 users / 1682 items.
    n_user = int(uir[:, 0].max()) + 1
    n_item = int(uir[:, 1].max()) + 1
    model = MFEstimator(n_user, n_item)
    model.fit(uir)
    return model
# Script entry point: the training run starts as soon as this file is executed.
main()
using LinearAlgebra
"""
    main()

Read the ratings in `./ml-100k/u.data`, train the factorization for 50 passes
with 5 factors, `γ = 0.07` and `λ = 0.01`, and return the factor matrices
`(P, Q)`.
"""
function main()
    # input data
    train_data = read_data("./ml-100k/u.data")
    # `fit` uses the user/item ids directly as row indices, so the matrices are
    # sized by the largest id. Counting distinct ids is only equivalent when the
    # ids have no gaps, and would lead to an out-of-bounds access otherwise.
    n_user = mapreduce(t -> t[1], max, train_data; init=0)
    n_item = mapreduce(t -> t[2], max, train_data; init=0)
    γ = 0.07
    λ = 0.01
    # parameters
    P, Q = fit(n_user, n_item, train_data, 50, 5, γ, λ)
    return P, Q
end
"""
    read_data(file_path)

Read a tab-separated ratings file and return a
`Vector{Tuple{Int64,Int64,Float64}}` of `(user, item, rating)` entries.

Every non-blank line must contain at least three tab-separated numeric fields
(user id, item id, rating). Additional fields, such as a timestamp column, are
ignored. Blank lines are skipped.

# Throws
- `ArgumentError` if a line has fewer than three fields or a field is not numeric.
- `InexactError` if a user or item id is not integer-valued.
"""
function read_data(file_path)
    train_data = Tuple{Int64,Int64,Float64}[]
    # The do-block form closes the file even when parsing throws.
    open(file_path) do io
        for line in eachline(io)
            isempty(strip(line)) && continue  # tolerate blank / trailing lines
            fields = split(line, "\t")
            if length(fields) < 3
                throw(ArgumentError("expected at least 3 tab-separated fields: $line"))
            end
            u, i, r = (parse(Float64, x) for x in fields[1:3])
            # `push!` adds the tuple directly; no one-element array is built.
            push!(train_data, (Int64(u), Int64(i), r))
        end
    end
    return train_data
end
"""
    fit(n_user, n_item, train_data, n_itr, n_fac, γ, λ)

Factorize the ratings with stochastic gradient descent and return `(P, Q)`,
where `P` is an `n_user × n_fac` and `Q` an `n_item × n_fac` `Float64` matrix.
A rating of user `u` for item `i` is modelled by the dot product of row `u` of
`P` and row `i` of `Q`.

# Arguments
- `n_user`, `n_item`: number of rows of `P` and `Q`. Every user/item id found in
  `train_data` is used directly as a row index and must not exceed these.
- `train_data`: vector of `(user, item, rating)` tuples.
- `n_itr`: number of passes over `train_data`.
- `n_fac`: number of latent factors (columns of `P` and `Q`).
- `γ`: step size of each update.
- `λ`: weight of the squared-norm penalty on the factor rows.

The pass number and the loss accumulated during that pass are printed after
every pass.
"""
function fit(n_user::Integer,
             n_item::Integer,
             train_data::AbstractVector{<:Tuple{Integer,Integer,Real}},
             n_itr::Integer, n_fac::Integer, γ::Real,
             λ::Real)::Tuple{Array{Float64,2},Array{Float64,2}}
    # init parameters
    P = randn(Float64, n_user, n_fac)
    Q = randn(Float64, n_item, n_fac)
    # optimize: SGD
    for itr in 1:n_itr
        loss = 0.0  # Float64 from the start keeps the accumulator type-stable
        for (u, i, r) in train_data
            # Snapshot both rows so that each update sees the other row's
            # pre-update values.
            pu, qi = P[u, :], Q[i, :]
            # calc error
            e = r - pu ⋅ qi
            # In-place fused updates; no temporary row is allocated.
            @views Q[i, :] .+= γ .* (e .* pu .- λ .* qi)
            @views P[u, :] .+= γ .* (e .* qi .- λ .* pu)
            # calc loss (the penalty term is evaluated on the updated rows)
            loss += e * e + λ * (sum(abs2, view(P, u, :)) + sum(abs2, view(Q, i, :)))
        end
        println("$itr: $loss")
    end
    return P, Q
end
# Script entry point: the training run starts as soon as this file is executed.
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment