Python implementation of a Factorization Machine model (second-order FM, trained with momentum SGD or Adam)
from datetime import datetime
from math import exp, log, sqrt, pow
from random import random, uniform, seed
import pickle

seed(1024)

def data_generator(path, no_norm=False, task='c'):
    # yields (x, y) pairs from a libsvm-style file: "label idx:value idx:value ..."
    data = open(path, 'r')
    for row in data:
        row = row.strip().split(" ")
        y = float(row[0])
        row = row[1:]
        x = []
        for feature in row:
            feature = feature.split(":")
            idx = int(feature[0])
            value = float(feature[1])
            x.append([idx, value])
        if not no_norm:
            # instance-wise normalization: divide each value by the squared L2 norm of x
            r = 0.0
            for i in range(len(x)):
                r += x[i][1] * x[i][1]
            for i in range(len(x)):
                x[i][1] /= r
        if task == 'c':
            # classification expects labels in {-1, +1}
            if y == 0.0:
                y = -1.0
        yield x, y
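
# A worked example of the input format assumed by the parser above:
#   "1 3:1.0 7:0.5"  ->  y = 1.0, x = [[3, 1.0], [7, 0.5]]
# With normalization on, each value is divided by r = 1.0**2 + 0.5**2 = 1.25,
# giving x = [[3, 0.8], [7, 0.4]].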

def dot(u, v):
    # plain dot product of two equal-length lists (not used by the script below)
    u_v = 0.
    for idx in range(len(u)):
        u_v += u[idx] * v[idx]
    return u_v

def mse_loss_function(y, p):
    return (y - p) ** 2

def mae_loss_function(y, p):
    # exponentiates both sides, presumably to undo a log transform of the targets
    return abs(exp(y) - exp(p))

def log_loss_function(y, p):
    return -(y * log(p) + (1 - y) * log(1 - p))

def exponential_loss_function(y, p):
    # despite the name, this is the logistic loss for labels y in {-1, +1}
    return log(1 + exp(-y * p))

def sigmoid(inX):
    return 1 / (1 + exp(-inX))

def bounded_sigmoid(inX):
    # clip the argument to [-35, 35] to avoid overflow in exp()
    return 1. / (1. + exp(-max(min(inX, 35.), -35.)))
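
# For task 'c' the model is trained with the logistic loss above:
#   L(y, p) = log(1 + exp(-y * p)),   dL/dp = -y / (1 + exp(y * p))
# SGD.train() below computes exactly this derivative as its "residual".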

class SGD(object):
    def __init__(self, lr=0.001, momentum=0.9, nesterov=True, adam=False,
                 l2=0.0, l2_fm=0.0, l2_bias=0.0, ini_stdev=0.01, dropout=0.5,
                 task='c', n_components=4, nb_epoch=5, interaction=False,
                 no_norm=False):
        self.W = []
        self.V = []
        self.bias = uniform(-ini_stdev, ini_stdev)
        self.n_components = n_components
        self.lr = lr
        self.l2 = l2
        self.l2_fm = l2_fm
        self.l2_bias = l2_bias
        self.momentum = momentum
        self.nesterov = nesterov
        self.adam = adam
        self.nb_epoch = nb_epoch
        self.ini_stdev = ini_stdev
        self.task = task
        self.interaction = interaction
        self.dropout = dropout
        self.no_norm = no_norm
        if self.task != 'c':
            # self.loss_function = mse_loss_function
            self.loss_function = mae_loss_function
        else:
            self.loss_function = exponential_loss_function
            # self.loss_function = log_loss_function

    def preload(self, train, test):
        # scan both files once to find the largest feature index, then allocate weights
        train = data_generator(train, self.no_norm, self.task)
        dim = 0
        count = 0
        for x, y in train:
            for i in x:
                idx, value = i
                if idx > dim:
                    dim = idx
            count += 1
        print('Training samples:', count)
        test = data_generator(test, self.no_norm, self.task)
        count = 0
        for x, y in test:
            for i in x:
                idx, value = i
                if idx > dim:
                    dim = idx
            count += 1
        print('Testing samples:', count)
        dim = dim + 1
        print("Number of features:", dim)
        self.W = [uniform(-self.ini_stdev, self.ini_stdev) for _ in range(dim)]
        self.Velocity_W = [0.0 for _ in range(dim)]
        self.V = [[uniform(-self.ini_stdev, self.ini_stdev) for _ in range(self.n_components)] for _ in range(dim)]
        self.Velocity_V = [[0.0 for _ in range(self.n_components)] for _ in range(dim)]
        self.Velocity_bias = 0.0
        self.dim = dim

    def adam_init(self):
        # Adam state: first/second moment estimates for every parameter
        self.iterations = 0
        self.beta_1 = 0.9
        self.beta_2 = 0.999
        self.epsilon = 1e-8
        self.decay = 0.
        self.initial_decay = self.decay
        dim = self.dim
        self.m_W = [0.0 for _ in range(dim)]
        self.v_W = [0.0 for _ in range(dim)]
        self.m_V = [[0.0 for _ in range(self.n_components)] for _ in range(dim)]
        self.v_V = [[0.0 for _ in range(self.n_components)] for _ in range(dim)]
        self.m_bias = 0.0
        self.v_bias = 0.0

    def adam_update(self, lr, x, residual):
        if 0. < self.dropout < 1.:
            self.dropout_x(x)
        lr = self.lr
        if self.initial_decay > 0:
            lr *= (1. / (1. + self.decay * self.iterations))
        t = self.iterations + 1
        # learning rate with the bias correction of both moments folded in
        lr_t = lr * sqrt(1. - pow(self.beta_2, t)) / (1. - pow(self.beta_1, t))
        for sample in x:
            idx, value = sample
            g = residual * value
            m = self.m_W[idx]
            m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
            v = self.v_W[idx]
            v_t = (self.beta_2 * v) + (1. - self.beta_2) * (g ** 2)
            p = self.W[idx]
            p_t = p - lr_t * m_t / (sqrt(v_t) + self.epsilon)
            if self.l2 > 0:
                p_t = p_t - lr_t * self.l2 * p
            self.m_W[idx] = m_t
            self.v_W[idx] = v_t
            self.W[idx] = p_t
        if self.interaction:
            self._adam_update_fm(lr_t, x, residual)
        m = self.m_bias
        m_t = (self.beta_1 * m) + (1. - self.beta_1) * residual
        v = self.v_bias
        v_t = (self.beta_2 * v) + (1. - self.beta_2) * (residual ** 2)
        p = self.bias
        p_t = p - lr_t * m_t / (sqrt(v_t) + self.epsilon)
        if self.l2_bias > 0:
            p_t = p_t - lr_t * self.l2_bias * p
        self.m_bias = m_t
        self.v_bias = v_t
        self.bias = p_t
        self.iterations += 1

    def _adam_update_fm(self, lr_t, x, residual):
        # Adam step for the factor matrix V, reusing the per-factor sums
        # cached by _predict_fm
        len_x = len(x)
        sum_f_dict = self.sum_f_dict
        n_components = self.n_components
        for f in range(n_components):
            for i in range(len_x):
                idx_i, value_i = x[i]
                v = self.V[idx_i][f]
                sum_f = sum_f_dict[f]
                g = (sum_f * value_i - v * value_i * value_i) * residual
                m = self.m_V[idx_i][f]
                m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
                v = self.v_V[idx_i][f]
                v_t = (self.beta_2 * v) + (1. - self.beta_2) * (g ** 2)
                p = self.V[idx_i][f]
                p_t = p - lr_t * m_t / (sqrt(v_t) + self.epsilon)
                if self.l2_fm > 0:
                    p_t = p_t - lr_t * self.l2_fm * p
                self.m_V[idx_i][f] = m_t
                self.v_V[idx_i][f] = v_t
                self.V[idx_i][f] = p_t
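
    # Adam per-coordinate update implemented by the two methods above
    # (Kingma & Ba, 2015):
    #   m_t = beta_1 * m + (1 - beta_1) * g
    #   v_t = beta_2 * v + (1 - beta_2) * g**2
    #   p_t = p - lr_t * m_t / (sqrt(v_t) + eps)
    # with lr_t = lr * sqrt(1 - beta_2**t) / (1 - beta_1**t).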

    def dropout_x(self, x):
        # keep each feature with probability self.dropout; rebuilding the list
        # avoids the index skipping caused by deleting from x while iterating over it
        x[:] = [f for f in x if random() <= self.dropout]

    def _predict_fm(self, x):
        # second-order interaction term; also caches the per-factor sums
        # (sum_f_dict) so the gradient step can reuse them
        len_x = len(x)
        n_components = self.n_components
        pred = 0.0
        self.sum_f_dict = {}
        for f in range(n_components):
            sum_f = 0.0
            sum_sqr_f = 0.0
            for i in range(len_x):
                idx_i, value_i = x[i]
                d = self.V[idx_i][f] * value_i
                sum_f += d
                sum_sqr_f += d * d
            pred += 0.5 * (sum_f * sum_f - sum_sqr_f)
            self.sum_f_dict[f] = sum_f
        return pred
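
    # The loop above uses the standard FM identity (Rendle, 2010):
    #   sum_{i<j} <v_i, v_j> x_i x_j
    #     = 0.5 * sum_f [ (sum_i v_{i,f} x_i)**2 - sum_i (v_{i,f} x_i)**2 ],
    # which brings the pairwise term from O(n**2 * k) down to O(n * k).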

    def _predict_one(self, x):
        pred = self.bias
        # pred = 0.0
        for idx, value in x:
            pred += self.W[idx] * value
        if self.interaction:
            pred += self._predict_fm(x)
        # if self.task == 'c':
        #     pred = bounded_sigmoid(pred)
        return pred

    def _update_fm(self, lr, x, residual):
        len_x = len(x)
        sum_f_dict = self.sum_f_dict
        n_components = self.n_components
        for f in range(n_components):
            for i in range(len_x):
                idx_i, value_i = x[i]
                sum_f = sum_f_dict[f]
                v = self.V[idx_i][f]
                grad = (sum_f * value_i - v * value_i * value_i) * residual
                velocity = self.momentum * self.Velocity_V[idx_i][f] - lr * grad
                self.Velocity_V[idx_i][f] = velocity
                if self.nesterov:
                    # look-ahead step; the stored velocity is updated only once
                    velocity = self.momentum * velocity - lr * grad
                self.V[idx_i][f] = self.V[idx_i][f] + velocity - self.l2_fm * self.V[idx_i][f]

    def update(self, lr, x, residual):
        if 0. < self.dropout < 1.:
            self.dropout_x(x)
        for sample in x:
            idx, value = sample
            grad = residual * value
            velocity = self.momentum * self.Velocity_W[idx] - lr * grad
            self.Velocity_W[idx] = velocity
            if self.nesterov:
                velocity = self.momentum * velocity - lr * grad
            self.W[idx] = self.W[idx] + velocity - self.l2 * self.W[idx]
        if self.interaction:
            self._update_fm(lr, x, residual)
        velocity = self.momentum * self.Velocity_bias - lr * residual
        self.Velocity_bias = velocity
        if self.nesterov:
            velocity = self.momentum * velocity - lr * residual
        self.bias = self.bias + velocity - self.l2_bias * self.bias
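
    # Momentum update used above (Keras-style Nesterov):
    #   v_t  = momentum * v - lr * grad        (stored velocity)
    #   step = momentum * v_t - lr * grad      (look-ahead, if nesterov)
    #        = v_t                             (plain momentum otherwise)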

    def predict(self, path, out):
        data = data_generator(path, self.no_norm, self.task)
        with open(out, 'w') as outfile:
            ID = 0
            outfile.write('%s,%s\n' % ('ID', 'target'))
            for d in data:
                x, y = d
                p = self._predict_one(x)
                outfile.write('%s,%s\n' % (ID, str(p)))
                ID += 1

    def validate(self, path):
        data = data_generator(path, self.no_norm, self.task)
        loss = 0.0
        count = 0.0
        for d in data:
            x, y = d
            p = self._predict_one(x)
            loss += self.loss_function(y, p)
            count += 1
        return loss / count

    def save_weights(self):
        weights = [self.W, self.V, self.bias, self.Velocity_W, self.Velocity_V, self.dim]
        pickle.dump(weights, open('sgd_fm.pkl', 'wb'))

    def load_weights(self):
        weights = pickle.load(open('sgd_fm.pkl', 'rb'))
        self.W = weights[0]
        self.V = weights[1]
        self.bias = weights[2]
        self.Velocity_W = weights[3]
        self.Velocity_V = weights[4]
        self.dim = weights[5]

    def train(self, path, valid_path=None, in_memory=False):
        start = datetime.now()
        lr = self.lr
        if self.adam:
            self.adam_init()
            self.update = self.adam_update
        if in_memory:
            data = [d for d in data_generator(path, self.no_norm, self.task)]
        best_loss = 999999
        for epoch in range(1, self.nb_epoch + 1):
            if not in_memory:
                data = data_generator(path, self.no_norm, self.task)
            train_loss = 0.0
            train_count = 0
            for x, y in data:
                p = self._predict_one(x)
                if self.task != 'c':
                    residual = -(y - p)
                else:
                    # derivative of log(1 + exp(-y * p)) with respect to p
                    residual = -y * (1.0 - 1.0 / (1.0 + exp(-y * p)))
                    # residual = -(y - p)
                self.update(lr, x, residual)
                if train_count % 50000 == 0:
                    if train_count == 0:
                        print('\ttrain_count: %s, current loss: %.6f' % (train_count, 0.0))
                    else:
                        print('\ttrain_count: %s, current loss: %.6f' % (train_count, train_loss / train_count))
                train_loss += self.loss_function(y, p)
                train_count += 1
            epoch_end = datetime.now()
            duration = epoch_end - start
            if valid_path:
                valid_loss = self.validate(valid_path)
                print('Epoch: %s, train loss: %.6f, valid loss: %.6f, time: %s' % (epoch, train_loss / train_count, valid_loss, duration))
                if valid_loss < best_loss:
                    # checkpoint whenever validation loss improves
                    best_loss = valid_loss
                    self.save_weights()
                    print('save_weights')
            else:
                print('Epoch: %s, train loss: %.6f, time: %s' % (epoch, train_loss / train_count, duration))
path = "../input/" | |
''' | |
ploy3 best round 63 lr = 0.002,adam = True | |
''' | |
sgd = SGD(lr=0.001,momentum=0.9,adam=True,nesterov=True,dropout=0.0,l2=0.0,l2_fm=0.0,task='c',n_components=4,nb_epoch=63,interaction=True,no_norm=False) | |
sgd.preload(path+'X_cat_oh_high_order.svm',path+'X_t_cat_oh_high_order.svm') | |
# sgd.load_weights() | |
sgd.train(path+'X_train_cat_oh_high_order.svm',path+'X_test_cat_oh_high_order.svm',in_memory=False) | |
sgd.load_weights() | |
sgd.predict(path+'X_test_cat_oh_high_order.svm',out='valid.csv') | |
sgd.predict(path+'X_t_cat_oh_high_order.svm',out='out.csv') |
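
# A minimal usage sketch on your own data; train.svm / valid.svm / test.svm
# are placeholder file names, not the competition files used above:
#
#   model = SGD(lr=0.001, adam=True, task='c', n_components=4,
#               nb_epoch=10, interaction=True)
#   model.preload('train.svm', 'test.svm')    # sizes W and V from both files
#   model.train('train.svm', valid_path='valid.svm')
#   model.load_weights()                      # restore the best checkpoint
#   model.predict('test.svm', out='pred.csv')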