@halegreen
Created November 17, 2017 02:44
Python implementation of a Factorization Machine model (SGD with momentum/Nesterov, or Adam; optional second-order interactions)

from datetime import datetime
from math import exp, log, sqrt, pow
from random import random, uniform, seed
import pickle

seed(1024)

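# Input format expected by data_generator: one sample per line,
# libsvm-style
#   "<label> <idx>:<value> <idx>:<value> ..."
# e.g. (hypothetical toy lines, not from the original data):
#   1 0:1.0 3:0.5 7:1.0
#   0 1:1.0 2:0.25
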
def data_generator(path, no_norm=False, task='c'):
    # Streams (x, y) pairs from a libsvm-style file, one sample at a time.
    with open(path, 'r') as data:
        for row in data:
            row = row.strip().split(" ")
            y = float(row[0])
            row = row[1:]
            x = []
            for feature in row:
                feature = feature.split(":")
                idx = int(feature[0])
                value = float(feature[1])
                x.append([idx, value])
            if not no_norm:
                # Instance-wise normalization: divide each value by the
                # squared L2 norm of the sample.
                r = 0.0
                for i in range(len(x)):
                    r += x[i][1] * x[i][1]
                for i in range(len(x)):
                    x[i][1] /= r
            if task == 'c':
                # Map {0, 1} labels to {-1, +1} for the logistic loss.
                if y == 0.0:
                    y = -1.0
            yield x, y

def dot(u, v):
    u_v = 0.
    for idx in range(len(u)):
        u_v += u[idx] * v[idx]
    return u_v

def mse_loss_function(y, p):
    return (y - p) ** 2

def mae_loss_function(y, p):
    # Exponentiates both values, presumably because targets were
    # log-transformed upstream.
    return abs(exp(y) - exp(p))

def log_loss_function(y, p):
    return -(y * log(p) + (1 - y) * log(1 - p))

def exponential_loss_function(y, p):
    # Logistic loss for y in {-1, +1}.
    return log(1 + exp(-y * p))

def sigmoid(inX):
    return 1 / (1 + exp(-inX))

def bounded_sigmoid(inX):
    # Clip the argument to [-35, 35] to avoid overflow in exp().
    return 1. / (1. + exp(-max(min(inX, 35.), -35.)))

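# The class below implements a degree-2 factorization machine:
#   y_hat(x) = bias + sum_i W[i] * x_i
#              + sum_{i<j} <V[i], V[j]> * x_i * x_j
# where V[i] is an n_components-dimensional factor vector per feature.
# _predict_fm evaluates the pairwise term with Rendle's O(k*n) identity:
#   sum_{i<j} <V[i], V[j]> x_i x_j
#     = 0.5 * sum_f [ (sum_i V[i][f] x_i)^2 - sum_i (V[i][f] x_i)^2 ]
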
class SGD(object):
    def __init__(self, lr=0.001, momentum=0.9, nesterov=True, adam=False,
                 l2=0.0, l2_fm=0.0, l2_bias=0.0, ini_stdev=0.01, dropout=0.5,
                 task='c', n_components=4, nb_epoch=5, interaction=False,
                 no_norm=False):
        self.W = []
        self.V = []
        self.bias = uniform(-ini_stdev, ini_stdev)
        self.n_components = n_components
        self.lr = lr
        self.l2 = l2
        self.l2_fm = l2_fm
        self.l2_bias = l2_bias
        self.momentum = momentum
        self.nesterov = nesterov
        self.adam = adam
        self.nb_epoch = nb_epoch
        self.ini_stdev = ini_stdev
        self.task = task
        self.interaction = interaction
        self.dropout = dropout
        self.no_norm = no_norm
        if self.task != 'c':
            # self.loss_function = mse_loss_function
            self.loss_function = mae_loss_function
        else:
            self.loss_function = exponential_loss_function
            # self.loss_function = log_loss_function

    def preload(self, train, test):
        # Scan both files once to find the largest feature index and
        # count samples, then initialize the weights.
        train = data_generator(train, self.no_norm, self.task)
        dim = 0
        count = 0
        for x, y in train:
            for idx, value in x:
                if idx > dim:
                    dim = idx
            count += 1
        print('Training samples:', count)
        test = data_generator(test, self.no_norm, self.task)
        count = 0
        for x, y in test:
            for idx, value in x:
                if idx > dim:
                    dim = idx
            count += 1
        print('Testing samples:', count)
        dim = dim + 1
        print("Number of features:", dim)
        self.W = [uniform(-self.ini_stdev, self.ini_stdev) for _ in range(dim)]
        self.Velocity_W = [0.0 for _ in range(dim)]
        self.V = [[uniform(-self.ini_stdev, self.ini_stdev) for _ in range(self.n_components)] for _ in range(dim)]
        self.Velocity_V = [[0.0 for _ in range(self.n_components)] for _ in range(dim)]
        self.Velocity_bias = 0.0
        self.dim = dim

    def adam_init(self):
        self.iterations = 0
        self.beta_1 = 0.9
        self.beta_2 = 0.999
        self.epsilon = 1e-8
        self.decay = 0.
        self.initial_decay = self.decay
        dim = self.dim
        self.m_W = [0.0 for _ in range(dim)]
        self.v_W = [0.0 for _ in range(dim)]
        self.m_V = [[0.0 for _ in range(self.n_components)] for _ in range(dim)]
        self.v_V = [[0.0 for _ in range(self.n_components)] for _ in range(dim)]
        self.m_bias = 0.0
        self.v_bias = 0.0

    def adam_update(self, lr, x, residual):
        if 0. < self.dropout < 1.:
            self.dropout_x(x)
        lr = self.lr  # the lr argument is ignored; Adam uses self.lr
        if self.initial_decay > 0:
            lr *= (1. / (1. + self.decay * self.iterations))
        t = self.iterations + 1
        # Bias-corrected learning rate for this step.
        lr_t = lr * sqrt(1. - pow(self.beta_2, t)) / (1. - pow(self.beta_1, t))
        for idx, value in x:
            g = residual * value
            m_t = (self.beta_1 * self.m_W[idx]) + (1. - self.beta_1) * g
            v_t = (self.beta_2 * self.v_W[idx]) + (1. - self.beta_2) * (g ** 2)
            p = self.W[idx]
            p_t = p - lr_t * m_t / (sqrt(v_t) + self.epsilon)
            if self.l2 > 0:
                p_t = p_t - lr_t * self.l2 * p
            self.m_W[idx] = m_t
            self.v_W[idx] = v_t
            self.W[idx] = p_t
        if self.interaction:
            self._adam_update_fm(lr_t, x, residual)
        m_t = (self.beta_1 * self.m_bias) + (1. - self.beta_1) * residual
        v_t = (self.beta_2 * self.v_bias) + (1. - self.beta_2) * (residual ** 2)
        p = self.bias
        p_t = p - lr_t * m_t / (sqrt(v_t) + self.epsilon)
        if self.l2_bias > 0:
            p_t = p_t - lr_t * self.l2_bias * p
        self.m_bias = m_t
        self.v_bias = v_t
        self.bias = p_t
        self.iterations += 1

    def _adam_update_fm(self, lr_t, x, residual):
        # Adam step for the factor matrix V, reusing the per-factor sums
        # cached by _predict_fm.
        sum_f_dict = self.sum_f_dict
        for f in range(self.n_components):
            sum_f = sum_f_dict[f]
            for idx_i, value_i in x:
                v_if = self.V[idx_i][f]
                # d(pred)/d(v_if) = sum_f * value_i - v_if * value_i^2
                g = (sum_f * value_i - v_if * value_i * value_i) * residual
                m_t = (self.beta_1 * self.m_V[idx_i][f]) + (1. - self.beta_1) * g
                v_t = (self.beta_2 * self.v_V[idx_i][f]) + (1. - self.beta_2) * (g ** 2)
                p_t = v_if - lr_t * m_t / (sqrt(v_t) + self.epsilon)
                if self.l2_fm > 0:
                    p_t = p_t - lr_t * self.l2_fm * v_if
                self.m_V[idx_i][f] = m_t
                self.v_V[idx_i][f] = v_t
                self.V[idx_i][f] = p_t

    def dropout_x(self, x):
        # Randomly drop features in place; self.dropout acts as the keep
        # probability here. (Filtering a copy avoids the index-shifting
        # bug of deleting while iterating.)
        x[:] = [sample for sample in x if random() <= self.dropout]

    def _predict_fm(self, x):
        # Pairwise interaction term, computed factor-by-factor in O(k*n)
        # via the identity noted above the class definition.
        pred = 0.0
        self.sum_f_dict = {}
        for f in range(self.n_components):
            sum_f = 0.0
            sum_sqr_f = 0.0
            for idx_i, value_i in x:
                d = self.V[idx_i][f] * value_i
                sum_f += d
                sum_sqr_f += d * d
            pred += 0.5 * (sum_f * sum_f - sum_sqr_f)
            # Cache the per-factor sums for reuse in the gradient updates.
            self.sum_f_dict[f] = sum_f
        return pred

    def _predict_one(self, x):
        pred = self.bias
        # pred = 0.0
        for idx, value in x:
            pred += self.W[idx] * value
        if self.interaction:
            pred += self._predict_fm(x)
        # if self.task == 'c':
        #     pred = bounded_sigmoid(pred)
        return pred

    def _update_fm(self, lr, x, residual):
        # Momentum / Nesterov-momentum step for the factor matrix V.
        sum_f_dict = self.sum_f_dict
        for f in range(self.n_components):
            sum_f = sum_f_dict[f]
            for idx_i, value_i in x:
                v = self.V[idx_i][f]
                grad = (sum_f * value_i - v * value_i * value_i) * residual
                self.Velocity_V[idx_i][f] = self.momentum * self.Velocity_V[idx_i][f] - lr * grad
                step = self.Velocity_V[idx_i][f]
                if self.nesterov:
                    # Look-ahead step; the stored velocity is not overwritten.
                    step = self.momentum * self.Velocity_V[idx_i][f] - lr * grad
                self.V[idx_i][f] = v + step - self.l2_fm * v

    def update(self, lr, x, residual):
        if 0. < self.dropout < 1.:
            self.dropout_x(x)
        for idx, value in x:
            grad = residual * value
            self.Velocity_W[idx] = self.momentum * self.Velocity_W[idx] - lr * grad
            step = self.Velocity_W[idx]
            if self.nesterov:
                step = self.momentum * self.Velocity_W[idx] - lr * grad
            self.W[idx] = self.W[idx] + step - self.l2 * self.W[idx]
        if self.interaction:
            self._update_fm(lr, x, residual)
        self.Velocity_bias = self.momentum * self.Velocity_bias - lr * residual
        step = self.Velocity_bias
        if self.nesterov:
            step = self.momentum * self.Velocity_bias - lr * residual
        self.bias = self.bias + step - self.l2_bias * self.bias

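    # Note on the momentum steps above: with velocity v and gradient g,
    #   v <- momentum * v - lr * g
    # and the applied step is v for plain momentum, or the look-ahead
    #   momentum * v - lr * g
    # for Nesterov momentum (the Sutskever/Keras-style formulation).
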
    def predict(self, path, out):
        data = data_generator(path, self.no_norm, self.task)
        with open(out, 'w') as outfile:
            ID = 0
            outfile.write('%s,%s\n' % ('ID', 'target'))
            for x, y in data:
                p = self._predict_one(x)
                outfile.write('%s,%s\n' % (ID, p))
                ID += 1

    def validate(self, path):
        data = data_generator(path, self.no_norm, self.task)
        loss = 0.0
        count = 0.0
        for x, y in data:
            p = self._predict_one(x)
            loss += self.loss_function(y, p)
            count += 1
        return loss / count

    def save_weights(self):
        weights = [self.W, self.V, self.bias, self.Velocity_W, self.Velocity_V, self.dim]
        pickle.dump(weights, open('sgd_fm.pkl', 'wb'))

    def load_weights(self):
        weights = pickle.load(open('sgd_fm.pkl', 'rb'))
        self.W, self.V, self.bias, self.Velocity_W, self.Velocity_V, self.dim = weights

    def train(self, path, valid_path=None, in_memory=False):
        start = datetime.now()
        lr = self.lr
        if self.adam:
            self.adam_init()
            self.update = self.adam_update
        if in_memory:
            data = [d for d in data_generator(path, self.no_norm, self.task)]
        best_loss = 999999
        best_epoch = 0
        for epoch in range(1, self.nb_epoch + 1):
            if not in_memory:
                data = data_generator(path, self.no_norm, self.task)
            train_loss = 0.0
            train_count = 0
            for x, y in data:
                p = self._predict_one(x)
                if self.task != 'c':
                    residual = -(y - p)
                else:
                    # Gradient of log(1 + exp(-y*p)) with respect to p.
                    residual = -y * (1.0 - 1.0 / (1.0 + exp(-y * p)))
                    # residual = -(y-p)
                self.update(lr, x, residual)
                if train_count % 50000 == 0:
                    if train_count == 0:
                        print('\ttrain_count: %s, current loss: %.6f' % (train_count, 0.0))
                    else:
                        print('\ttrain_count: %s, current loss: %.6f' % (train_count, train_loss / train_count))
                train_loss += self.loss_function(y, p)
                train_count += 1
            duration = datetime.now() - start
            if valid_path:
                valid_loss = self.validate(valid_path)
                print('Epoch: %s, train loss: %.6f, valid loss: %.6f, time: %s' % (epoch, train_loss / train_count, valid_loss, duration))
                if valid_loss < best_loss:
                    best_loss = valid_loss
                    best_epoch = epoch
                    self.save_weights()
                    print('save_weights')
            else:
                print('Epoch: %s, train loss: %.6f, time: %s' % (epoch, train_loss / train_count, duration))

path = "../input/"

# ploy3 best round 63, lr = 0.002, adam = True

sgd = SGD(lr=0.001, momentum=0.9, adam=True, nesterov=True, dropout=0.0,
          l2=0.0, l2_fm=0.0, task='c', n_components=4, nb_epoch=63,
          interaction=True, no_norm=False)
sgd.preload(path + 'X_cat_oh_high_order.svm', path + 'X_t_cat_oh_high_order.svm')
# sgd.load_weights()
sgd.train(path + 'X_train_cat_oh_high_order.svm', path + 'X_test_cat_oh_high_order.svm', in_memory=False)
sgd.load_weights()
sgd.predict(path + 'X_test_cat_oh_high_order.svm', out='valid.csv')
sgd.predict(path + 'X_t_cat_oh_high_order.svm', out='out.csv')
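
# --- Minimal smoke test (a sketch, left commented out; the toy file
# names below are hypothetical, not part of the original gist) ---
#
#   with open('toy_train.svm', 'w') as f:
#       f.write('1 0:1.0 2:0.5\n0 1:1.0 3:0.5\n')
#   with open('toy_valid.svm', 'w') as f:
#       f.write('1 0:1.0 3:0.5\n')
#   toy = SGD(lr=0.01, adam=True, dropout=0.0, task='c',
#             n_components=2, nb_epoch=3, interaction=True)
#   toy.preload('toy_train.svm', 'toy_valid.svm')
#   toy.train('toy_train.svm', 'toy_valid.svm', in_memory=True)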