Python implementation of a Factorization Machine model (second-order FM, trained with momentum SGD or Adam)
from datetime import datetime
from math import exp, log, sqrt, pow
from random import random, uniform, seed
import pickle

seed(1024)

def data_generator(path, no_norm=False, task='c'):
    # yields (x, y) pairs from a libsvm-style file: "label idx:value idx:value ..."
    data = open(path, 'r')
    for row in data:
        row = row.strip().split(" ")
        y = float(row[0])
        row = row[1:]
        x = []
        for feature in row:
            feature = feature.split(":")
            idx = int(feature[0])
            value = float(feature[1])
            x.append([idx, value])
        if not no_norm:
            # instance-wise normalization: divide each value by the squared L2 norm of x
            r = 0.0
            for i in range(len(x)):
                r += x[i][1] * x[i][1]
            for i in range(len(x)):
                x[i][1] /= r
        if task == 'c':
            # classification expects labels in {-1, +1}
            if y == 0.0:
                y = -1.0
        yield x, y
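
# A worked example of the input format assumed by the parser above:
#   "1 3:1.0 7:0.5"  ->  y = 1.0, x = [[3, 1.0], [7, 0.5]]
# With normalization on, each value is divided by r = 1.0**2 + 0.5**2 = 1.25,
# giving x = [[3, 0.8], [7, 0.4]].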

def dot(u, v):
    # plain dot product of two equal-length lists (not used by the script below)
    u_v = 0.
    for idx in range(len(u)):
        u_v += u[idx] * v[idx]
    return u_v

def mse_loss_function(y, p):
    return (y - p) ** 2

def mae_loss_function(y, p):
    # exponentiates both sides, presumably to undo a log transform of the targets
    return abs(exp(y) - exp(p))

def log_loss_function(y, p):
    return -(y * log(p) + (1 - y) * log(1 - p))

def exponential_loss_function(y, p):
    # despite the name, this is the logistic loss for labels y in {-1, +1}
    return log(1 + exp(-y * p))

def sigmoid(inX):
    return 1 / (1 + exp(-inX))

def bounded_sigmoid(inX):
    # clip the argument to [-35, 35] to avoid overflow in exp()
    return 1. / (1. + exp(-max(min(inX, 35.), -35.)))
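
# For task 'c' the model is trained with the logistic loss above:
#   L(y, p) = log(1 + exp(-y * p)),   dL/dp = -y / (1 + exp(y * p))
# SGD.train() below computes exactly this derivative as its "residual".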

class SGD(object):
    def __init__(self, lr=0.001, momentum=0.9, nesterov=True, adam=False,
                 l2=0.0, l2_fm=0.0, l2_bias=0.0, ini_stdev=0.01, dropout=0.5,
                 task='c', n_components=4, nb_epoch=5, interaction=False,
                 no_norm=False):
        self.W = []
        self.V = []
        self.bias = uniform(-ini_stdev, ini_stdev)
        self.n_components = n_components
        self.lr = lr
        self.l2 = l2
        self.l2_fm = l2_fm
        self.l2_bias = l2_bias
        self.momentum = momentum
        self.nesterov = nesterov
        self.adam = adam
        self.nb_epoch = nb_epoch
        self.ini_stdev = ini_stdev
        self.task = task
        self.interaction = interaction
        self.dropout = dropout
        self.no_norm = no_norm
        if self.task != 'c':
            # self.loss_function = mse_loss_function
            self.loss_function = mae_loss_function
        else:
            self.loss_function = exponential_loss_function
            # self.loss_function = log_loss_function

    def preload(self, train, test):
        # scan both files once to find the largest feature index, then allocate weights
        train = data_generator(train, self.no_norm, self.task)
        dim = 0
        count = 0
        for x, y in train:
            for i in x:
                idx, value = i
                if idx > dim:
                    dim = idx
            count += 1
        print('Training samples:', count)
        test = data_generator(test, self.no_norm, self.task)
        count = 0
        for x, y in test:
            for i in x:
                idx, value = i
                if idx > dim:
                    dim = idx
            count += 1
        print('Testing samples:', count)
        dim = dim + 1
        print("Number of features:", dim)
        self.W = [uniform(-self.ini_stdev, self.ini_stdev) for _ in range(dim)]
        self.Velocity_W = [0.0 for _ in range(dim)]
        self.V = [[uniform(-self.ini_stdev, self.ini_stdev) for _ in range(self.n_components)] for _ in range(dim)]
        self.Velocity_V = [[0.0 for _ in range(self.n_components)] for _ in range(dim)]
        self.Velocity_bias = 0.0
        self.dim = dim

    def adam_init(self):
        # Adam state: first/second moment estimates for every parameter
        self.iterations = 0
        self.beta_1 = 0.9
        self.beta_2 = 0.999
        self.epsilon = 1e-8
        self.decay = 0.
        self.initial_decay = self.decay
        dim = self.dim
        self.m_W = [0.0 for _ in range(dim)]
        self.v_W = [0.0 for _ in range(dim)]
        self.m_V = [[0.0 for _ in range(self.n_components)] for _ in range(dim)]
        self.v_V = [[0.0 for _ in range(self.n_components)] for _ in range(dim)]
        self.m_bias = 0.0
        self.v_bias = 0.0

    def adam_update(self, lr, x, residual):
        if 0. < self.dropout < 1.:
            self.dropout_x(x)
        lr = self.lr
        if self.initial_decay > 0:
            lr *= (1. / (1. + self.decay * self.iterations))
        t = self.iterations + 1
        # learning rate with the bias correction of both moments folded in
        lr_t = lr * sqrt(1. - pow(self.beta_2, t)) / (1. - pow(self.beta_1, t))
        for sample in x:
            idx, value = sample
            g = residual * value
            m = self.m_W[idx]
            m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
            v = self.v_W[idx]
            v_t = (self.beta_2 * v) + (1. - self.beta_2) * (g ** 2)
            p = self.W[idx]
            p_t = p - lr_t * m_t / (sqrt(v_t) + self.epsilon)
            if self.l2 > 0:
                p_t = p_t - lr_t * self.l2 * p
            self.m_W[idx] = m_t
            self.v_W[idx] = v_t
            self.W[idx] = p_t
        if self.interaction:
            self._adam_update_fm(lr_t, x, residual)
        m = self.m_bias
        m_t = (self.beta_1 * m) + (1. - self.beta_1) * residual
        v = self.v_bias
        v_t = (self.beta_2 * v) + (1. - self.beta_2) * (residual ** 2)
        p = self.bias
        p_t = p - lr_t * m_t / (sqrt(v_t) + self.epsilon)
        if self.l2_bias > 0:
            p_t = p_t - lr_t * self.l2_bias * p
        self.m_bias = m_t
        self.v_bias = v_t
        self.bias = p_t
        self.iterations += 1

    def _adam_update_fm(self, lr_t, x, residual):
        # Adam step for the factor matrix V, reusing the per-factor sums
        # cached by _predict_fm
        len_x = len(x)
        sum_f_dict = self.sum_f_dict
        n_components = self.n_components
        for f in range(n_components):
            for i in range(len_x):
                idx_i, value_i = x[i]
                v = self.V[idx_i][f]
                sum_f = sum_f_dict[f]
                g = (sum_f * value_i - v * value_i * value_i) * residual
                m = self.m_V[idx_i][f]
                m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
                v = self.v_V[idx_i][f]
                v_t = (self.beta_2 * v) + (1. - self.beta_2) * (g ** 2)
                p = self.V[idx_i][f]
                p_t = p - lr_t * m_t / (sqrt(v_t) + self.epsilon)
                if self.l2_fm > 0:
                    p_t = p_t - lr_t * self.l2_fm * p
                self.m_V[idx_i][f] = m_t
                self.v_V[idx_i][f] = v_t
                self.V[idx_i][f] = p_t
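
    # Adam per-coordinate update implemented by the two methods above
    # (Kingma & Ba, 2015):
    #   m_t = beta_1 * m + (1 - beta_1) * g
    #   v_t = beta_2 * v + (1 - beta_2) * g**2
    #   p_t = p - lr_t * m_t / (sqrt(v_t) + eps)
    # with lr_t = lr * sqrt(1 - beta_2**t) / (1 - beta_1**t).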

    def dropout_x(self, x):
        # keep each feature with probability self.dropout; rebuilding the list
        # avoids the index skipping caused by deleting from x while iterating over it
        x[:] = [f for f in x if random() <= self.dropout]

    def _predict_fm(self, x):
        # second-order interaction term; also caches the per-factor sums
        # (sum_f_dict) so the gradient step can reuse them
        len_x = len(x)
        n_components = self.n_components
        pred = 0.0
        self.sum_f_dict = {}
        for f in range(n_components):
            sum_f = 0.0
            sum_sqr_f = 0.0
            for i in range(len_x):
                idx_i, value_i = x[i]
                d = self.V[idx_i][f] * value_i
                sum_f += d
                sum_sqr_f += d * d
            pred += 0.5 * (sum_f * sum_f - sum_sqr_f)
            self.sum_f_dict[f] = sum_f
        return pred
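
    # The loop above uses the standard FM identity (Rendle, 2010):
    #   sum_{i<j} <v_i, v_j> x_i x_j
    #     = 0.5 * sum_f [ (sum_i v_{i,f} x_i)**2 - sum_i (v_{i,f} x_i)**2 ],
    # which brings the pairwise term from O(n**2 * k) down to O(n * k).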

    def _predict_one(self, x):
        pred = self.bias
        # pred = 0.0
        for idx, value in x:
            pred += self.W[idx] * value
        if self.interaction:
            pred += self._predict_fm(x)
        # if self.task == 'c':
        #     pred = bounded_sigmoid(pred)
        return pred

    def _update_fm(self, lr, x, residual):
        len_x = len(x)
        sum_f_dict = self.sum_f_dict
        n_components = self.n_components
        for f in range(n_components):
            for i in range(len_x):
                idx_i, value_i = x[i]
                sum_f = sum_f_dict[f]
                v = self.V[idx_i][f]
                grad = (sum_f * value_i - v * value_i * value_i) * residual
                velocity = self.momentum * self.Velocity_V[idx_i][f] - lr * grad
                self.Velocity_V[idx_i][f] = velocity
                if self.nesterov:
                    # look-ahead step; the stored velocity is updated only once
                    velocity = self.momentum * velocity - lr * grad
                self.V[idx_i][f] = self.V[idx_i][f] + velocity - self.l2_fm * self.V[idx_i][f]

    def update(self, lr, x, residual):
        if 0. < self.dropout < 1.:
            self.dropout_x(x)
        for sample in x:
            idx, value = sample
            grad = residual * value
            velocity = self.momentum * self.Velocity_W[idx] - lr * grad
            self.Velocity_W[idx] = velocity
            if self.nesterov:
                velocity = self.momentum * velocity - lr * grad
            self.W[idx] = self.W[idx] + velocity - self.l2 * self.W[idx]
        if self.interaction:
            self._update_fm(lr, x, residual)
        velocity = self.momentum * self.Velocity_bias - lr * residual
        self.Velocity_bias = velocity
        if self.nesterov:
            velocity = self.momentum * velocity - lr * residual
        self.bias = self.bias + velocity - self.l2_bias * self.bias
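
    # Momentum update used above (Keras-style Nesterov):
    #   v_t  = momentum * v - lr * grad        (stored velocity)
    #   step = momentum * v_t - lr * grad      (look-ahead, if nesterov)
    #        = v_t                             (plain momentum otherwise)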

    def predict(self, path, out):
        data = data_generator(path, self.no_norm, self.task)
        with open(out, 'w') as outfile:
            ID = 0
            outfile.write('%s,%s\n' % ('ID', 'target'))
            for d in data:
                x, y = d
                p = self._predict_one(x)
                outfile.write('%s,%s\n' % (ID, str(p)))
                ID += 1

    def validate(self, path):
        data = data_generator(path, self.no_norm, self.task)
        loss = 0.0
        count = 0.0
        for d in data:
            x, y = d
            p = self._predict_one(x)
            loss += self.loss_function(y, p)
            count += 1
        return loss / count

    def save_weights(self):
        weights = [self.W, self.V, self.bias, self.Velocity_W, self.Velocity_V, self.dim]
        pickle.dump(weights, open('sgd_fm.pkl', 'wb'))

    def load_weights(self):
        weights = pickle.load(open('sgd_fm.pkl', 'rb'))
        self.W = weights[0]
        self.V = weights[1]
        self.bias = weights[2]
        self.Velocity_W = weights[3]
        self.Velocity_V = weights[4]
        self.dim = weights[5]

    def train(self, path, valid_path=None, in_memory=False):
        start = datetime.now()
        lr = self.lr
        if self.adam:
            self.adam_init()
            self.update = self.adam_update
        if in_memory:
            data = [d for d in data_generator(path, self.no_norm, self.task)]
        best_loss = 999999
        for epoch in range(1, self.nb_epoch + 1):
            if not in_memory:
                data = data_generator(path, self.no_norm, self.task)
            train_loss = 0.0
            train_count = 0
            for x, y in data:
                p = self._predict_one(x)
                if self.task != 'c':
                    residual = -(y - p)
                else:
                    # derivative of log(1 + exp(-y * p)) with respect to p
                    residual = -y * (1.0 - 1.0 / (1.0 + exp(-y * p)))
                    # residual = -(y - p)
                self.update(lr, x, residual)
                if train_count % 50000 == 0:
                    if train_count == 0:
                        print('\ttrain_count: %s, current loss: %.6f' % (train_count, 0.0))
                    else:
                        print('\ttrain_count: %s, current loss: %.6f' % (train_count, train_loss / train_count))
                train_loss += self.loss_function(y, p)
                train_count += 1
            epoch_end = datetime.now()
            duration = epoch_end - start
            if valid_path:
                valid_loss = self.validate(valid_path)
                print('Epoch: %s, train loss: %.6f, valid loss: %.6f, time: %s' % (epoch, train_loss / train_count, valid_loss, duration))
                if valid_loss < best_loss:
                    # checkpoint whenever validation loss improves
                    best_loss = valid_loss
                    self.save_weights()
                    print('save_weights')
            else:
                print('Epoch: %s, train loss: %.6f, time: %s' % (epoch, train_loss / train_count, duration))
path = "../input/" | |
''' | |
ploy3 best round 63 lr = 0.002,adam = True | |
''' | |
sgd = SGD(lr=0.001,momentum=0.9,adam=True,nesterov=True,dropout=0.0,l2=0.0,l2_fm=0.0,task='c',n_components=4,nb_epoch=63,interaction=True,no_norm=False) | |
sgd.preload(path+'X_cat_oh_high_order.svm',path+'X_t_cat_oh_high_order.svm') | |
# sgd.load_weights() | |
sgd.train(path+'X_train_cat_oh_high_order.svm',path+'X_test_cat_oh_high_order.svm',in_memory=False) | |
sgd.load_weights() | |
sgd.predict(path+'X_test_cat_oh_high_order.svm',out='valid.csv') | |
sgd.predict(path+'X_t_cat_oh_high_order.svm',out='out.csv') |
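
# A minimal usage sketch on your own data; train.svm / valid.svm / test.svm
# are placeholder file names, not the competition files used above:
#
#   model = SGD(lr=0.001, adam=True, task='c', n_components=4,
#               nb_epoch=10, interaction=True)
#   model.preload('train.svm', 'test.svm')    # sizes W and V from both files
#   model.train('train.svm', valid_path='valid.svm')
#   model.load_weights()                      # restore the best checkpoint
#   model.predict('test.svm', out='pred.csv')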