Last active
August 29, 2015 14:21
-
-
Save alfredplpl/c5de11157df20ca11b69 to your computer and use it in GitHub Desktop.
多変量版のミニバッチSGDです。マルチタスク学習でないので、効率はすごく悪いです
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
# This code is distributed under the 3-Clause BSD license (New BSD license). | |
# 基本的に作者の名前を書いていただければ、商用利用も可能です。なお、保証はしません。 | |
# 参考URL: http://osdn.jp/projects/opensource/wiki/licenses%2Fnew_BSD_license | |
from sklearn import linear_model | |
import Image | |
import numpy as np | |
from sklearn.cross_validation import ShuffleSplit | |
from sklearn.metrics import r2_score | |
class MultivariateMinibatchSGDR:
    """Multi-output linear regressor trained with mini-batch SGD.

    One independent ``SGDRegressor`` is kept per target column and updated
    with ``partial_fit`` on randomly drawn mini-batches.  The targets are not
    learned jointly, so training cost grows linearly with the number of
    target columns.

    Parameters
    ----------
    batch_size : divisor that controls the mini-batch size.  Each mini-batch
        is a random draw of the fraction ``1 / batch_size`` of the training
        samples (it is passed to ``ShuffleSplit`` as
        ``test_size=1.0 / batch_size``), so a larger value means smaller
        batches.
    n_iter, alpha, penalty, verbose, loss : forwarded unchanged to every
        underlying ``SGDRegressor``.
    """

    def __init__(self, batch_size=32, n_iter=5, alpha=0.0001, penalty="l2",
                 verbose=0, loss='squared_loss'):
        # Populated by fit(): one SGDRegressor per target column.
        self.clfs = None
        # Keyword arguments handed to each SGDRegressor.
        self.params = {"n_iter": n_iter, "alpha": alpha,
                       "penalty": penalty, "verbose": verbose, "loss": loss}
        self.batch_size = batch_size

    def fit(self, X, y, n_iter=20, random_state=0):
        """Train one regressor per column of ``y`` on random mini-batches.

        ``X`` must expose ``shape[0]`` and row indexing by an index array;
        ``y`` must be two-dimensional (``y.shape[1]`` target columns).
        ``n_iter`` is the number of mini-batches drawn and ``random_state``
        seeds the draw.  Returns ``self`` so calls can be chained.
        """
        self.ndim = y.shape[1]
        self.clfs = [linear_model.SGDRegressor(**self.params)
                     for _ in range(self.ndim)]
        rs = ShuffleSplit(X.shape[0], n_iter=n_iter, random_state=random_state,
                          test_size=1.0 / self.batch_size)
        # Only the "test" side of every split is used: it is the mini-batch.
        for _, batch in rs:
            # Slice the features once per batch instead of once per target.
            X_batch = X[batch]
            for i, clf in enumerate(self.clfs):
                clf.partial_fit(X=X_batch, y=y[batch, i])
        return self

    def predict(self, X):
        """Return predictions with one row per sample and one column per target."""
        # Each regressor predicts all samples in a single vectorised call;
        # the per-target vectors become the columns of the result.
        return np.column_stack([clf.predict(X) for clf in self.clfs])

    def score(self, X, y):
        """Return the R^2 score of the predictions for ``X`` against ``y``."""
        # r2_score expects (y_true, y_pred); R^2 is not symmetric, so the
        # argument order matters.
        return r2_score(y, self.predict(X))

    def get_params(self, deep=True):
        """Return every constructor argument as a new dict.

        ``batch_size`` is included so that rebuilding the estimator with
        ``MultivariateMinibatchSGDR(**params)`` (which is what
        ``sklearn.base.clone`` does) keeps the configured value.  ``deep`` is
        accepted for scikit-learn API compatibility and is not used.
        """
        return dict(self.params, batch_size=self.batch_size)
# The demo below runs only when this file is executed as a script; it is skipped when the module is imported.
#importしても実行されないので無視してimportしてください | |
if __name__ == "__main__":
    # Demo: 2-fold cross-validation of the mini-batch SGD regressor on a
    # synthetic multi-target regression problem, followed by the same
    # evaluation of an ordinary least-squares baseline for comparison.
    from sklearn.datasets import make_regression
    from sklearn.cross_validation import cross_val_score

    X, y = make_regression(n_samples=100000, n_features=20,
                           n_targets=10, noise=10)
    estimators = (
        MultivariateMinibatchSGDR(batch_size=4),
        linear_model.LinearRegression(),
    )
    for estimator in estimators:
        # Parenthesised single-argument print behaves the same as the
        # print statement.
        print(cross_val_score(estimator, X, y, cv=2))
# Copyright (c) 2015, alfredplpl | |
# All rights reserved. | |
__author__ = 'alfredplpl' | |
# References: https://github.com/lisa-lab/pylearn2/blob/master/pylearn2/training_algorithms/sgd.py | |
# https://www.kaggle.com/c/criteo-display-ad-challenge/forums/t/9561/how-to-apply-python-linear-model-sgdregressor-to-do-logistic-regression |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment