Created
November 26, 2016 07:22
-
-
Save jovianlin/27fdfb4217520285d66ee591aa453e78 to your computer and use it in GitHub Desktop.
Mini Batch Learning with SGD
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.linear_model import SGDRegressor | |
# Adapted from: https://adventuresindatascience.wordpress.com/2014/12/30/minibatch-learning-for-large-scale-data-using-scikit-learn/
def iter_minibatches(chunksize, numtrainingpoints, row_loader=None):
    """Yield the training data one mini-batch at a time.

    Args:
        chunksize: Maximum number of rows per batch. Must be positive.
        numtrainingpoints: Total number of training rows available.
        row_loader: Optional callable that takes a ``range`` of row indices
            and returns an ``(X_chunk, y_chunk)`` pair. When omitted, the
            module-level ``getrows`` function is used.

    Yields:
        ``(X_chunk, y_chunk)`` tuples, one per batch. The final batch is
        shorter when ``numtrainingpoints`` is not a multiple of
        ``chunksize``.

    Raises:
        ValueError: If ``chunksize`` is not positive. Because this is a
            generator, the error surfaces when iteration starts.
    """
    if chunksize <= 0:
        # A non-positive step would never advance past the first chunk.
        raise ValueError("chunksize must be a positive integer")

    if row_loader is None:
        # getrows() is deliberately not defined here, since it pretty much
        # depends on the specifics of where the data resides. Common
        # situations might involve the data being stored on disk, stored in
        # distributed fashion, obtained from an interface etc.
        row_loader = getrows

    # Provide chunks one by one.
    for chunk_start in range(0, numtrainingpoints, chunksize):
        # Clamp the end so the last chunk never asks for rows past the data.
        chunk_stop = min(chunk_start + chunksize, numtrainingpoints)
        X_chunk, y_chunk = row_loader(range(chunk_start, chunk_stop))
        yield X_chunk, y_chunk
def main(numtrainingpoints=None, chunksize=1000):
    """Train an SGDRegressor on mini-batches and predict on the test set.

    Args:
        numtrainingpoints: Total number of training rows to stream through
            ``iter_minibatches``. Required; it has a ``None`` default only
            so the signature stays compatible with the former
            zero-argument form.
        chunksize: Number of rows per mini-batch (defaults to 1000).

    Returns:
        The predictions produced by ``model.predict(X_test)``.

    Raises:
        TypeError: If ``numtrainingpoints`` is not supplied.
    """
    if numtrainingpoints is None:
        # iter_minibatches() cannot be called without the total row count.
        raise TypeError("main() requires 'numtrainingpoints'")

    batcherator = iter_minibatches(
        chunksize=chunksize, numtrainingpoints=numtrainingpoints
    )
    model = SGDRegressor()  # or SGDClassifier for classification tasks

    # Train the model incrementally, one mini-batch at a time.
    for X_chunk, y_chunk in batcherator:
        model.partial_fit(X_chunk, y_chunk)

    # Now make predictions with the trained model.
    # NOTE(review): X_test is read from module scope, as in the original;
    # it is not defined in the visible source -- confirm where it is set.
    y_predicted = model.predict(X_test)
    return y_predicted
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment