Skip to content

Instantly share code, notes, and snippets.

@gatapia
Created April 28, 2014 06:22
Show Gist options
  • Save gatapia/11363090 to your computer and use it in GitHub Desktop.
Save gatapia/11363090 to your computer and use it in GitHub Desktop.
A Python script that causes deadlocks in IPython
import sklearn as sk
import numpy as np
import pandas as pd
import scipy as scipy
import cPickle as pickle
import math
from collections import Counter
from sklearn.cross_validation import train_test_split
from sklearn.cross_validation import cross_val_score
from sklearn.cross_validation import ShuffleSplit
from scipy.stats import sem
from scipy.stats.mstats import mode
from sklearn.grid_search import GridSearchCV
from sklearn.ensemble import RandomForestClassifier
def do_gs(model, params, n_samples=1000, cv=3, n_jobs=-1):
    """Run a grid search for `model` over `params` and return the fitted search.

    The search is fitted on the first `n_samples` rows of the module-level
    ``X_train`` / ``y_train``; both must be assigned before this function is
    called, since they are read as globals rather than passed in.

    Args:
        model: Estimator handed to ``GridSearchCV``.
        params: Parameter grid handed to ``GridSearchCV``.
        n_samples: Upper bound of the ``[:n_samples]`` slice taken from the
            training data.
        cv: Forwarded to ``GridSearchCV`` as ``cv``.
        n_jobs: Forwarded to ``GridSearchCV`` as ``n_jobs``.

    Returns:
        The ``GridSearchCV`` object after ``fit`` has been called on it.
    """
    gs = GridSearchCV(model, params, cv=cv, n_jobs=n_jobs, verbose=2)
    gs.fit(X_train[:n_samples], y_train[:n_samples])
    # Report the winning parameter combination and its score.
    print(gs.best_params_, gs.best_score_)
    return gs
# Entry-point guard: the grid search below starts worker processes
# (n_jobs=30). Process-based parallelism may re-import this module in each
# worker (e.g. with spawn-based multiprocessing), which without the guard
# would re-run the data load and the search itself.
if __name__ == "__main__":
    seed = 0

    # NOTE(review): pickle.load can execute arbitrary code embedded in the
    # file -- only load pickles that come from a trusted source.
    # The `with` block closes the file even if unpickling raises.
    with open("../data/all_data.p", "rb") as f:
        data = pickle.load(f)

    X_train = data['train_munged']
    y_train = data['y']
    # Drop the reference to the loaded dict; only the two entries above are
    # used from here on. (Plain `del` -- `%del` is not valid Python.)
    del data
    print("Loaded train[" + str(len(X_train)) + "]")

    # Parameter grid for the random forest. Note that the name `data` is
    # reused here for the grid, after the loaded dict was deleted above.
    data = {
        'min_samples_leaf': [1, 2, 3, 4, 5],
        'min_samples_split': [4, 5, 6],
        'max_depth': [21, 22, 23, 24],
        'max_features': [36, 37, 38, 39, 40],
        'n_estimators': [470, 475, 480],
    }

    # The forest itself is single-threaded (n_jobs=1); parallelism is applied
    # at the grid-search level (n_jobs=30).
    gs = do_gs(RandomForestClassifier(random_state=seed, n_jobs=1),
               data, cv=3, n_samples=2500, n_jobs=30)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment