# Gist neerajvashistha/fe8a2bf39f7d241443561c459e0b491b by @neerajvashistha,
# created September 16, 2018 18:21.
from sklearn.feature_selection import RFE
from sklearn.linear_model import LogisticRegression
from sklearn import metrics
from sklearn.ensemble import RandomForestClassifier
# Recursive feature elimination: repeatedly fit a logistic regression and drop
# the weakest features until 12 remain.
model = LogisticRegression()
# n_features_to_select is passed by keyword: recent scikit-learn releases
# reject it as a positional argument.
rfe = RFE(model, n_features_to_select=12)
# NOTE(review): `x` and `y` are not defined anywhere above this point in the
# visible code -- presumably the feature-column list and the target column
# name set up in an earlier cell; confirm before running.
rfe = rfe.fit(emp_train[x], emp_train[y])
print(rfe.support_)  # boolean mask of the selected features
print(rfe.ranking_)  # per-feature rank; selected features are ranked 1
# Feature columns used for both training and prediction.
cols = [
    'department', 'region', 'education', 'gender', 'recruitment_channel',
    'no_of_trainings', 'age', 'previous_year_rating', 'length_of_service',
    'KPIs_met >80%', 'awards_won?', 'avg_training_score',
    'training_scr_35_50', 'training_scr_50_65', 'training_scr_65_80',
    'training_scr_85_100', 'age_18_25', 'age_25_35', 'age_35_45',
    'age_45_60', 'age_training', 'KPI_training', 'education_age',
    'region_max', 'len_serv_0_5', 'len_serv_5_10', 'len_serv_10_15',
    'len_serv_15_20', 'len_serv_25',
]
X = emp_train[cols]
y = emp_train['is_promoted']
# sklearn.cross_validation was removed in scikit-learn 0.20; the same helper
# lives in sklearn.model_selection.
from sklearn.model_selection import train_test_split
# No hold-out split is made: the whole training frame is used to fit.
X_train, y_train = X, y
# Copy the selection so the columns added below land on an independent frame
# instead of a slice of emp_test (avoids pandas' SettingWithCopyWarning).
X_test = emp_test[cols].copy()
# Seeded like the other estimators in this file so reruns are reproducible.
rf = RandomForestClassifier(random_state=42)
rf.fit(X_train, y_train)
y_pred = rf.predict(X_test)
# Attach the predictions and ids, then keep only those two columns.
X_test['is_promoted'] = y_pred
# NOTE(review): `employee_id` is not defined in the visible code -- presumably
# the id column set aside from emp_test in an earlier cell; confirm.
X_test['employee_id'] = employee_id
sub = X_test.loc[:, ['employee_id', 'is_promoted']]
print(sub.shape)
from sklearn.ensemble import RandomForestRegressor
from pprint import pprint
from sklearn.model_selection import RandomizedSearchCV

# Print the default hyperparameters as a baseline for the search below.
# NOTE(review): `is_promoted` is fitted with a classifier earlier in this file
# but is tuned here with a regressor scored by mean absolute error -- confirm
# that this is intended.
rf = RandomForestRegressor(random_state=42)
pprint(rf.get_params())

# Number of trees in random forest: 200, 400, ..., 2000 (ten values).
n_estimators = list(range(200, 2001, 200))
# Number of features to consider at every split.
# 'auto' has been removed from scikit-learn; for a regressor it meant "all
# features", which 1.0 expresses on both old and new releases.
max_features = [1.0, 'sqrt']
# Maximum number of levels in tree: 10, 20, ..., 110, plus None (no limit).
max_depth = list(range(10, 111, 10))
max_depth.append(None)
# Minimum number of samples required to split a node
min_samples_split = [2, 5, 10]
# Minimum number of samples required at each leaf node
min_samples_leaf = [1, 2, 4]
# Method of selecting samples for training each tree
bootstrap = [True, False]

# Create the random grid
random_grid = {
    'n_estimators': n_estimators,
    'max_features': max_features,
    'max_depth': max_depth,
    'min_samples_split': min_samples_split,
    'min_samples_leaf': min_samples_leaf,
    'bootstrap': bootstrap,
}
pprint(random_grid)

rf = RandomForestRegressor(random_state=42)
# Random search of parameters, using 3 fold cross validation,
# search across 100 different combinations, and use all available cores
rf_random = RandomizedSearchCV(
    estimator=rf,
    param_distributions=random_grid,
    n_iter=100,
    scoring='neg_mean_absolute_error',
    cv=3,
    verbose=2,
    random_state=42,
    n_jobs=-1,
    return_train_score=True,
)
# Fit the random search model
rf_random.fit(X_train, y_train)
# Bare expressions: these are only displayed when run as the last line of a
# notebook cell or in a REPL; as a script they have no visible effect.
rf_random.best_params_
rf_random.cv_results_
def evaluate(model, test_features, test_labels):
    """Print and return a MAPE-based accuracy score for a fitted model.

    Args:
        model: Fitted estimator exposing ``predict``.
        test_features: Feature matrix passed to ``model.predict``.
        test_labels: True target values, one per row of ``test_features``.

    Returns:
        ``100 - MAPE`` in percent. Rows whose label is zero are left out of
        the MAPE because the relative error is undefined there; the result
        is NaN when every label is zero.
    """
    predictions = np.asarray(model.predict(test_features), dtype=float)
    labels = np.asarray(test_labels, dtype=float)
    errors = np.abs(predictions - labels)
    # Dividing by a zero label would turn the whole mean into inf/NaN, so the
    # percentage error is averaged over the non-zero labels only.
    nonzero = labels != 0
    if nonzero.any():
        mape = 100 * np.mean(errors[nonzero] / labels[nonzero])
    else:
        mape = float('nan')
    accuracy = 100 - mape
    print('Model Performance')
    print('Average Error: {:0.4f} degrees.'.format(np.mean(errors)))
    print('Accuracy = {:0.2f}%.'.format(accuracy))
    return accuracy
# Baseline: a small, untuned forest to compare against the tuned search result.
base_model = RandomForestRegressor(n_estimators=10, random_state=42)

# Feature columns, defined before first use so this section does not depend
# on an earlier definition of `cols`.
cols = [
    'department', 'region', 'education', 'gender', 'recruitment_channel',
    'no_of_trainings', 'age', 'previous_year_rating', 'length_of_service',
    'KPIs_met >80%', 'awards_won?', 'avg_training_score',
    'training_scr_35_50', 'training_scr_50_65', 'training_scr_65_80',
    'training_scr_85_100', 'age_18_25', 'age_25_35', 'age_35_45',
    'age_45_60', 'age_training', 'KPI_training', 'education_age',
    'region_max', 'len_serv_0_5', 'len_serv_5_10', 'len_serv_10_15',
    'len_serv_15_20', 'len_serv_25',
]
X = emp_train[cols]
y = emp_train['is_promoted']
# The full training frame is used; no validation split is made here.
X_train, y_train = X, y
X_test = emp_test[cols]

train_features = X_train
test_features = X_test
train_labels = y_train
# No test_labels are set: no `y_test` exists in the visible code, so the
# models can only be used to predict here, not scored with evaluate().

base_model.fit(train_features, train_labels)
predictions = base_model.predict(test_features)

# The search's best estimator; the search object's predict() below delegates
# to it once the search has been refit on the full training data.
best_random = rf_random.best_estimator_
print(best_random)
y_pred = rf_random.predict(test_features)
# End of gist.