Skip to content

Instantly share code, notes, and snippets.

@uds5501
Created July 18, 2018 07:34
Show Gist options
  • Save uds5501/43020a0115ceb4296423a9ac289ebb30 to your computer and use it in GitHub Desktop.
Save uds5501/43020a0115ceb4296423a9ac289ebb30 to your computer and use it in GitHub Desktop.
for blog post 1
# Importing the tasty stuff
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix
from sklearn.externals import joblib
# Pick the single predictor column and the target column out of finalDf
# (finalDf is expected to already exist in the session).
X, y = finalDf['size_in_mb'], finalDf['pop_categories']

# Hold out 25% of the rows for testing; the fixed random_state keeps the
# split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=21,
)

# Only one feature is used, so each split is turned into a single-column
# 2-D array before it is handed to the scaler.
npX_train = np.array(X_train).reshape(-1, 1)
npX_test = np.array(X_test).reshape(-1, 1)

# Standardise the feature: the scaler is fitted on the training split and
# the same fitted transform is then applied to the test split.
scaler = StandardScaler()
npX_train = scaler.fit_transform(npX_train)
npX_test = scaler.transform(npX_test)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment