uds5501 · July 18, 2018 07:34
diff --git a/refineData.py b/refineData.py
 # Model-building imports: splitting, scaling, classifier, evaluation, persistence.
 from sklearn.model_selection import train_test_split
 from sklearn.preprocessing import StandardScaler
 from sklearn.ensemble import RandomForestClassifier
 from sklearn.metrics import confusion_matrix
 # sklearn.externals.joblib was deprecated in scikit-learn 0.21 and removed in
 # 0.23; prefer the standalone package and fall back only on older installs.
 try:
     import joblib
 except ImportError:
     from sklearn.externals import joblib

 # Feature column and target column taken from the prepared frame.
 X = finalDf['size_in_mb']
 y = finalDf['pop_categories']

 # Hold out 25% of the rows for evaluation; the fixed seed keeps the split reproducible.
 X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=21)

 # There is only one feature, so turn each 1-D split into an (n_samples, 1)
 # column, which is the 2-D layout the scaler works on.
 npX_train = np.array(X_train).reshape(-1, 1)
 npX_test = np.array(X_test).reshape(-1, 1)

 # Fit the scaler on the training split only and reuse those statistics on the
 # test split, so nothing from the held-out rows influences preprocessing.
 # NOTE(review): tree ensembles are usually insensitive to feature scaling --
 # confirm this step is still wanted if only RandomForestClassifier is trained.
 scaler = StandardScaler()
 npX_train = scaler.fit_transform(npX_train)
 npX_test = scaler.transform(npX_test)
	# Model-building imports: splitting, scaling, classifier, evaluation, persistence.
	from sklearn.model_selection import train_test_split
	from sklearn.preprocessing import StandardScaler
	from sklearn.ensemble import RandomForestClassifier
	from sklearn.metrics import confusion_matrix
	# sklearn.externals.joblib was deprecated in scikit-learn 0.21 and removed in
	# 0.23; prefer the standalone package and fall back only on older installs.
	try:
		import joblib
	except ImportError:
		from sklearn.externals import joblib

	# Feature column and target column taken from the prepared frame.
	X = finalDf['size_in_mb']
	y = finalDf['pop_categories']

	# Hold out 25% of the rows for evaluation; the fixed seed keeps the split reproducible.
	X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=21)

	# There is only one feature, so turn each 1-D split into an (n_samples, 1)
	# column, which is the 2-D layout the scaler works on.
	npX_train = np.array(X_train).reshape(-1, 1)
	npX_test = np.array(X_test).reshape(-1, 1)

	# Fit the scaler on the training split only and reuse those statistics on the
	# test split, so nothing from the held-out rows influences preprocessing.
	# NOTE(review): tree ensembles are usually insensitive to feature scaling --
	# confirm this step is still wanted if only RandomForestClassifier is trained.
	scaler = StandardScaler()
	npX_train = scaler.fit_transform(npX_train)
	npX_test = scaler.transform(npX_test)