Keenan Burke-Pitts Kiwibp

Data Analyst. Digital Marketer. Lifelong Learner.

Kiwibp / wordcloud.py

Created June 11, 2018 16:20

Craigslist Webscraping Project

	# Build the corpus from the posting titles themselves. str() on a pandas
	# Series returns its display repr (index labels, a "Name: ..., dtype: ..."
	# footer, and "..." in place of rows beyond display.max_rows), so most
	# titles would never reach the word cloud.
	# NOTE(review): assumes postings is a pandas DataFrame -- confirm.
	titles_text = " ".join(postings['name'].dropna().astype(str))

	# Render a 2000x1000 RGB cloud on a white background.
	wordcloud = WordCloud(background_color='white', mode="RGB", width=2000, height=1000).generate(titles_text)
	plt.title("Craigslist Used Items Word Cloud")
	plt.imshow(wordcloud)
	plt.axis("off")  # hide the axes; they carry no meaning for an image
	plt.show()

Kiwibp / feature_selection.py

Created June 13, 2018 13:47

	# Fit a 50-tree random forest; tree ensembles expose per-feature importances,
	# which can then be used to discard irrelevant features.
	clf = RandomForestClassifier(n_estimators=50, max_features='sqrt').fit(train, targets)

	# Pair every training column with the importance the fitted forest assigned to it.
	features = pd.DataFrame({
	    'feature': train.columns,
	    'importance': clf.feature_importances_,
	})

	# Sorting values by feature importance.

Kiwibp / cross_validation.py

Created June 13, 2018 14:14

	# Candidate classifiers, all with default hyper-parameters except the SVC,
	# which is created with probability=True.
	models = [
	    LogisticRegression(),
	    LogisticRegressionCV(),
	    RandomForestClassifier(),
	    GradientBoostingClassifier(),
	    SVC(probability=True),
	    KNeighborsClassifier(),
	    DecisionTreeClassifier(),
	]

	# Keep an individual handle on each estimator, in the same order as the list.
	logreg, logreg_cv, rf, gboost, svm, knn, dt = models

Kiwibp / gboost_model_evaluation.py

Created June 13, 2018 14:19

	# Fit on the training split, then predict class labels for the test split
	# (fit() returns the estimator itself, so the two calls can be chained).
	y_pred_class = gboost.fit(X_train, y_train).predict(X_test)

	# Argument order matters: true labels first, predicted labels second.
	# Passing them by keyword makes that explicit.
	print(metrics.confusion_matrix(y_true=y_test, y_pred=y_pred_class))

	binary = np.array([[125, 14],