FranciscusRenatus’s gists

FranciscusRenatus / apriori_training.py

Created July 9, 2017 09:13

	# Mine association rules from the prepared transaction lists
	from apyori import apriori

	# NOTE(review): confirm the installed apyori actually honours `min_length`;
	# it is passed through unchanged here.
	rules = apriori(
		transactions,
		min_support=0.003,
		min_confidence=0.2,
		min_lift=3,
		min_length=2,
	)

	# Materialise the rules once so they can be inspected repeatedly
	results = list(rules)
	# Plain-list copy of every rule record, convenient for viewing
	myResults = list(map(list, results))

FranciscusRenatus / apriori_datapreprocessing.py

Created July 9, 2017 07:16

	# Data Preprocessing
	# The CSV has no header row, so header=None keeps the first line as data.
	dataset = pd.read_csv('Market_Basket_Optimisation.csv', header = None)
	# Build one list of strings per row. Iterating the value array directly
	# replaces the hard-coded 7501 x 20 bounds (so any file size works) and
	# avoids re-evaluating `dataset.values` for every single cell. Each cell is
	# still passed through str(), so the resulting strings are unchanged.
	transactions = [[str(item) for item in row] for row in dataset.values]

FranciscusRenatus / hc_fitting.py

Created July 5, 2017 19:49

	# Fitting Hierarchical Clustering to the dataset
	from sklearn.cluster import AgglomerativeClustering
	# The distance argument is deliberately omitted: ward linkage only accepts
	# Euclidean distance, which is also the default. The old `affinity` keyword
	# was renamed to `metric` and later removed from scikit-learn, so leaving it
	# out keeps this call valid on both old and new releases.
	hc = AgglomerativeClustering(n_clusters = 5, linkage = 'ward')
	# Fit on X and get the cluster label assigned to each row
	y_hc = hc.fit_predict(X)

FranciscusRenatus / hc_optimal_clusters.py

Created July 5, 2017 19:36

	# Draw a dendrogram of X to help pick the number of clusters
	import scipy.cluster.hierarchy as sch

	# Hierarchical linkage computed with the ward method
	ward_links = sch.linkage(X, method='ward')
	dendrogram = sch.dendrogram(ward_links)

	# Annotate the figure, then display it
	plt.title('Dendrogram')
	plt.xlabel('Customers')
	plt.ylabel('Euclidean distances')
	plt.show()

FranciscusRenatus / visualizing_k-means.py

Created July 4, 2017 14:39

	# Visualising the clusters
	# One (colour, legend label) pair per cluster, listed in label order 0..4.
	cluster_styles = [
		('red', 'Cluster 1'),
		('blue', 'Cluster 2'),
		('green', 'Cluster 3'),
		('cyan', 'Cluster 4'),
		('magenta', 'Cluster 5'),
	]
	for cluster_id, (colour, name) in enumerate(cluster_styles):
		# Rows of X assigned to this cluster; column 0 is plotted on the
		# x-axis and column 1 on the y-axis.
		members = X[y_kmeans == cluster_id]
		plt.scatter(members[:, 0], members[:, 1], s = 100, c = colour, label = name)
	# Centroids are drawn last and larger so they sit on top of the points.
	plt.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1], s = 300, c = 'yellow', label = 'Centroids')
	plt.title('Clusters of customers')
	plt.xlabel('Annual Income (k$)')
	plt.ylabel('Spending Score (1-100)')
	# Every series above carries a label, but without legend() none of them is
	# ever displayed; show() then renders the finished figure.
	plt.legend()
	plt.show()

FranciscusRenatus / elbow_method_kmeans.py

Created July 3, 2017 17:33

	# Using the elbow method to find the optimal number of clusters
	from sklearn.cluster import KMeans
	# Inertia of the fitted model for each candidate cluster count 1..10
	wcss = []
	for i in range(1, 11):
		# The loop body must be indented under the `for`; a fixed random_state
		# keeps the runs reproducible.
		kmeans = KMeans(n_clusters = i, init = 'k-means++', random_state = 42)
		kmeans.fit(X)
		wcss.append(kmeans.inertia_)
	# Plot inertia against cluster count; the "elbow" of the curve suggests k.
	plt.plot(range(1, 11), wcss)
	plt.title('The Elbow Method')
	plt.xlabel('Number of clusters')
	# Label the y-axis as well and actually render the figure.
	plt.ylabel('WCSS')
	plt.show()

FranciscusRenatus / kmeans_importing_data.py

Created July 3, 2017 17:32

	# Load the customer data and extract the feature matrix used for clustering
	dataset = pd.read_csv('Mall_Customers.csv')
	# Positional indices of the two columns kept as features
	# (presumably annual income and spending score -- verify against the CSV)
	feature_columns = [3, 4]
	X = dataset.iloc[:, feature_columns].values

FranciscusRenatus / random_forest_regression.py

Created June 29, 2017 15:51

	# Train a random forest classifier and predict labels for the test set
	from sklearn.ensemble import RandomForestClassifier
	# Imported for the confusion-matrix step; not called within this snippet
	from sklearn.metrics import confusion_matrix

	# 10 trees, entropy split criterion, fixed seed for reproducible results
	classifier = RandomForestClassifier(
		n_estimators=10,
		criterion='entropy',
		random_state=0,
	)
	classifier.fit(X_train, y_train)

	# Predicted labels for the held-out samples
	y_pred = classifier.predict(X_test)

FranciscusRenatus / naive_bayes.py

Created June 29, 2017 11:48

	# Train a Gaussian naive Bayes classifier and predict labels for the test set
	from sklearn.naive_bayes import GaussianNB
	# Imported for the confusion-matrix step; not called within this snippet
	from sklearn.metrics import confusion_matrix

	# Default-configured model, fitted on the training split
	classifier = GaussianNB()
	classifier.fit(X_train, y_train)

	# Predicted labels for the held-out samples
	y_pred = classifier.predict(X_test)

FranciscusRenatus / svm.py

Created June 27, 2017 19:45

	# Train a linear-kernel SVM classifier and predict labels for the test set
	from sklearn.svm import SVC
	# Imported for the confusion-matrix step; not called within this snippet
	from sklearn.metrics import confusion_matrix

	# Linear kernel with a fixed seed
	classifier = SVC(
		kernel='linear',
		random_state=0,
	)
	classifier.fit(X_train, y_train)

	# Predicted labels for the held-out samples
	y_pred = classifier.predict(X_test)

Frank FranciscusRenatus