# Training Apriori on the dataset
from apyori import apriori
# Note: apyori's documented keyword arguments are min_support, min_confidence,
# min_lift and max_length; min_length is kept for parity with the original
# course code but is most likely ignored by the library.
rules = apriori(transactions, min_support = 0.003, min_confidence = 0.2, min_lift = 3, min_length = 2)

# Collecting the results into a list
results = list(rules)
myResults = [list(x) for x in results]
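
# Optional follow-up (not part of the original snippet): flatten the apyori
# RelationRecord objects into a readable table. A minimal sketch, assuming
# pandas is available, that each record carries at least one ordered statistic
# with a non-empty left-hand side, and showing only the first item of each side.
# The helper name 'inspect' and the variable 'rules_table' are illustrative.
import pandas as pd

def inspect(results):
    lhs         = [tuple(result[2][0][0])[0] for result in results]
    rhs         = [tuple(result[2][0][1])[0] for result in results]
    supports    = [result[1] for result in results]
    confidences = [result[2][0][2] for result in results]
    lifts       = [result[2][0][3] for result in results]
    return list(zip(lhs, rhs, supports, confidences, lifts))

rules_table = pd.DataFrame(inspect(results),
                           columns = ['Left Hand Side', 'Right Hand Side',
                                      'Support', 'Confidence', 'Lift'])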

# Data Preprocessing
import pandas as pd

dataset = pd.read_csv('Market_Basket_Optimisation.csv', header = None)
# Build the list-of-lists format expected by apyori: one basket per CSV row (7501 rows, up to 20 items each)
transactions = []
for i in range(0, 7501):
    transactions.append([str(dataset.values[i, j]) for j in range(0, 20)])
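
# Note (not part of the original snippet): rows with fewer than 20 items are
# padded with NaN by read_csv, and str() turns those cells into the literal
# string 'nan'. A hedged cleanup sketch if you want to drop that padding:
transactions = [[item for item in basket if item != 'nan'] for basket in transactions]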

# Fitting Hierarchical Clustering to the dataset
from sklearn.cluster import AgglomerativeClustering

# Note: recent scikit-learn releases rename 'affinity' to 'metric'; Ward linkage requires Euclidean distances either way.
hc = AgglomerativeClustering(n_clusters = 5, affinity = 'euclidean', linkage = 'ward')
y_hc = hc.fit_predict(X)
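
# Illustrative sanity check (not in the original): how many customers landed in each cluster.
import numpy as np
labels, counts = np.unique(y_hc, return_counts = True)
print(dict(zip(labels.tolist(), counts.tolist())))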

# Using the dendrogram to find the optimal number of clusters
import scipy.cluster.hierarchy as sch
import matplotlib.pyplot as plt

dendrogram = sch.dendrogram(sch.linkage(X, method = 'ward'))
plt.title('Dendrogram')
plt.xlabel('Customers')
plt.ylabel('Euclidean distances')
plt.show()
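
# The scatter plot below assumes a fitted k-means model (kmeans, y_kmeans),
# which these snippets never define. A minimal sketch, using k = 5 as suggested
# by the elbow plot further down:
from sklearn.cluster import KMeans
kmeans = KMeans(n_clusters = 5, init = 'k-means++', random_state = 42)
y_kmeans = kmeans.fit_predict(X)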

# Visualising the clusters
plt.scatter(X[y_kmeans == 0, 0], X[y_kmeans == 0, 1], s = 100, c = 'red', label = 'Cluster 1')
plt.scatter(X[y_kmeans == 1, 0], X[y_kmeans == 1, 1], s = 100, c = 'blue', label = 'Cluster 2')
plt.scatter(X[y_kmeans == 2, 0], X[y_kmeans == 2, 1], s = 100, c = 'green', label = 'Cluster 3')
plt.scatter(X[y_kmeans == 3, 0], X[y_kmeans == 3, 1], s = 100, c = 'cyan', label = 'Cluster 4')
plt.scatter(X[y_kmeans == 4, 0], X[y_kmeans == 4, 1], s = 100, c = 'magenta', label = 'Cluster 5')
plt.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1], s = 300, c = 'yellow', label = 'Centroids')
plt.title('Clusters of customers')
plt.xlabel('Annual Income (k$)')
plt.ylabel('Spending Score (1-100)')
plt.legend()
plt.show()

# Using the elbow method to find the optimal number of clusters
from sklearn.cluster import KMeans
wcss = []
for i in range(1, 11):
    kmeans = KMeans(n_clusters = i, init = 'k-means++', random_state = 42)
    kmeans.fit(X)
    wcss.append(kmeans.inertia_)
plt.plot(range(1, 11), wcss)
plt.title('The Elbow Method')
plt.xlabel('Number of clusters')
plt.ylabel('WCSS')
plt.show()

# Importing the dataset
dataset = pd.read_csv('Mall_Customers.csv')
X = dataset.iloc[:, [3, 4]].values   # Annual Income (k$) and Spending Score (1-100)
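
# Illustrative peek at the loaded data (not part of the original snippet):
print(dataset.head())   # quick look at the raw columns
print(X[:5])            # the two columns actually used for clustering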

# Fitting Random Forest Classification to the Training set
from sklearn.ensemble import RandomForestClassifier
classifier = RandomForestClassifier(n_estimators = 10, criterion = 'entropy', random_state = 0)
classifier.fit(X_train, y_train)

# Predicting the Test set results
y_pred = classifier.predict(X_test)

# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)

# Fitting Naive Bayes to the Training set
from sklearn.naive_bayes import GaussianNB
classifier = GaussianNB()
classifier.fit(X_train, y_train)

# Predicting the Test set results
y_pred = classifier.predict(X_test)

# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)

# Fitting SVM to the Training set
from sklearn.svm import SVC
classifier = SVC(kernel = 'linear', random_state = 0)
classifier.fit(X_train, y_train)

# Predicting the Test set results
y_pred = classifier.predict(X_test)

# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)
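
# Illustrative extra evaluation (not in the original snippets) that applies to
# any of the classifiers above; assumes the usual X_train/X_test/y_train/y_test
# split from the preprocessing step:
from sklearn.metrics import accuracy_score
print(cm)
print(accuracy_score(y_test, y_pred))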