Computational Intelligence: Module 1
import numpy as np
import pandas as pd
from matplotlib.colors import ListedColormap
from sklearn import datasets
from sklearn.datasets import make_blobs
from sklearn.utils import shuffle
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
import matplotlib.pyplot as plt
# Reproducible results
random_seed = 2023
np.random.seed(random_seed)
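
# Range of neighbour counts (k) to try when tuning the models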
max_k_to_try = 100
steps_between_k = 1

def classification():
    # Form data frame
    df_iris, feature_names = load_iris_dataframe()
    df_iris = normalize_data(df_iris, feature_names)
    df_iris = shuffle_data(df_iris)
    # Split data
    x_train, x_test, y_train, y_test = train_test_split(df_iris[feature_names], df_iris['label'],
                                                        random_state=random_seed, train_size=0.75)
    # Scale for better accuracy
    x_train, x_test = scale_data(x_train, x_test)
    # Train KNN classifier
    k_best, score_best, _ = find_best_k(x_train, x_test, y_train, y_test, True)
    print('Classifier: The best k = {}, score = {}'.format(k_best, score_best))

def regression():
    # Form data frame
    df_random, feature_names = generate_random_dataframe()
    df_random = normalize_data(df_random, feature_names)
    # Split data
    x_train, x_test, y_train, y_test = train_test_split(df_random[feature_names], df_random['label'],
                                                        random_state=random_seed, train_size=0.75)
    # Scale for better accuracy
    x_train, x_test = scale_data(x_train, x_test)
    # Train the KNN regressor; find_best_k returns the best-scoring model for plotting
    k_best, score_best, knnr = find_best_k(x_train, x_test, y_train, y_test, False)
    print('Regression: The best k = {}, score = {}'.format(k_best, score_best))
    # Visualize the decision boundary
    # Find the min-max of the coordinates
    x_min, x_max = x_train[:, 0].min() - 0.1, x_train[:, 0].max() + 0.1
    y_min, y_max = x_train[:, 1].min() - 0.1, x_train[:, 1].max() + 0.1
    # Make a grid over those coordinates
    xx, yy = np.meshgrid(np.linspace(x_min, x_max, 100), np.linspace(y_min, y_max, 100))
    # Obtain the predicted value at each grid point
    x_input = np.c_[xx.ravel(), yy.ravel()]
    y_pred = knnr.predict(x_input)
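    # The regressor averages the 0/1 labels of the k nearest neighbours, so raw
    # predictions are fractional; rounding snaps each grid point to a hard class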
    # Reshape y to the grid shape (required by contourf)
    y_pred = np.round(y_pred).reshape(xx.shape)
    # Plot the decision boundary
    cmap_bold = ListedColormap(['blue', '#FFFF00', 'black', 'green'])
    plt.figure()
    plt.contourf(xx, yy, y_pred, cmap=cmap_bold, alpha=0.7)
    plt.scatter(x_train[:, 0], x_train[:, 1], c=y_train, s=40, cmap=cmap_bold)
    plt.xlim(xx.min(), xx.max())
    plt.ylim(yy.min(), yy.max())
    plt.show()

def load_iris_dataframe():
    # Load iris dataset
    iris = datasets.load_iris()
    x_d, y_d, labels, feature_names = iris.data, iris.target, iris.target_names, iris.feature_names
    # Form data frame
    df_iris = pd.DataFrame(x_d, columns=feature_names)
    df_iris['label'] = y_d
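    # Map the numeric labels to species names for readability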
    features_dict = {k: v for k, v in enumerate(labels)}
    df_iris['label_names'] = df_iris.label.apply(lambda x: features_dict[x])
    return df_iris, feature_names

def generate_random_dataframe():
    feature_names = ['x0', 'x1']
    x_d, y_d = make_blobs(n_samples=1000, n_features=len(feature_names), centers=8, cluster_std=1.3,
                          random_state=random_seed)
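    # Fold the 8 blob labels into 2 classes so the binary problem is not linearly separable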
    y_d = y_d % 2
    plt.figure()
    plt.title('Sample binary classification problem with non-linearly separable classes')
    cmap_bold = ListedColormap(['blue', '#FFFF00', 'black', 'green'])
    plt.scatter(x_d[:, 0], x_d[:, 1], c=y_d,
                marker='o', s=30, cmap=cmap_bold)
    df_random = pd.DataFrame(x_d, columns=feature_names)
    df_random['label'] = y_d
    return df_random, feature_names

def normalize_data(dataframe, feature_names):
    min_max_scaler = MinMaxScaler()
    dataframe[feature_names] = min_max_scaler.fit_transform(dataframe[feature_names])
    return dataframe

def shuffle_data(dataframe):
    dataframe = shuffle(dataframe, random_state=random_seed)
    return dataframe

def scale_data(x_train, x_test):
    scaler = StandardScaler()
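    # Fit the scaler on the training set only and reuse it on the test set to avoid data leakage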
    x_train = scaler.fit_transform(x_train)
    x_test = scaler.transform(x_test)
    return x_train, x_test

def find_best_k(x_train, x_test, y_train, y_test, use_classifier):
    k_best = 1
    score_best = float('-inf')  # R^2 for regression can be negative, so start below any reachable score
    knn_best = None
    for k in range(1, max_k_to_try, steps_between_k):
        knn = KNeighborsClassifier(n_neighbors=k) if use_classifier else KNeighborsRegressor(n_neighbors=k)
        knn = knn.fit(x_train, y_train)
        score = knn.score(x_test, y_test)
        if score > score_best:
            k_best = k
            score_best = score
            knn_best = knn  # keep the best-scoring model itself, not the last one fitted
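        # A perfect score cannot be improved upon, so stop the search early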
        if score == 1:
            break
    return k_best, score_best, knn_best

# Task 1
classification()
# Task 2
regression()
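
# Note: picking k on the same test set that reports the final score ties the
# choice of k to that one split. A minimal alternative sketch, assuming the
# x_train/y_train built inside classification(): score each k by 5-fold
# cross-validation on the training data (cross_val_score is from scikit-learn)
# and keep the k with the best mean score.
#
# from sklearn.model_selection import cross_val_score
# scores = {k: cross_val_score(KNeighborsClassifier(n_neighbors=k), x_train, y_train, cv=5).mean()
#           for k in range(1, max_k_to_try, steps_between_k)}
# k_best = max(scores, key=scores.get)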