Last active
August 9, 2016 07:38
-
-
Save akkijp/6820bcea5db37e783953de60c8040d8a to your computer and use it in GitHub Desktop.
scikit-learn で機械学習を試してみた
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/env python3
import numpy as np
from sklearn import datasets
from sklearn.linear_model import Perceptron
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler

try:
    # train_test_split has lived here since scikit-learn 0.18.
    from sklearn.model_selection import train_test_split
except ImportError:
    # Legacy location; this module was removed in scikit-learn 0.20.
    from sklearn.cross_validation import train_test_split
# Load the iris data set and keep only feature columns 2 and 3 so the
# classifier works in a 2-D space that can be plotted directly.
iris = datasets.load_iris()
x = iris.data[:, [2, 3]]
y = iris.target

# Hold out 30% of the samples for evaluation; the fixed seed makes the
# split reproducible between runs.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, random_state=0)

# Fit the scaler on the training data only, then apply the same
# transformation to both splits so no test statistics leak into training.
sc = StandardScaler()
sc.fit(x_train)
x_train_std = sc.transform(x_train)
x_test_std = sc.transform(x_test)

# `n_iter` was removed from Perceptron; `max_iter` is its replacement.
# `tol=None` disables early stopping so exactly 40 passes are performed,
# as the original `n_iter=40` did.
ppn = Perceptron(max_iter=40, tol=None, eta0=0.1, random_state=0,
                 shuffle=True)
ppn.fit(x_train_std, y_train)

# Evaluate on the held-out split.
y_pred = ppn.predict(x_test_std)
print('Misclassified samples: %d' % (y_test != y_pred).sum())
print('Accuracy: %.2f' % accuracy_score(y_test, y_pred))
from matplotlib.colors import ListedColormap
import matplotlib.pyplot as plt


def plot_decision_regions(x, y, classifier, test_idx=None, resolution=0.02):
    """Draw a classifier's decision regions and the samples on the current axes.

    Args:
        x: 2-D array of samples; only columns 0 and 1 are used as the
            horizontal and vertical plot coordinates.
        y: 1-D array of class labels, one per row of ``x``.
        classifier: Object exposing ``predict(samples)`` that returns one
            label per row of a two-column array.
        test_idx: Optional indices into ``x`` (list, range or array) of the
            samples to highlight with hollow circles. ``None`` or an empty
            sequence disables the highlight.
        resolution: Step size of the grid on which the classifier is
            evaluated.

    The function only draws; the caller is responsible for ``plt.legend()``
    and ``plt.show()``.
    """
    markers = ('s', 'x', 'o', '^', 'v')
    colors = ('red', 'blue', 'lightgreen', 'gray', 'cyan')
    classes = np.unique(y)
    cmap = ListedColormap(colors[:len(classes)])

    # Pad the grid by one unit on every side so that no sample sits on the
    # border of the plotted area.
    x1_min, x1_max = x[:, 0].min() - 1, x[:, 0].max() + 1
    x2_min, x2_max = x[:, 1].min() - 1, x[:, 1].max() + 1
    xx1, xx2 = np.meshgrid(np.arange(x1_min, x1_max, resolution),
                           np.arange(x2_min, x2_max, resolution))

    # Classify every grid point, then fold the flat predictions back into
    # the grid shape so they can be drawn as filled contours.
    z = classifier.predict(np.array([xx1.ravel(), xx2.ravel()]).T)
    z = z.reshape(xx1.shape)
    plt.contourf(xx1, xx2, z, alpha=0.4, cmap=cmap)
    plt.xlim(xx1.min(), xx1.max())
    plt.ylim(xx2.min(), xx2.max())

    # One scatter call per class so each class gets its own marker, colour
    # and legend entry. Markers/colours cycle if there are more classes
    # than entries in the palettes.
    for idx, cl in enumerate(classes):
        mask = y == cl
        plt.scatter(x=x[mask, 0], y=x[mask, 1],
                    alpha=0.8, c=colors[idx % len(colors)],
                    marker=markers[idx % len(markers)], label=cl)

    # Explicit None/length check: truth-testing a NumPy index array raises.
    if test_idx is not None and len(test_idx) > 0:
        x_test = x[test_idx, :]
        # Hollow circles: no face colour, visible edge.
        plt.scatter(x_test[:, 0], x_test[:, 1],
                    facecolors='none', edgecolors='black',
                    alpha=1.0, linewidths=1, marker='o',
                    s=55, label='test set')
# Stack the training and test samples so a single plot shows all of them.
# The test rows are appended last, so their indices start right after the
# training rows; deriving the range from the array lengths keeps it correct
# if the data set or split ratio changes.
x_combined_std = np.vstack((x_train_std, x_test_std))
y_combined = np.hstack((y_train, y_test))
test_rows = range(len(y_train), len(y_combined))

plot_decision_regions(x=x_combined_std, y=y_combined, classifier=ppn,
                      test_idx=test_rows)
# The two plotted features are iris columns 2 and 3 after standardization.
plt.xlabel('petal length [standardized]')
plt.ylabel('petal width [standardized]')
# The scatter calls assign labels; without a legend they are never shown.
plt.legend(loc='upper left')
plt.show()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment