[scikit-learn] scikit cheat sheets #scikit
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import numpy as np
import cv2
import glob
import time
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
# NOTE: for scikit-learn <= 0.17, use this import instead:
# from sklearn.cross_validation import train_test_split
from sklearn.model_selection import train_test_split
# Define a function to compute color histogram features
def color_hist(img, nbins=32, bins_range=(0, 256)):
    # Convert from RGB to HSV using cv2.cvtColor()
    hsv_img = cv2.cvtColor(img, cv2.COLOR_RGB2HSV)
    # Compute the histogram of the HSV channels separately
    h_hist = np.histogram(hsv_img[:, :, 0], bins=nbins, range=bins_range)
    s_hist = np.histogram(hsv_img[:, :, 1], bins=nbins, range=bins_range)
    v_hist = np.histogram(hsv_img[:, :, 2], bins=nbins, range=bins_range)
    # Concatenate the histograms into a single feature vector
    hist_features = np.concatenate((h_hist[0], s_hist[0], v_hist[0])).astype(np.float64)
    # Normalize the result so the features sum to 1
    norm_features = hist_features / np.sum(hist_features)
    # Return the feature vector
    return norm_features
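# Quick sanity check (illustrative only; the dummy image below is an
# assumption, not part of the original gist): the returned feature vector
# should have length 3 * nbins.
# feats = color_hist(np.zeros((64, 64, 3), dtype=np.uint8))
# assert feats.shape == (96,)  # 3 channels x 32 bins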
# Define a function to extract features from a list of images
# Have this function call color_hist()
def extract_features(imgs, hist_bins=32, hist_range=(0, 256)):
    # Create a list to append feature vectors to
    features = []
    # Iterate through the list of images
    for file in imgs:
        # Read in each one by one
        image = mpimg.imread(file)
        # Apply color_hist()
        hist_features = color_hist(image, nbins=hist_bins, bins_range=hist_range)
        # Append the new feature vector to the features list
        features.append(hist_features)
    # Return list of feature vectors
    return features
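# Caveat (worth knowing, not in the original gist): mpimg.imread() returns
# uint8 arrays in [0, 255] for JPEGs but float32 arrays in [0, 1] for PNGs.
# With PNG input, hist_range=(0, 256) would put every pixel in the first
# bin, so scale the image (or the range) accordingly.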
# Read in car and non-car images
images = glob.glob('*.jpeg')
cars = []
notcars = []
for image in images:
    if 'image' in image or 'extra' in image:
        notcars.append(image)
    else:
        cars.append(image)
# TODO play with these values to see how your classifier
# performs under different binning scenarios
histbin = 32
car_features = extract_features(cars, hist_bins=histbin, hist_range=(0, 256))
notcar_features = extract_features(notcars, hist_bins=histbin, hist_range=(0, 256))
# Create an array stack of feature vectors
X = np.vstack((car_features, notcar_features)).astype(np.float64)
# Fit a per-column scaler
X_scaler = StandardScaler().fit(X)
# Apply the scaler to X
scaled_X = X_scaler.transform(X)
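# Note (not in the original gist): fitting the scaler on the full dataset
# before splitting leaks test-set statistics into training. For a cleaner
# evaluation, fit the scaler on X_train only, then use it to transform
# both X_train and X_test.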
# Define the labels vector
y = np.hstack((np.ones(len(car_features)), np.zeros(len(notcar_features))))
# Split up data into randomized training and test sets
rand_state = np.random.randint(0, 100)
X_train, X_test, y_train, y_test = train_test_split(
    scaled_X, y, test_size=0.2, random_state=rand_state)
print('Dataset includes', len(cars), 'cars and', len(notcars), 'not-cars')
print('Using', histbin, 'histogram bins')
print('Feature vector length:', len(X_train[0]))
# Use a linear SVC
svc = SVC(kernel='linear')
# Check the training time for the SVC
t = time.time()
svc.fit(X_train, y_train)
t2 = time.time()
print(round(t2 - t, 2), 'Seconds to train SVC...')
# Check the score of the SVC
print('Test Accuracy of SVC = ', round(svc.score(X_test, y_test), 4))
# Check the prediction time for a few samples
t = time.time()
n_predict = 10
print('My SVC predicts: ', svc.predict(X_test[0:n_predict]))
print('For these', n_predict, 'labels: ', y_test[0:n_predict])
t2 = time.time()
print(round(t2 - t, 5), 'Seconds to predict', n_predict, 'labels with SVC')
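Tip (not part of the original gist): for high-dimensional feature vectors and many samples, sklearn.svm.LinearSVC is typically much faster to train than SVC(kernel='linear') and works as a drop-in replacement here:

from sklearn.svm import LinearSVC
svc = LinearSVC()  # liblinear-based; scales better than the libsvm-based SVC
svc.fit(X_train, y_train)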
import numpy as np
import matplotlib.pyplot as plt
from sklearn import svm
from generate_clusters import cluster_gen
np.random.seed(424)  # Change the number to generate different clusters.
n_clusters = 3
clusters_x, clusters_y, labels = cluster_gen(n_clusters)
# Convert to a training dataset in sklearn format
X = np.float32((np.concatenate(clusters_x), np.concatenate(clusters_y))).transpose()
y = np.float32((np.concatenate(labels)))
# Create an instance of SVM and fit the data
ker = 'linear'
svc = svm.SVC(kernel=ker).fit(X, y)
# Create a mesh that we will use to colorfully plot the decision surface
# Plotting routine courtesy of:
# http://scikit-learn.org/stable/auto_examples/svm/plot_iris.html#sphx-glr-auto-examples-svm-plot-iris-py
# Note: this coloring scheme breaks down at > 7 clusters or so
h = 0.2  # step size in the mesh
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1  # -1 and +1 to add some margin
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                     np.arange(y_min, y_max, h))
# Classify each point of the mesh (used to assign its color)
Z = svc.predict(np.c_[xx.ravel(), yy.ravel()])
# Put the result into a color plot
Z = Z.reshape(xx.shape)
plt.contourf(xx, yy, Z, cmap=plt.cm.coolwarm, alpha=0.8)
# Plot the training points
plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.coolwarm, edgecolors='black')
plt.xlim(xx.min(), xx.max())
plt.ylim(yy.min(), yy.max())
plt.xticks(())
plt.yticks(())
plt.title('SVC with ' + ker + ' kernel', fontsize=20)
plt.show()  # needed when running as a standalone script
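generate_clusters is a local helper that isn't included in this gist. Below is a minimal sketch of a compatible cluster_gen, assuming it returns per-cluster lists of x coordinates, y coordinates, and integer labels; all parameter names and ranges here are illustrative, not the original implementation:

import numpy as np

def cluster_gen(n_clusters, pts_minmax=(10, 100), x_mult=(1, 4), y_mult=(1, 3),
                x_off=(0, 50), y_off=(0, 50)):
    # One array per cluster for x coords, y coords, and labels
    clusters_x, clusters_y, labels = [], [], []
    for idx in range(n_clusters):
        # Random point count, spread, and offset for this cluster
        n_points = np.random.randint(pts_minmax[0], pts_minmax[1])
        x_m = np.random.uniform(x_mult[0], x_mult[1])
        y_m = np.random.uniform(y_mult[0], y_mult[1])
        x_o = np.random.uniform(x_off[0], x_off[1])
        y_o = np.random.uniform(y_off[0], y_off[1])
        # Gaussian blob, scaled and shifted
        clusters_x.append(x_m * np.random.randn(n_points) + x_o)
        clusters_y.append(y_m * np.random.randn(n_points) + y_o)
        labels.append(idx * np.ones(n_points, dtype=np.int32))
    return clusters_x, clusters_y, labels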