#include "annTrain.h" | |
const int networkInputSize = 512; | |
void annTrain::setup() | |
{ | |
train(ofToDataPath("fingers/all", true), 512, 0.7); | |
} | |
void annTrain::update() | |
{ | |
} | |
void annTrain::draw() | |
{ | |
} | |
void annTrain::processClassAndDescForTest(const std::string& classname, const cv::Mat& descriptors)
{
    // Get histogram of visual words using the bag-of-words technique
    cv::Mat bowFeatures = getBOWFeatures(descriptors, networkInputSize);
    cv::normalize(bowFeatures, bowFeatures, 0, bowFeatures.rows, cv::NORM_MINMAX, -1, cv::Mat());
    testSamples.push_back(bowFeatures);
    testOutputExpected.push_back(getClassId(classes, classname));
}
void annTrain::processClassAndDesc(const std::string& classname, const cv::Mat& descriptors)
{
    // Append to the set of classes
    classes.insert(classname);
    // Append to the list of descriptors
    descriptorsSet.push_back(descriptors);
    // Append metadata for each extracted feature: every descriptor row of this
    // image points back at the same ImageData record
    ImageData* data = new ImageData;
    data->classname = classname;
    data->bowFeatures = cv::Mat::zeros(cv::Size(networkInputSize, 1), CV_32F);
    for (int j = 0; j < descriptors.rows; j++)
    {
        descriptorsMetadata.push_back(data);
    }
}
// Get all files in a directory (not recursive)
std::vector<std::string> annTrain::getFilesInDirectory(const std::string& directory)
{
    std::vector<std::string> files;
    boost::filesystem::path root(directory);
    boost::filesystem::directory_iterator it_end;
    for (boost::filesystem::directory_iterator it(root); it != it_end; ++it)
    {
        if (boost::filesystem::is_regular_file(it->path()))
        {
            files.push_back(it->path().string());
        }
    }
    return files;
}
// Extract the class name from a file name
std::string annTrain::getClassName(const std::string& filename)
{
    if (filename.find("1l") != std::string::npos)
    {
        return "1l";
    }
    if (filename.find("2l") != std::string::npos)
    {
        return "2l";
    }
    if (filename.find("1r") != std::string::npos)
    {
        return "1r";
    }
    if (filename.find("2r") != std::string::npos)
    {
        return "2r";
    }
    if (filename.find("nf") != std::string::npos)
    {
        return "nf";
    }
    // Fallback: no known label in the file name. Returning here avoids the
    // undefined behavior of falling off the end of a non-void function.
    std::cerr << "WARNING: could not determine class for " << filename << std::endl;
    return "";
}
// Get local features (KAZE keypoints and descriptors) for an image
cv::Mat annTrain::getDescriptors(const cv::Mat& img)
{
    cv::Ptr<cv::KAZE> kaze = cv::KAZE::create();
    std::vector<cv::KeyPoint> keypoints;
    cv::Mat descriptors;
    kaze->detect(img, keypoints);
    kaze->compute(img, keypoints, descriptors);
    return descriptors;
}
// Read images from a list of file names, extract the class name and local
// descriptors of each one, and accumulate them for training
void annTrain::readImages(vec_iter begin, vec_iter end)
{
    for (auto it = begin; it != end; ++it)
    {
        std::string filename = *it;
        std::cout << "Reading image " << filename << "..." << std::endl;
        cv::Mat img = cv::imread(filename, 0);
        if (img.empty())
        {
            std::cerr << "WARNING: Could not read image " << filename << std::endl;
            continue;
        }
        std::string classname = getClassName(filename);
        cv::Mat descriptors = getDescriptors(img);
        processClassAndDesc(classname, descriptors);
    }
}
// Read images from a list of file names and accumulate, for each one, its
// bag-of-words features and expected class id for the test set
void annTrain::readImagesToTest(vec_iter begin, vec_iter end)
{
    for (auto it = begin; it != end; ++it)
    {
        std::string filename = *it;
        std::cout << "Reading image " << filename << "..." << std::endl;
        cv::Mat img = cv::imread(filename, 0);
        if (img.empty())
        {
            std::cerr << "WARNING: Could not read image " << filename << std::endl;
            continue;
        }
        std::string classname = getClassName(filename);
        cv::Mat descriptors = getDescriptors(img);
        processClassAndDescForTest(classname, descriptors);
    }
}
// Transform a class name into an id (its index in the ordered set of classes)
int annTrain::getClassId(const std::set<std::string>& classes, const std::string& classname)
{
    int index = 0;
    for (auto it = classes.begin(); it != classes.end(); ++it)
    {
        if (*it == classname) break;
        ++index;
    }
    return index;
}

// Get the one-hot code associated with a class
cv::Mat annTrain::getClassCode(const std::set<std::string>& classes, const std::string& classname)
{
    cv::Mat code = cv::Mat::zeros(cv::Size((int)classes.size(), 1), CV_32F);
    int index = getClassId(classes, classname);
    code.at<float>(index) = 1;
    return code;
}
// Turn the local features of one image into a single bag-of-words histogram of
// visual words, by matching each descriptor against the vocabulary
cv::Mat annTrain::getBOWFeatures(const cv::Mat& descriptors, int vocabularySize)
{
    cv::Mat outputArray = cv::Mat::zeros(cv::Size(vocabularySize, 1), CV_32F);
    std::vector<cv::DMatch> matches;
    flann->match(descriptors, matches);
    for (size_t j = 0; j < matches.size(); j++)
    {
        int visualWord = matches[j].trainIdx;
        outputArray.at<float>(visualWord)++;
    }
    return outputArray;
}
// Create and train the multi-layer perceptron: one hidden layer of half the
// input size, symmetric sigmoid activation
cv::Ptr<cv::ml::ANN_MLP> annTrain::getTrainedNeuralNetwork(const cv::Mat& trainSamples,
    const cv::Mat& trainResponses)
{
    int networkInputSize = trainSamples.cols;
    int networkOutputSize = trainResponses.cols;
    cv::Ptr<cv::ml::ANN_MLP> mlp = cv::ml::ANN_MLP::create();
    std::vector<int> layerSizes = { networkInputSize, networkInputSize / 2,
        networkOutputSize };
    mlp->setLayerSizes(layerSizes);
    mlp->setActivationFunction(cv::ml::ANN_MLP::SIGMOID_SYM);
    mlp->train(trainSamples, cv::ml::ROW_SAMPLE, trainResponses);
    return mlp;
}
// Return the index of the output neuron with the highest activation
int annTrain::getPredictedClass(const cv::Mat& predictions)
{
    float maxPrediction = predictions.at<float>(0);
    int maxPredictionIndex = 0;
    const float* ptrPredictions = predictions.ptr<float>(0);
    for (int i = 0; i < predictions.cols; i++)
    {
        float prediction = *ptrPredictions++;
        if (prediction > maxPrediction)
        {
            maxPrediction = prediction;
            maxPredictionIndex = i;
        }
    }
    return maxPredictionIndex;
}
// Build the confusion matrix of the test set: rows are expected classes,
// columns are predicted classes
std::vector<std::vector<int> > annTrain::getConfusionMatrix()
{
    cv::Mat testOutput;
    mlp->predict(testSamples, testOutput);
    // we now have 5 classes (1l, 1r, 2l, 2r, nf)
    std::vector<std::vector<int> > confusionMatrix(5, std::vector<int>(5));
    for (int i = 0; i < testOutput.rows; i++)
    {
        int predictedClass = getPredictedClass(testOutput.row(i));
        int expectedClass = testOutputExpected.at(i);
        std::cout << expectedClass << " " << predictedClass << std::endl;
        confusionMatrix[expectedClass][predictedClass]++;
    }
    return confusionMatrix;
}
void annTrain::printConfusionMatrix(const std::vector<std::vector<int> >& confusionMatrix, const std::set<std::string>& classes)
{
    for (auto it = classes.begin(); it != classes.end(); ++it)
    {
        std::cout << *it << " ";
    }
    std::cout << std::endl;
    for (size_t i = 0; i < confusionMatrix.size(); i++)
    {
        for (size_t j = 0; j < confusionMatrix[i].size(); j++)
        {
            std::cout << confusionMatrix[i][j] << " ";
        }
        std::cout << std::endl;
    }
}
// Get the accuracy of the model
float annTrain::getAccuracy(const std::vector<std::vector<int> >& confusionMatrix)
{
    int hits = 0;
    int total = 0;
    for (size_t i = 0; i < confusionMatrix.size(); i++)
    {
        for (size_t j = 0; j < confusionMatrix.at(i).size(); j++)
        {
            if (i == j) hits += confusionMatrix.at(i).at(j);
            total += confusionMatrix.at(i).at(j);
        }
    }
    return hits / (float)total;
}
// Save the trained MLP, the visual-word vocabulary, and the class id mapping
void annTrain::saveModels(const cv::Mat& vocabulary, const std::set<std::string>& classes)
{
    mlp->save(ofToDataPath("mlp.yaml", true));
    cv::FileStorage fs(ofToDataPath("vocabulary.yaml", true), cv::FileStorage::WRITE);
    fs << "vocabulary" << vocabulary;
    fs.release();
    std::ofstream classesOutput(ofToDataPath("classes.txt", true));
    for (auto it = classes.begin(); it != classes.end(); ++it)
    {
        classesOutput << getClassId(classes, *it) << "\t" << *it << std::endl;
    }
    classesOutput.close();
}
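// Training pipeline, step by step:
//   1. Read the training images and collect KAZE descriptors, keeping track of
//      which image (and therefore which class) each descriptor came from.
//   2. Run k-means over all training descriptors to build a vocabulary of
//      networkInputSize visual words.
//   3. Turn each image's descriptors into a normalized bag-of-words histogram
//      using the k-means labels.
//   4. Train the MLP on those histograms, with one-hot class codes as targets.
//   5. Train a FLANN matcher on the vocabulary so that descriptors from unseen
//      images can be mapped to visual words, then evaluate on the held-out
//      test images and save the models.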
void annTrain::train(std::string imagesDir, int networkInputSize, float trainSplitRatio)
{
    std::cout << "Reading training set..." << std::endl;
    uint64 start = ofGetElapsedTimeMillis();
    std::vector<std::string> files = getFilesInDirectory(imagesDir);
    // std::random_shuffle is deprecated in later C++ standards; std::shuffle
    // with an explicit engine is the modern replacement
    std::random_shuffle(files.begin(), files.end());
    // Only the first trainSplitRatio fraction of the shuffled files is used for
    // training; the remainder is held out for testing further down
    readImages(files.begin(), files.begin() + (size_t)(files.size() * trainSplitRatio));
    std::cout << " Seconds : " << (ofGetElapsedTimeMillis() - start) / 1000.0 << std::endl;
    std::cout << "Creating vocabulary..." << std::endl;
    start = ofGetElapsedTimeMillis();
    cv::Mat labels;
    cv::Mat vocabulary;
    // Use k-means to find k centroids (the words of our vocabulary)
    cv::kmeans(descriptorsSet, networkInputSize, labels, cv::TermCriteria(cv::TermCriteria::EPS + cv::TermCriteria::MAX_ITER, 10, 0.01), 1, cv::KMEANS_PP_CENTERS, vocabulary);
    // No need to keep it in memory anymore
    descriptorsSet.release();
    std::cout << " Seconds : " << (ofGetElapsedTimeMillis() - start) / 1000.0 << std::endl;
    // Convert the set of local features of each image into a single descriptor
    // using the bag-of-words technique
    std::cout << "Getting histograms of visual words..." << std::endl;
    int* ptrLabels = (int*)(labels.data);
    int size = labels.rows * labels.cols;
    for (int i = 0; i < size; i++)
    {
        int label = *ptrLabels++;
        ImageData* data = descriptorsMetadata[i];
        data->bowFeatures.at<float>(label)++;
    }
    // Fill the matrices to be used by the neural network
    std::cout << "Preparing neural network..." << std::endl;
    std::set<ImageData*> uniqueMetadata(descriptorsMetadata.begin(), descriptorsMetadata.end());
    for (auto it = uniqueMetadata.begin(); it != uniqueMetadata.end(); )
    {
        ImageData* data = *it;
        cv::Mat normalizedHist;
        cv::normalize(data->bowFeatures, normalizedHist, 0, data->bowFeatures.rows, cv::NORM_MINMAX, -1, cv::Mat());
        trainSamples.push_back(normalizedHist);
        trainResponses.push_back(getClassCode(classes, data->classname));
        delete *it; // clear memory
        it++;
    }
    descriptorsMetadata.clear();
    // Training neural network
    std::cout << "Training neural network..." << std::endl;
    start = ofGetElapsedTimeMillis();
    mlp = getTrainedNeuralNetwork(trainSamples, trainResponses);
    std::cout << " Seconds : " << (ofGetElapsedTimeMillis() - start) / 1000.0 << std::endl;
    // We can clear memory now
    trainSamples.release();
    trainResponses.release();
    // Train FLANN so that descriptors of unseen images can be matched to the nearest visual word
    std::cout << "Training FLANN..." << std::endl;
    start = ofGetElapsedTimeMillis();
    flann = cv::Ptr<cv::FlannBasedMatcher>(new cv::FlannBasedMatcher());
    flann->add(vocabulary);
    flann->train();
    std::cout << " Seconds : " << (ofGetElapsedTimeMillis() - start) / 1000.0 << std::endl;
    // Reading test set
    std::cout << "Reading test set..." << std::endl;
    start = ofGetElapsedTimeMillis();
    readImagesToTest(files.begin() + (size_t)(files.size() * trainSplitRatio), files.end());
    std::cout << " Seconds : " << (ofGetElapsedTimeMillis() - start) / 1000.0 << std::endl;
    // Get confusion matrix of the test set
    std::vector<std::vector<int> > confusionMatrix = getConfusionMatrix();
    // How accurate is our model?
    std::cout << "Confusion matrix " << std::endl;
    printConfusionMatrix(confusionMatrix, classes);
    std::cout << "Accuracy " << getAccuracy(confusionMatrix) << std::endl;
    // Now save everything
    std::cout << "saving models" << std::endl;
    saveModels(vocabulary, classes);
}
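The gist contains only the annTrain class; it does not show an openFrameworks entry point. A conventional main.cpp for running this app would look roughly like the sketch below (the window size is an arbitrary choice).

#include "ofMain.h"
#include "annTrain.h"

int main()
{
    ofSetupOpenGL(1024, 768, OF_WINDOW); // set up a basic GL window; size is arbitrary
    ofRunApp(new annTrain());            // hands control to annTrain::setup(), which runs train()
}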
// annTrain.h
#pragma once
#include <boost/filesystem.hpp>
#include <iostream>
#include <vector>
#include <algorithm>
#include <functional>
#include <map>
#include <set>
#include <fstream>
#include "ofxOpenCv.h"
#include "ofxCv.h"
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/features2d/features2d.hpp>
#include <opencv2/ml/ml.hpp>
#include "ofMain.h"

typedef std::vector<std::string>::const_iterator vec_iter;
// Per-image metadata: the class label and the bag-of-words histogram
struct ImageData
{
    std::string classname;
    cv::Mat bowFeatures;
};

class annTrain : public ofBaseApp {

public:

    // oF functions
    void setup();
    void update();
    void draw();

    // our cv functions
    void processClassAndDesc(const std::string& classname, const cv::Mat& descriptors);
    std::vector<std::string> getFilesInDirectory(const std::string& directory);
    std::string getClassName(const std::string& filename);
    cv::Mat getDescriptors(const cv::Mat& img);
    void readImages(vec_iter begin, vec_iter end);
    void readImagesToTest(vec_iter begin, vec_iter end);
    int getClassId(const std::set<std::string>& classes, const std::string& classname);
    cv::Mat getClassCode(const std::set<std::string>& classes, const std::string& classname);
    cv::Mat getBOWFeatures(const cv::Mat& descriptors, int vocabularySize);
    cv::Ptr<cv::ml::ANN_MLP> getTrainedNeuralNetwork(const cv::Mat& trainSamples, const cv::Mat& trainResponses);
    int getPredictedClass(const cv::Mat& predictions);
    std::vector<std::vector<int> > getConfusionMatrix();
    void printConfusionMatrix(const std::vector<std::vector<int> >& confusionMatrix, const std::set<std::string>& classes);
    float getAccuracy(const std::vector<std::vector<int> >& confusionMatrix);
    void saveModels(const cv::Mat& vocabulary, const std::set<std::string>& classes);
    void train(std::string imagesDir, int networkInputSize, float trainSplitRatio);
    void processClassAndDescForTest(const std::string& classname, const cv::Mat& descriptors);

    // class variables
    cv::Mat descriptorsSet;
    std::vector<ImageData*> descriptorsMetadata;
    std::set<std::string> classes;
    cv::Ptr<cv::FlannBasedMatcher> flann;
    cv::Mat testSamples;
    std::vector<int> testOutputExpected;

    cv::Mat trainSamples;
    cv::Mat trainResponses;
    cv::Ptr<cv::ml::ANN_MLP> mlp;
};
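For reference, the three files written by saveModels() (mlp.yaml, vocabulary.yaml and classes.txt) are meant to be loaded by a separate prediction app, which is not part of this gist. The sketch below is a minimal illustration, not the author's code, of how a single image could be classified with those files. It assumes an OpenCV 3.x build, where cv::ml::StatModel::load<cv::ml::ANN_MLP> is the documented loading idiom (newer releases also provide cv::ml::ANN_MLP::load). The helper name classifySingleImage and its path arguments are hypothetical.

#include <fstream>
#include <map>
#include <string>
#include <vector>
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/features2d/features2d.hpp>
#include <opencv2/ml/ml.hpp>

// Hypothetical helper: classify one image with the models saved by annTrain::saveModels()
std::string classifySingleImage(const std::string& imagePath,
                                const std::string& mlpPath,        // the saved mlp.yaml
                                const std::string& vocabularyPath, // the saved vocabulary.yaml
                                const std::string& classesPath)    // the saved classes.txt
{
    // Load the trained MLP and the visual-word vocabulary
    cv::Ptr<cv::ml::ANN_MLP> mlp = cv::ml::StatModel::load<cv::ml::ANN_MLP>(mlpPath);
    cv::Mat vocabulary;
    cv::FileStorage fs(vocabularyPath, cv::FileStorage::READ);
    fs["vocabulary"] >> vocabulary;
    fs.release();

    // Load the "id <tab> name" mapping written by saveModels()
    std::map<int, std::string> classNames;
    std::ifstream classesInput(classesPath);
    int id;
    std::string name;
    while (classesInput >> id >> name) classNames[id] = name;

    // Extract KAZE descriptors for the query image (grayscale, as in training)
    cv::Mat img = cv::imread(imagePath, 0);
    cv::Ptr<cv::KAZE> kaze = cv::KAZE::create();
    std::vector<cv::KeyPoint> keypoints;
    cv::Mat descriptors;
    kaze->detectAndCompute(img, cv::noArray(), keypoints, descriptors);

    // Map each descriptor to its nearest visual word (a row of the vocabulary)
    // and build the bag-of-words histogram, normalized to [0, 1] as in training
    cv::FlannBasedMatcher flann;
    std::vector<cv::DMatch> matches;
    flann.match(descriptors, vocabulary, matches);
    cv::Mat bow = cv::Mat::zeros(1, vocabulary.rows, CV_32F);
    for (size_t i = 0; i < matches.size(); i++)
    {
        bow.at<float>(matches[i].trainIdx)++;
    }
    cv::normalize(bow, bow, 0, 1, cv::NORM_MINMAX, -1, cv::Mat());

    // Run the MLP and take the output neuron with the highest activation
    cv::Mat output;
    mlp->predict(bow, output);
    int best = 0;
    for (int i = 1; i < output.cols; i++)
    {
        if (output.at<float>(i) > output.at<float>(best)) best = i;
    }
    return classNames.count(best) ? classNames[best] : std::string("unknown");
}

This sketch matches the query descriptors directly against the vocabulary Mat, so trainIdx is the visual-word row index; that plays the same role as flann->match inside getBOWFeatures(). The [0, 1] normalization matches the training-time call, since the training histograms are single-row matrices.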