Created
September 4, 2018 23:03
-
-
Save EmmanuelMess/f5b79fb42d27f05600f4eda171045f6e to your computer and use it in GitHub Desktop.
Competitive unsupervised learning of MNIST w/Oja's algorithm (18.45% error)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <cmath>
#include <csetjmp>
#include <cstddef>
#include <cstdint>
#include <exception>
#include <iostream>
#include <random>
#include <stdexcept>
#include <vector>

#include "mnist/mnist_reader.hpp"
using namespace std; | |
std::jmp_buf errorJumpBuffer; | |
const int IMAGE_SIZE = 784; | |
const int PASSES = 1000000; | |
mnist::MNIST_dataset<vector, vector<uint8_t>, uint8_t> DATASET = | |
mnist::read_dataset("./dataset/"); | |
vector<vector<vector<double>>> trainSample() { | |
vector<vector<vector<double>>> ret(10); | |
for (int i = 0; i < DATASET.training_images.size(); ++i) { | |
vector<double> newImage; | |
for(const auto pixel : DATASET.training_images[i]) { | |
newImage.push_back(pixel); | |
} | |
ret[DATASET.training_labels[i]].push_back(newImage); | |
} | |
return ret; | |
} | |
vector<vector<vector<double>>> testSample() { | |
vector<vector<vector<double>>> ret(10); | |
for (int i = 0; i < DATASET.test_images.size(); ++i) { | |
vector<double> newImage; | |
for(const auto pixel : DATASET.test_images[i]) { | |
newImage.push_back(pixel); | |
} | |
ret[DATASET.test_labels[i]].push_back(newImage); | |
} | |
return ret; | |
} | |
/**
 * Computes the Euclidean (L2) norm of a vector.
 *
 * @param image the vector to measure; may be empty.
 * @return the square root of the sum of squared components (0 for an empty vector).
 */
double vectorLength(const std::vector<double>& image) {
    // Accumulate in double: an integer accumulator would truncate every
    // fractional squared term and give a wrong norm for non-integer vectors.
    double sum = 0.0;
    for (const double pixel : image) {
        sum += pixel * pixel;
    }
    return std::sqrt(sum);
}
void normalizeVector(vector<double> &image) { | |
double length = vectorLength(image); | |
if(length == 1) return; | |
for(double& pixel : image) { | |
pixel /= length; | |
} | |
} | |
/**
 * Computes the dot product of two vectors of equal length.
 *
 * @param x first vector.
 * @param y second vector; must have the same number of components as x.
 * @return the sum of x[i] * y[i] over all components (0 for empty vectors).
 * @throws std::invalid_argument if the two vectors differ in length.
 */
double scalarProduct(const std::vector<double>& x, const std::vector<double>& y) {
    if (x.size() != y.size()) {
        // Report the mismatch with an exception: a longjmp is only defined
        // when a matching setjmp has already filled the jump buffer.
        throw std::invalid_argument("scalarProduct: vectors differ in length");
    }
    double ret = 0;
    // Iterate over the actual vector length so vectors of any size are handled.
    for (std::size_t i = 0; i < x.size(); ++i) {
        ret += x[i] * y[i];
    }
    return ret;
}
/**
 * Multiplies every component of a vector by a scalar.
 *
 * @param x the scalar factor.
 * @param y the vector to scale; not modified.
 * @return a new vector with the same number of components as y, where
 *         component i equals x * y[i].
 */
std::vector<double> scalarProduct(double x, const std::vector<double>& y) {
    std::vector<double> ret;
    ret.reserve(y.size());  // the result size is known up front
    for (const double value : y) {
        ret.push_back(x * value);
    }
    return ret;
}
/**
 * Adds two vectors component by component.
 *
 * @param x first operand.
 * @param y second operand; must have the same number of components as x.
 * @return a new vector whose component i equals x[i] + y[i].
 * @throws std::invalid_argument if the two vectors differ in length.
 */
std::vector<double> add(const std::vector<double>& x, const std::vector<double>& y) {
    // A shorter y would otherwise be read out of bounds.
    if (x.size() != y.size()) {
        throw std::invalid_argument("add: vectors differ in length");
    }
    std::vector<double> ret(x);
    for (std::size_t i = 0; i < ret.size(); ++i) {
        ret[i] += y[i];
    }
    return ret;
}
/**
 * Subtracts one vector from another component by component.
 *
 * @param x the minuend.
 * @param y the subtrahend; must have the same number of components as x.
 * @return a new vector whose component i equals x[i] - y[i].
 * @throws std::invalid_argument if the two vectors differ in length.
 */
std::vector<double> substract(const std::vector<double>& x, const std::vector<double>& y) {
    // A shorter y would otherwise be read out of bounds.
    if (x.size() != y.size()) {
        throw std::invalid_argument("substract: vectors differ in length");
    }
    std::vector<double> ret(x);
    for (std::size_t i = 0; i < ret.size(); ++i) {
        ret[i] -= y[i];
    }
    return ret;
}
vector<double> randNormalVector(size_t length) { | |
static std::random_device rd; | |
static std::mt19937 mt(rd()); | |
static std::normal_distribution<double> wDist(1, 1); | |
vector<double> ret; | |
for (int i = 0; i < length; ++i) { | |
ret.push_back(wDist(mt)); | |
} | |
normalizeVector(ret); | |
return ret; | |
} | |
int main() { | |
vector<vector<vector<double>>> sample = trainSample(); | |
for(vector<vector<double>>& cluster : sample) { | |
for(vector<double>& image : cluster) { | |
normalizeVector(image); | |
} | |
} | |
vector<vector<double>> weights; | |
for (int i = 0; i < 10; i++) { | |
weights.push_back(randNormalVector(IMAGE_SIZE)); | |
} | |
std::random_device rd; | |
std::mt19937 mt(rd()); | |
vector<double> lambdas(10, 0.1); | |
for (int i = 0; i < sample.size(); i++) { | |
const vector<vector<double>>& cluster = sample[i]; | |
if(cluster.empty()) { | |
cerr << endl << "Size of cluster " << i << " is 0!" << endl; | |
return 1; | |
} | |
std::uniform_int_distribution<size_t> xDist(0, cluster.size()-1); | |
for (int j = 0; j < PASSES; j++) { | |
const vector<double>& x = cluster[xDist(mt)]; | |
vector<double>& w = weights[i]; | |
double& lambda = lambdas[i]; | |
double phi = scalarProduct(x, w); | |
w = add(w, scalarProduct(lambda * phi, substract(x, scalarProduct(phi, w)))); | |
if(lambda > 0.0001f) lambda -= 0.0001f; | |
} | |
} | |
vector<vector<int>> results(10, vector<int>(10, 0)); | |
int notClassified = 0; | |
int error = 0; | |
vector<vector<vector<double>>> test = testSample(); | |
for (int i = 0; i < test.size(); i++) { | |
const vector<vector<double>>& cluster = test[i]; | |
if(cluster.empty()) { | |
cerr << endl << "Size of cluster " << i << " is 0!" << endl; | |
return 1; | |
} | |
for(const vector<double>& image : cluster) { | |
double bestProd = 0; | |
int bestWeight = -1; | |
for (int j = 0; j < weights.size(); j++) { | |
const vector<double>& weight = weights[j]; | |
double prod = scalarProduct(image, weight); | |
if(prod > bestProd) { | |
bestProd = prod; | |
bestWeight = j; | |
} | |
} | |
if(bestWeight == -1) { | |
notClassified++; | |
} else { | |
results[bestWeight][i]++; | |
if(bestWeight != i) error++; | |
} | |
} | |
} | |
cout << endl; | |
cout << "Not classified: " << notClassified << endl; | |
cout << "Error: " << error << " / 10.000 | " << (error/100.) << "%" << endl; | |
cout << endl; | |
for (int i = 0; i < results.size(); ++i) { | |
cout << "For neuron " << i << ":" << endl; | |
cout << "{"; | |
for (int j = 0; j < results[i].size(); ++j) { | |
cout << "[" << j << "]: " << results[i][j]; | |
if(j != results[i].size()-1) { | |
cout << ", "; | |
} | |
} | |
cout << "}" << endl; | |
} | |
return 0; | |
setjmp(errorJumpBuffer); | |
return 1; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment