//#define DISP_WEIGHT
#include <Athena/Athena.hpp>
#include <Athena/XtensorBackend.hpp>
#include <Athena/NNPACKBackend.hpp>
#include "mnist_reader.hpp"
//Need to use xtensor API due to incomplete Tensor implementation
#include <xtensor/xarray.hpp>
#include <iostream>
#include <chrono>
#include <random>
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
using namespace cv;
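//Convert a 28x28 tensor (an MNIST image or a neuron's weight vector) into an OpenCV Mat,
//upscaled to 100x100 for display.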
Mat tensor2Mat(const At::Tensor& image)
{
	Mat res(28,28, CV_32F);
	auto data = image.host();
	memcpy(res.data, &data[0], data.size()*sizeof(float));
	Mat sc;
	resize(res,sc,{100,100});
	return sc;
}
using namespace std::chrono;
int maxElementIndex(const std::vector<float>& vec)
{
	return std::distance(vec.begin(), std::max_element(vec.begin(), vec.end()));
}
At::Tensor imagesToTensor(const std::vector<std::vector<uint8_t>>& arr, At::Backend& backend)
{
	std::vector<float> res;
	res.reserve(arr.size()*arr[0].size());
	for(const auto& img : arr)
	{
		for(const auto v : img)
			res.push_back(v/255.f);
	}
	return At::Tensor(res,{(intmax_t)arr.size(),(intmax_t)arr[0].size()}, backend);
}
std::vector<float> onehot(int ind, int total)
{
	std::vector<float> vec(total);
	for(int i=0;i<total;i++)
		vec[i] = (i==ind)?1.f:0.f;
	return vec;
}
At::Tensor labelsToOnehot(const std::vector<uint8_t>& labels, At::Backend& backend)
{
	std::vector<float> buffer;
	buffer.reserve(labels.size()*10);
	for(const auto& label : labels)
	{
		std::vector<float> onehotVec = onehot(label, 10);
		for(const auto v : onehotVec)
			buffer.push_back((float)v);
	}
	return At::Tensor(buffer, {(intmax_t)labels.size(), 10}, backend);
}
using namespace At;
//STDP Layer
//This is a fully-connected layer that tries to emulate how real neurons work.
//Each neuron in this layer compares the signal from the upper layer against a weight
//through a Gaussian function. Then the neuron sums up all comparison results larger than
//a pre-determined threshold to produce a score. Then the layer selects the top
//N neurons with the highest score to fire a signal to the next layer. (Basically
//it outputs a Sparse Data Representation)
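//E.g. for an input value p and weight value w, the comparison is exp(-(p-w)^2); results
//above 0.35 are kept as-is while the rest contribute -0.1*exp(-(p-w)^2) (see the
//"activate" kernel registered in main()).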
class STDP : public FullyConnectedLayer
{
public:
	STDP(intmax_t outputNum):dist(0, 1)
	{
		setOutputShape(Shape({Shape::None, outputNum}));
		setType("STDP");
	}
	virtual void build() override
	{
		intmax_t outputNum = outputShape()[1];
		intmax_t inputNum = inputShape()[1];
		//Initialize algorithms
		guessian = backend()->getAlgorithm<SigmoidForward>("guessian");
		updateVector = backend()->getAlgorithm<SigmoidForward>("STDPVector");
		activate = backend()->getAlgorithm<SigmoidForward>("activate");
		//Initialize weights
		for(intmax_t i=0;i<outputNum;i++)
		{
			weights_.push_back(At::rand(0, 1, {inputNum}, *backend()));
		}
		activatedCount_ = std::vector<intmax_t>(outputNum, 0);
	}
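	//The first 3750 forward calls go through bootstrap() (5 winning neurons per sample,
	//boosting enabled); afterwards run() is used (8 winners, boosting disabled).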
	virtual Tensor forward(const Tensor& x) override
	{
		#if 1
		if(count_++ < 3750)
			return bootstrap(x);
		else
			return run(x);
		#else
		return bootstrap(x);
		#endif
	}
	Tensor bootstrap(const Tensor& x)
	{
		Tensor out = At::zeros({x.shape()[0], outputShape()[1]}, *backend());
		intmax_t outputNum = outputShape()[1];
		std::vector<std::pair<float, int>> res(outputShape()[1]);
		for(intmax_t i=0;i<x.shape()[0];i++)
		{
			auto vec = x.slice({i});
			float* arr = out.hostPtr()+out.shape()[1]*i;
			#pragma omp parallel for
			for(intmax_t j=0;j<outputShape()[1];j++)
			{
				auto diff = vec-weights_[j];
				auto proximity = activate(diff);
				float prox = proximity.sum({1}).hostPtr()[0]/vec.size();
				//Naive(?) implementation of boosting: the most-active neuron's score is
				//scaled by 0.9x, while neurons lagging 15+ activations behind it get up to 1.35x.
				#if 1
				intmax_t actiDiff = currentMaxActivateNum_ - activatedCount_[j];
				prox *= (float)std::min<intmax_t>(actiDiff, 15)*0.03f + 0.9f;
				#endif
				res[j] = {prox, j};
				arr[j] = 0;
			}
			intmax_t useNum = 5;
			std::nth_element(res.begin(), res.begin()+useNum, res.end(), [](auto a, auto b){return a.first > b.first;});
			//#pragma omp parallel for
			for(intmax_t j=0;j<useNum;j++)
			{
				intmax_t index = res[j].second;
				float v = res[j].first;
				activatedCount_[index]++;
				if(activatedCount_[index] > currentMaxActivateNum_)
					currentMaxActivateNum_ = activatedCount_[index];
				arr[index] = (2.f*(v*v-(3.f-2.f*v))-1.f)*4.f;
				//This bug saved me...?
				// auto& w = weights_[index];
				// optimizer.update(w, updateVector(w-vec));
			}
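			//Apply the STDP-style update to the winning neurons only; the per-element step
			//comes from the "STDPVector" kernel registered in main().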
			for(intmax_t j=0;j<useNum;j++)
			{
				intmax_t index = res[j].second;
				auto& w = weights_[index];
				optimizer.update(w, updateVector(w-vec));
			}
			#ifdef DISP_WEIGHT
			for(int i=0;i<30;i++)
			{
				Mat asd = tensor2Mat(weights_[i]);
				imshow("display" + std::to_string(i), asd);
			}
			waitKey(3);
			#endif
		}
		return out;
	}
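	//Same procedure as bootstrap(), but with the boosting term disabled and 8 winning
	//neurons selected per sample.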
	Tensor run(const Tensor& x)
	{
		Tensor out = At::zeros({x.shape()[0], outputShape()[1]}, *backend());
		intmax_t outputNum = outputShape()[1];
		std::vector<std::pair<float, int>> res(outputShape()[1]);
		for(intmax_t i=0;i<x.shape()[0];i++)
		{
			auto vec = x.slice({i});
			float* arr = out.hostPtr()+out.shape()[1]*i;
			#pragma omp parallel for
			for(intmax_t j=0;j<outputShape()[1];j++)
			{
				auto diff = vec-weights_[j];
				auto proximity = activate(diff);
				float prox = proximity.sum({1}).hostPtr()[0]/vec.size();
				//Naive(?) implementation of boosting (disabled here)
				#if 0
				intmax_t actiDiff = currentMaxActivateNum_ - activatedCount_[j];
				prox *= (float)std::min<intmax_t>(actiDiff, 15)*0.03f + 0.9f;
				#endif
				res[j] = {prox, j};
				arr[j] = 0;
			}
			intmax_t useNum = 8;
			std::nth_element(res.begin(), res.begin()+useNum, res.end(), [](auto a, auto b){return a.first > b.first;});
			//#pragma omp parallel for
			for(intmax_t j=0;j<useNum;j++)
			{
				intmax_t index = res[j].second;
				float v = res[j].first;
				activatedCount_[index]++;
				if(activatedCount_[index] > currentMaxActivateNum_)
					currentMaxActivateNum_ = activatedCount_[index];
				arr[index] = (2.f*(v*v-(3.f-2.f*v))-1.f)*4.f;
				//This bug saved me...?
				// auto& w = weights_[index];
				// optimizer.update(w, updateVector(w-vec));
			}
			for(intmax_t j=0;j<useNum;j++)
			{
				intmax_t index = res[j].second;
				auto& w = weights_[index];
				optimizer.update(w, updateVector(w-vec));
			}
		}
		return out;
	}
	//Cannot back-propagate through this for now
	virtual void backword(const Tensor& x, const Tensor& y,
		Tensor& dx, const Tensor& dy) override
	{
	}
	//Weight update is done in the forward pass.
	virtual void update(Optimizer* optimizer) override
	{
	}
protected:
	std::vector<Tensor> weights_;
	std::vector<intmax_t> activatedCount_;
	intmax_t currentMaxActivateNum_ = 0;
	std::minstd_rand eng;
	std::uniform_real_distribution<float> dist;
	delegate<SigmoidForward> guessian;
	delegate<SigmoidForward> updateVector;
	delegate<SigmoidForward> activate;
	NestrovOptimizer optimizer;
	int count_ = 0;
};
int main()
{
	At::XtensorBackend backend;
	//Use the NNPACK backend to accelerate things. Remove if NNPACK is not available.
	At::NNPackBackend nnpBackend;
	backend.useAlgorithm<At::FCForwardFunction>("fullyconnectedForward", nnpBackend);
	backend.useAlgorithm<At::FCBackwardFunction>("fullyconnectedBackward", nnpBackend);
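	//Register custom element-wise kernels under the SigmoidForward signature (Tensor -> Tensor)
	//so the STDP layer can fetch them via getAlgorithm<SigmoidForward>() in build().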
	backend.addAlgorithm<SigmoidForward>("guessian",
		[&backend](const Tensor& in)->Tensor
		{
			auto vec = in.host();
			for(auto& v : vec)
				v = std::exp(-(v*v));
			return backend.createTensor(vec, in.shape());
		});
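	//"STDPVector" yields the per-element weight-update step: -0.1 where the weight is below
	//the corresponding input value, 0.04 where it is above.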
	backend.addAlgorithm<SigmoidForward>("STDPVector",
		[&backend](const Tensor& in)->Tensor
		{
			auto vec = in.host();
			for(auto& v : vec)
				v = (v<0)?-0.1:0.04;
			return backend.createTensor(vec, in.shape());
		});
	backend.addAlgorithm<SigmoidForward>("activate",
		[&backend](const Tensor& in)->Tensor
		{
			auto vec = in.host();
			for(auto& v : vec)
			{
				float tmp = std::exp(-(v*v));//Gaussian
				v = tmp > 0.35? tmp : -0.1*tmp;//thresholding the comparison
			}
			return backend.createTensor(vec, in.shape());
		});
	At::SequentialNetwork net(&backend);
	auto dataset = mnist::read_dataset<std::vector, std::vector, uint8_t, uint8_t>("../mnist");
	At::Tensor traningImage = imagesToTensor(dataset.training_images, backend);
	At::Tensor traningLabels = labelsToOnehot(dataset.training_labels, backend);
	At::Tensor testingImage = imagesToTensor(dataset.test_images, backend);
	At::Tensor testingLabels = labelsToOnehot(dataset.test_labels, backend);
	net.add(At::InputLayer(At::Shape({Shape::None, 28*28})));
	net.add(STDP(30));
	net.add(At::TanhLayer(&backend));
	//A fully connected layer is used to perform the classification
	net.add(At::FullyConnectedLayer(10));
	net.add(At::SigmoidLayer());
	net.compile();
	net.summary();
	At::SGDOptimizer opt;
	opt.alpha_ = 0.1f;
	At::MSELoss loss;
	size_t epoch = 100;
	size_t batchSize = 16;
	int count = 0;
	auto onBatch = [&](float l)
	{
		int sampleNum = traningImage.shape()[0];
		std::cout << count << "/" << sampleNum << "\r" << std::flush;
		count += batchSize;
	};
	auto onEpoch = [&](float l)
	{
		std::cout << "Epoch Loss: " << l << std::endl;
		count = 0;
	};
	high_resolution_clock::time_point t1 = high_resolution_clock::now();
	//Train on the MNIST training set (the progress counter in onBatch assumes its size).
	net.fit(opt,loss,traningImage,traningLabels,batchSize,epoch,onBatch,onEpoch);
	high_resolution_clock::time_point t2 = high_resolution_clock::now();
	duration<double> time_span = duration_cast<duration<double>>(t2 - t1);
	std::cout << "It took me " << time_span.count() << " seconds." << std::endl;
	int correct = 0;
	for(intmax_t i=0;i<testingImage.shape()[0];i++)
	{
		At::Tensor x = testingImage.slice({i},{1});
		At::Tensor res = net.predict(x);
		int predictLabel = maxElementIndex(res.host());
		if(predictLabel == dataset.test_labels[i])
			correct++;
	}
	std::cout << "Accuracy: " << correct/(float)testingImage.shape()[0] << std::endl;
}