Created
January 12, 2018 14:25
-
-
Save marty1885/a80138f518283eb5db0256acd78e1020 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <Athena/Athena.hpp> | |
#include <Athena/XtensorBackend.hpp> | |
#include <Athena/NNPACKBackend.hpp> | |
#include "mnist_reader.hpp" | |
//Need to use xtensor API due to incomplete Tensor implementation | |
#include <xtensor/xarray.hpp> | |
#include <iostream> | |
#include <chrono> | |
#include <random> | |
using namespace std::chrono; | |
int maxElementIndex(const std::vector<float>& vec) | |
{ | |
return std::distance(vec.begin(), std::max_element(vec.begin(), vec.end())); | |
} | |
At::Tensor imagesToTensor(const std::vector<std::vector<uint8_t>>& arr, At::Backend& backend) | |
{ | |
std::vector<float> res; | |
res.reserve(arr.size()*arr[0].size()); | |
for(const auto& img : arr) | |
{ | |
for(const auto v : img) | |
res.push_back(v/255.f); | |
} | |
return At::Tensor(res,{(intmax_t)arr.size(),(intmax_t)arr[0].size()}, backend); | |
} | |
std::vector<float> onehot(int ind, int total) | |
{ | |
std::vector<float> vec(total); | |
for(int i=0;i<total;i++) | |
vec[i] = (i==ind)?1.f:0.f; | |
return vec; | |
} | |
At::Tensor labelsToOnehot(const std::vector<uint8_t>& labels, At::Backend& backend) | |
{ | |
std::vector<float> buffer; | |
buffer.reserve(labels.size()*10); | |
for(const auto& label : labels) | |
{ | |
std::vector<float> onehotVec = onehot(label, 10); | |
for(const auto v : onehotVec) | |
buffer.push_back((float)v); | |
} | |
return At::Tensor(buffer, {(intmax_t)labels.size(), 10}, backend); | |
} | |
using namespace At; | |
//STDP Layer | |
//This is a fully-connected layer that tries to emulate to real neurons work. | |
//Each enuron in this layer compares singal from upper layer to a wheight over | |
//a guessian function. Then the neuron sums up all comparsion results larger than | |
//a pre-determined threashold to produce a score. Then the layer selects the top | |
//N neurons with the highest score to fire signal to the next layer. (Basiclly | |
//it outputs a Sparse Data Representation) | |
class STDP : public FullyConnectedLayer | |
{ | |
public: | |
STDP(intmax_t outputNum):dist(0, 1) | |
{ | |
setOutputShape(Shape({Shape::None, outputNum})); | |
setType("STDP"); | |
} | |
virtual void build() override | |
{ | |
intmax_t outputNum = outputShape()[1]; | |
intmax_t inputNum = inputShape()[1]; | |
//Initialize algorithms | |
guessian = backend()->getAlgorithm<SigmoidForward>("guessian"); | |
updateVector = backend()->getAlgorithm<SigmoidForward>("STDPVector"); | |
activate = backend()->getAlgorithm<SigmoidForward>("activate"); | |
//Initialize weights | |
for(intmax_t i=0;i<outputNum;i++) | |
{ | |
weights_.push_back(At::rand(0, 1, {inputNum}, *backend())); | |
} | |
activatedCount_ = std::vector<intmax_t>(outputNum, 0); | |
} | |
virtual Tensor forward(const Tensor& x) override | |
{ | |
Tensor out = At::zeros({x.shape()[0], outputShape()[1]}, *backend()); | |
std::vector<std::pair<float, int>> res(outputShape()[1]); | |
for(intmax_t i=0;i<x.shape()[0];i++) | |
{ | |
auto vec = x.slice({i}); | |
float* arr = out.hostPtr()+out.shape()[1]*i; | |
#pragma omp parallel for | |
for(intmax_t j=0;j<outputShape()[1];j++) | |
{ | |
auto diff = vec-weights_[j]; | |
auto proximity = activate(diff); | |
float prox = proximity.sum({1}).hostPtr()[0]/vec.size(); | |
//Naive(?) implementation of boosting | |
#if 1 | |
intmax_t actiDiff = currentMaxActivateNum_ - activatedCount_[j]; | |
prox *= (float)std::min(actiDiff, 10L)*0.02f + 0.9f; | |
#endif | |
res[j] = {prox, j}; | |
arr[j] = 0; | |
} | |
std::nth_element(res.begin(), res.begin()+10, res.end(), [](auto a, auto b){return a.first < b.first;}); | |
for(intmax_t j=0;j<10;j++) | |
{ | |
activatedCount_[res[j].second]++; | |
if(activatedCount_[res[j].second] > currentMaxActivateNum_) | |
currentMaxActivateNum_ = activatedCount_[res[j].second]; | |
arr[res[j].second] = 50; | |
optimizer.update(weights_[j], updateVector(weights_[j]-vec)); | |
} | |
} | |
return out; | |
} | |
//Cannot back propergate trough this now | |
virtual void backword(const Tensor& x, const Tensor& y, | |
Tensor& dx, const Tensor& dy) override | |
{ | |
} | |
//Weight update done in the fordward pass. | |
virtual void update(Optimizer* optimizer) override | |
{ | |
} | |
protected: | |
std::vector<Tensor> weight_; | |
std::vector<intmax_t> activatedCount_; | |
intmax_t currentMaxActivateNum_ = 0; | |
std::minstd_rand eng; | |
std::uniform_real_distribution<float> dist; | |
delegate<SigmoidForward> guessian; | |
delegate<SigmoidForward> updateVector; | |
delegate<SigmoidForward> activate; | |
NestrovOptimizer optimizer; | |
}; | |
int main() | |
{ | |
At::XtensorBackend backend; | |
//Use the NNPACK backend to accelerate things. Remove if NNPACK is not avliable | |
At::NNPackBackend nnpBackend; | |
backend.useAlgorithm<At::FCForwardFunction>("fullyconnectedForward", nnpBackend); | |
backend.useAlgorithm<At::FCBackwardFunction>("fullyconnectedBackward", nnpBackend); | |
backend.addAlgorithm<SigmoidForward>("guessian", | |
[&backend](const Tensor& in)->Tensor | |
{ | |
auto vec = in.host(); | |
for(auto& v : vec) | |
v = std::exp(-2.f*v*v); | |
return backend.createTensor(vec, in.shape()); | |
}); | |
backend.addAlgorithm<SigmoidForward>("STDPVector", | |
[&backend](const Tensor& in)->Tensor | |
{ | |
auto vec = in.host(); | |
for(auto& v : vec) | |
v = (v<0)?(-0.06):0.2; | |
return backend.createTensor(vec, in.shape()); | |
}); | |
backend.addAlgorithm<SigmoidForward>("activate", | |
[&backend](const Tensor& in)->Tensor | |
{ | |
auto vec = in.host(); | |
for(auto& v : vec) | |
{ | |
float tmp = std::exp(-2.f*v*v);//guessian | |
v = tmp > 0.35? tmp : -0.1*tmp;//threasholding the comparsion | |
} | |
return backend.createTensor(vec, in.shape()); | |
}); | |
At::SequentialNetwork net(&backend); | |
auto dataset = mnist::read_dataset<std::vector, std::vector, uint8_t, uint8_t>("../mnist"); | |
At::Tensor traningImage = imagesToTensor(dataset.training_images, backend); | |
At::Tensor traningLabels = labelsToOnehot(dataset.training_labels, backend); | |
At::Tensor testingImage = imagesToTensor(dataset.test_images, backend); | |
At::Tensor testingLabels = labelsToOnehot(dataset.test_labels, backend); | |
net.add(At::InputLayer(At::Shape({Shape::None, 28*28}))); | |
net.add(STDP(30)); | |
net.add(At::TanhLayer(&backend)); | |
//A fully connected layer is used to perform the classifcation | |
net.add(At::FullyConnectedLayer(10)); | |
net.add(At::SigmoidLayer()); | |
net.compile(); | |
net.summary(); | |
At::SGDOptimizer opt; | |
opt.alpha_ = 0.1f; | |
At::MSELoss loss; | |
size_t epoch = 10; | |
size_t batchSize = 16; | |
int count = 0; | |
auto onBatch = [&](float l) | |
{ | |
int sampleNum = traningImage.shape()[0]; | |
std::cout << count << "/" << sampleNum << "\r" << std::flush; | |
count += batchSize; | |
}; | |
auto onEpoch = [&](float l) | |
{ | |
std::cout << "Epoch Loss: " << l << std::endl; | |
count = 0; | |
}; | |
high_resolution_clock::time_point t1 = high_resolution_clock::now(); | |
net.fit(opt,loss,traningImage,traningLabels,batchSize,epoch,onBatch,onEpoch); | |
high_resolution_clock::time_point t2 = high_resolution_clock::now(); | |
duration<double> time_span = duration_cast<duration<double>>(t2 - t1); | |
std::cout << "It took me " << time_span.count() << " seconds." << std::endl; | |
int correct = 0; | |
for(intmax_t i=0;i<testingImage.shape()[0];i++) | |
{ | |
At::Tensor x = testingImage.slice({i},{1}); | |
At::Tensor res = net.predict(x); | |
int predictLabel = maxElementIndex(res.host()); | |
if(predictLabel == dataset.test_labels[i]) | |
correct++; | |
} | |
std::cout << "Accuracy: " << correct/(float)testingImage.shape()[0] << std::endl; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment