//#define DISP_WEIGHT
#include <Athena/Athena.hpp>
#include <Athena/XtensorBackend.hpp>
#include <Athena/NNPACKBackend.hpp>
#include "mnist_reader.hpp"
//Need to use xtensor API due to incomplete Tensor implementation
#include <xtensor/xarray.hpp>
#include <iostream>
#include <chrono>
#include <random>
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
using namespace cv;
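//Convert a 28x28 tensor (an MNIST image or a neuron's weight vector) into an OpenCV Mat,
//upscaled to 100x100 for display.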
Mat tensor2Mat(const At::Tensor& image)
{
	Mat res(28,28, CV_32F);
	auto data = image.host();
	memcpy(res.data, &data[0], data.size()*sizeof(float));
	Mat sc;
	resize(res,sc,{100,100});
	return sc;
}
using namespace std::chrono;
int maxElementIndex(const std::vector<float>& vec)
{
	return std::distance(vec.begin(), std::max_element(vec.begin(), vec.end()));
}
At::Tensor imagesToTensor(const std::vector<std::vector<uint8_t>>& arr, At::Backend& backend)
{
	std::vector<float> res;
	res.reserve(arr.size()*arr[0].size());
	for(const auto& img : arr)
	{
		for(const auto v : img)
			res.push_back(v/255.f);
	}
	return At::Tensor(res,{(intmax_t)arr.size(),(intmax_t)arr[0].size()}, backend);
}
std::vector<float> onehot(int ind, int total)
{
	std::vector<float> vec(total);
	for(int i=0;i<total;i++)
		vec[i] = (i==ind)?1.f:0.f;
	return vec;
}
At::Tensor labelsToOnehot(const std::vector<uint8_t>& labels, At::Backend& backend)
{
	std::vector<float> buffer;
	buffer.reserve(labels.size()*10);
	for(const auto& label : labels)
	{
		std::vector<float> onehotVec = onehot(label, 10);
		for(const auto v : onehotVec)
			buffer.push_back((float)v);
	}
	return At::Tensor(buffer, {(intmax_t)labels.size(), 10}, backend);
}
using namespace At;
//STDP Layer
//This is a fully-connected layer that tries to emulate how real neurons work.
//Each neuron in this layer compares the signal from the upper layer against a weight
//through a Gaussian function. Then the neuron sums up all comparison results larger than
//a pre-determined threshold to produce a score. Then the layer selects the top
//N neurons with the highest score to fire a signal to the next layer. (Basically
//it outputs a Sparse Data Representation)
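//E.g. for an input value p and weight value w, the comparison is exp(-(p-w)^2); results
//above 0.35 are kept as-is while the rest contribute -0.1*exp(-(p-w)^2) (see the
//"activate" kernel registered in main()).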
class STDP : public FullyConnectedLayer
{
public:
	STDP(intmax_t outputNum):dist(0, 1)
	{
		setOutputShape(Shape({Shape::None, outputNum}));
		setType("STDP");
	}
	virtual void build() override
	{
		intmax_t outputNum = outputShape()[1];
		intmax_t inputNum = inputShape()[1];
		//Initialize algorithms
		guessian = backend()->getAlgorithm<SigmoidForward>("guessian");
		updateVector = backend()->getAlgorithm<SigmoidForward>("STDPVector");
		activate = backend()->getAlgorithm<SigmoidForward>("activate");
		//Initialize weights
		for(intmax_t i=0;i<outputNum;i++)
		{
			weights_.push_back(At::rand(0, 1, {inputNum}, *backend()));
		}
		activatedCount_ = std::vector<intmax_t>(outputNum, 0);
	}
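	//The first 3750 forward calls go through bootstrap() (5 winning neurons per sample,
	//boosting enabled); afterwards run() is used (8 winners, boosting disabled).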
	virtual Tensor forward(const Tensor& x) override
	{
		#if 1
		if(count_++ < 3750)
			return bootstrap(x);
		else
			return run(x);
		#else
		return bootstrap(x);
		#endif
	}
	Tensor bootstrap(const Tensor& x)
	{
		Tensor out = At::zeros({x.shape()[0], outputShape()[1]}, *backend());
		intmax_t outputNum = outputShape()[1];
		std::vector<std::pair<float, int>> res(outputShape()[1]);
		for(intmax_t i=0;i<x.shape()[0];i++)
		{
			auto vec = x.slice({i});
			float* arr = out.hostPtr()+out.shape()[1]*i;
			#pragma omp parallel for
			for(intmax_t j=0;j<outputShape()[1];j++)
			{
				auto diff = vec-weights_[j];
				auto proximity = activate(diff);
				float prox = proximity.sum({1}).hostPtr()[0]/vec.size();
				//Naive(?) implementation of boosting: the most-active neuron's score is
				//scaled by 0.9x, while neurons lagging 15+ activations behind it get up to 1.35x.
				#if 1
				intmax_t actiDiff = currentMaxActivateNum_ - activatedCount_[j];
				prox *= (float)std::min<intmax_t>(actiDiff, 15)*0.03f + 0.9f;
				#endif
				res[j] = {prox, j};
				arr[j] = 0;
			}
			intmax_t useNum = 5;
			std::nth_element(res.begin(), res.begin()+useNum, res.end(), [](auto a, auto b){return a.first > b.first;});
			//#pragma omp parallel for
			for(intmax_t j=0;j<useNum;j++)
			{
				intmax_t index = res[j].second;
				float v = res[j].first;
				activatedCount_[index]++;
				if(activatedCount_[index] > currentMaxActivateNum_)
					currentMaxActivateNum_ = activatedCount_[index];
				arr[index] = (2.f*(v*v-(3.f-2.f*v))-1.f)*4.f;
				//This bug saved me...?
				// auto& w = weights_[index];
				// optimizer.update(w, updateVector(w-vec));
			}
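			//Apply the STDP-style update to the winning neurons only; the per-element step
			//comes from the "STDPVector" kernel registered in main().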
			for(intmax_t j=0;j<useNum;j++)
			{
				intmax_t index = res[j].second;
				auto& w = weights_[index];
				optimizer.update(w, updateVector(w-vec));
			}
			#ifdef DISP_WEIGHT
			for(int i=0;i<30;i++)
			{
				Mat asd = tensor2Mat(weights_[i]);
				imshow("display" + std::to_string(i), asd);
			}
			waitKey(3);
			#endif
		}
		return out;
	}
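	//Same procedure as bootstrap(), but with the boosting term disabled and 8 winning
	//neurons selected per sample.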
	Tensor run(const Tensor& x)
	{
		Tensor out = At::zeros({x.shape()[0], outputShape()[1]}, *backend());
		intmax_t outputNum = outputShape()[1];
		std::vector<std::pair<float, int>> res(outputShape()[1]);
		for(intmax_t i=0;i<x.shape()[0];i++)
		{
			auto vec = x.slice({i});
			float* arr = out.hostPtr()+out.shape()[1]*i;
			#pragma omp parallel for
			for(intmax_t j=0;j<outputShape()[1];j++)
			{
				auto diff = vec-weights_[j];
				auto proximity = activate(diff);
				float prox = proximity.sum({1}).hostPtr()[0]/vec.size();
				//Naive(?) implementation of boosting (disabled here)
				#if 0
				intmax_t actiDiff = currentMaxActivateNum_ - activatedCount_[j];
				prox *= (float)std::min<intmax_t>(actiDiff, 15)*0.03f + 0.9f;
				#endif
				res[j] = {prox, j};
				arr[j] = 0;
			}
			intmax_t useNum = 8;
			std::nth_element(res.begin(), res.begin()+useNum, res.end(), [](auto a, auto b){return a.first > b.first;});
			//#pragma omp parallel for
			for(intmax_t j=0;j<useNum;j++)
			{
				intmax_t index = res[j].second;
				float v = res[j].first;
				activatedCount_[index]++;
				if(activatedCount_[index] > currentMaxActivateNum_)
					currentMaxActivateNum_ = activatedCount_[index];
				arr[index] = (2.f*(v*v-(3.f-2.f*v))-1.f)*4.f;
				//This bug saved me...?
				// auto& w = weights_[index];
				// optimizer.update(w, updateVector(w-vec));
			}
			for(intmax_t j=0;j<useNum;j++)
			{
				intmax_t index = res[j].second;
				auto& w = weights_[index];
				optimizer.update(w, updateVector(w-vec));
			}
		}
		return out;
	}
	//Cannot back-propagate through this for now
	virtual void backword(const Tensor& x, const Tensor& y,
		Tensor& dx, const Tensor& dy) override
	{
	}
	//Weight update is done in the forward pass.
	virtual void update(Optimizer* optimizer) override
	{
	}
protected:
	std::vector<Tensor> weights_;
	std::vector<intmax_t> activatedCount_;
	intmax_t currentMaxActivateNum_ = 0;
	std::minstd_rand eng;
	std::uniform_real_distribution<float> dist;
	delegate<SigmoidForward> guessian;
	delegate<SigmoidForward> updateVector;
	delegate<SigmoidForward> activate;
	NestrovOptimizer optimizer;
	int count_ = 0;
};
int main()
{
	At::XtensorBackend backend;
	//Use the NNPACK backend to accelerate things. Remove if NNPACK is not available.
	At::NNPackBackend nnpBackend;
	backend.useAlgorithm<At::FCForwardFunction>("fullyconnectedForward", nnpBackend);
	backend.useAlgorithm<At::FCBackwardFunction>("fullyconnectedBackward", nnpBackend);
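	//Register custom element-wise kernels under the SigmoidForward signature (Tensor -> Tensor)
	//so the STDP layer can fetch them via getAlgorithm<SigmoidForward>() in build().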
	backend.addAlgorithm<SigmoidForward>("guessian",
		[&backend](const Tensor& in)->Tensor
		{
			auto vec = in.host();
			for(auto& v : vec)
				v = std::exp(-(v*v));
			return backend.createTensor(vec, in.shape());
		});
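	//"STDPVector" yields the per-element weight-update step: -0.1 where the weight is below
	//the corresponding input value, 0.04 where it is above.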
	backend.addAlgorithm<SigmoidForward>("STDPVector",
		[&backend](const Tensor& in)->Tensor
		{
			auto vec = in.host();
			for(auto& v : vec)
				v = (v<0)?-0.1:0.04;
			return backend.createTensor(vec, in.shape());
		});
	backend.addAlgorithm<SigmoidForward>("activate",
		[&backend](const Tensor& in)->Tensor
		{
			auto vec = in.host();
			for(auto& v : vec)
			{
				float tmp = std::exp(-(v*v));//Gaussian
				v = tmp > 0.35? tmp : -0.1*tmp;//thresholding the comparison
			}
			return backend.createTensor(vec, in.shape());
		});
	At::SequentialNetwork net(&backend);
	auto dataset = mnist::read_dataset<std::vector, std::vector, uint8_t, uint8_t>("../mnist");
	At::Tensor traningImage = imagesToTensor(dataset.training_images, backend);
	At::Tensor traningLabels = labelsToOnehot(dataset.training_labels, backend);
	At::Tensor testingImage = imagesToTensor(dataset.test_images, backend);
	At::Tensor testingLabels = labelsToOnehot(dataset.test_labels, backend);
	net.add(At::InputLayer(At::Shape({Shape::None, 28*28})));
	net.add(STDP(30));
	net.add(At::TanhLayer(&backend));
	//A fully connected layer is used to perform the classification
	net.add(At::FullyConnectedLayer(10));
	net.add(At::SigmoidLayer());
	net.compile();
	net.summary();
	At::SGDOptimizer opt;
	opt.alpha_ = 0.1f;
	At::MSELoss loss;
	size_t epoch = 100;
	size_t batchSize = 16;
	int count = 0;
	auto onBatch = [&](float l)
	{
		int sampleNum = traningImage.shape()[0];
		std::cout << count << "/" << sampleNum << "\r" << std::flush;
		count += batchSize;
	};
	auto onEpoch = [&](float l)
	{
		std::cout << "Epoch Loss: " << l << std::endl;
		count = 0;
	};
	high_resolution_clock::time_point t1 = high_resolution_clock::now();
	//Train on the MNIST training set (the progress counter in onBatch assumes its size).
	net.fit(opt,loss,traningImage,traningLabels,batchSize,epoch,onBatch,onEpoch);
	high_resolution_clock::time_point t2 = high_resolution_clock::now();
	duration<double> time_span = duration_cast<duration<double>>(t2 - t1);
	std::cout << "It took me " << time_span.count() << " seconds." << std::endl;
	int correct = 0;
	for(intmax_t i=0;i<testingImage.shape()[0];i++)
	{
		At::Tensor x = testingImage.slice({i},{1});
		At::Tensor res = net.predict(x);
		int predictLabel = maxElementIndex(res.host());
		if(predictLabel == dataset.test_labels[i])
			correct++;
	}
	std::cout << "Accuracy: " << correct/(float)testingImage.shape()[0] << std::endl;
}