Jerdak · December 20, 2015 08:39
diff --git a/main.cpp b/main.cpp
 #include "matrixnn.hpp"
 
 #include <cmath>
 #include <thread>
 #include <vector>
 #include <cstdio>
 #include <cstring>
 #include <random>
 #include <chrono>
 
 using namespace std::chrono;
 
 // MatrixNN example usage and timing comparisons between serial and parallel processing
 //
 // Note: clang 3.1.2 has a small bug [1] when compiling threaded code: it does not define the macros that the threading headers expect.
 // Use the following to compile for clang:
 // 
 // clang++ -std=c++11 -pthread -stdlib=libstdc++ -D__GCC_HAVE_SYNC_COMPARE_AND_SWAP_1 -D__GCC_HAVE_SYNC_COMPARE_AND_SWAP_2 -D__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4 -D__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8
 //
 // [1] - http://stackoverflow.com/questions/16171401/why-does-this-simple-c11-threading-example-fail-when-compiled-with-clang-3-2
 /// Benchmark driver: times the serial and the threaded matrix product for a
 /// list of matrix sizes and prints one CSV row per size.
 ///
 /// With exactly one command-line argument, that argument (parsed by atoi) is
 /// the only size; otherwise a built-in list of sizes is used. The "match"
 /// column is 1 when both products compare equal cell by cell.
 ///
 /// @param argc number of command-line arguments, including the program name
 /// @param argv argument strings; argv[1], when present alone, is the matrix size
 /// @return 0, the conventional exit status for a successful run
 int main(int argc, char *argv[]){
 	std::vector<int> ranges;
 	if(argc == 2){
 		ranges.push_back(atoi(argv[1]));
 	} else {
 		ranges = {1,2,5,10,15,25,35};
 	}
 	typedef std::chrono::high_resolution_clock Clock;
 	printf("matrix_size,slow_time,fast_time,match\n");
 	for(auto i: ranges){
 		MatrixNN a(i),b(i),c1(1),c2(1);
 		a.FillRandom();
 		b.FillRandom();
 		duration<double> time_span_slow,time_span_fast;
 		{
 			// Serial reference product.
 			const auto t1 = Clock::now();
 			c1 = a.SlowMult(b);
 			const auto t2 = Clock::now();
 			time_span_slow = duration_cast<std::chrono::duration<double>>(t2 - t1);
 		}

 		{
 			// Threaded product of the same operands.
 			const auto t1 = Clock::now();
 			c2 = a.QuickMult(b);
 			const auto t2 = Clock::now();
 			time_span_fast = duration_cast<std::chrono::duration<double>>(t2 - t1);
 		}

 		printf("%d,%f,%f,%d\n",i,time_span_slow.count(),time_span_fast.count(),static_cast<int>(c1==c2));
 	}
 	// A non-zero status would tell the calling shell or script that the run failed.
 	return 0;
 }
diff --git a/MatrixNN.hpp b/MatrixNN.hpp
 #ifndef __MATRIXNN_H__
 #define __MATRIXNN_H__
 #include "thread_helpers.hpp"

 #include <cstdio>
 #include <cstring>
 #include <random>
 #include <vector>

 /// Dense square (N x N) matrix of doubles kept row-major in one contiguous vector.
 ///
 /// NOTE(review): the multiplication routines size the result from the right-hand
 /// operand and iterate over this matrix's dimension; operands of different
 /// dimension are not detected, so callers must pass matrices of equal size.
 class MatrixNN{
 public:
 	/// Create an n x n matrix with every cell set to 0.0.
 	/// A negative n is treated as 0 so the storage always holds exactly n*n cells.
 	MatrixNN(const int &n)
 		: data_(CellCount(n), 0.0), n_(n > 0 ? n : 0) {}

 	/// Copy constructor: the new matrix gets its own copy of the cells.
 	/// (The storage is copy-constructed; writing through begin() of a still
 	/// empty vector would run past its end.)
 	MatrixNN(const MatrixNN &m)
 		: data_(m.data_), n_(m.n_) {}

 	/// Set to identity matrix: ones on the main diagonal, zeros everywhere else.
 	void Identity() {
 		// Clear first so previously stored off-diagonal values do not survive.
 		data_.assign(data_.size(), 0.0);
 		for(int d = 0; d < n_; d++){
 			(*this)(d,d) = 1.0;
 		}
 	}

 	/// Fill matrix with random data drawn uniformly from [1.0, 2.0).
 	/// The engine is default-constructed on every call, so every call produces
 	/// the same sequence and equally sized matrices receive identical contents.
 	void FillRandom(){
 		std::default_random_engine generator;
 		std::uniform_real_distribution<double> distribution(1.0,2.0);
 		for(double &cell : data_){
 			cell = distribution(generator);
 		}
 	}

 	/// Get size of matrix (returns the number of cells, N*N)
 	int get_size()const{return static_cast<int>(data_.size());}

 	/// Get number of rows/cols (matrix is square)
 	int get_n()const{return n_;}

 	/// Multiply matrices (concurrent threading)
 	///
 	/// Hands one output cell per index to parallelFor; every index maps to a
 	/// different cell of the result, so no two calls write the same element.
 	/// @param m right-hand operand, expected to have the same dimension as this matrix
 	/// @return the product (*this) * m
 	MatrixNN QuickMult(const MatrixNN& m)const{
 		MatrixNN tmp(m.get_n());
 		auto myFunc = [&](unsigned int i){
 			// Flat index -> (row, column). n_ is non-zero whenever there is an index to process.
 			const int r = static_cast<int>(i) / n_;
 			const int c = static_cast<int>(i) % n_;
 			tmp(r,c) = DotRowCol(m,r,c);
 		};
 		parallelFor(static_cast<unsigned int>(data_.size()),myFunc);
 		return tmp;
 	}

 	/// Multiply matrices (single thread)
 	/// @param m right-hand operand, expected to have the same dimension as this matrix
 	/// @return the product (*this) * m
 	MatrixNN SlowMult(const MatrixNN& m)const{
 		MatrixNN tmp(m.get_n());
 		for(int r = 0; r < n_; r++){
 			for(int c = 0; c < n_; c++){
 				tmp(r,c) = DotRowCol(m,r,c);
 			}
 		}
 		return tmp;
 	}

 	/// Multiply matrices (single thread); same result as SlowMult.
 	MatrixNN operator*(const MatrixNN& m)const{
 		return SlowMult(m);
 	}

 	/// Equality operator compares size and cells (exact floating-point comparison)
 	bool operator==(const MatrixNN &rhs)const{
 		return n_ == rhs.n_ && data_ == rhs.data_;
 	}

 	/// Copy assignment: take over dimension and cells of m.
 	/// @return reference to this matrix so assignments can be chained
 	MatrixNN &operator=(const MatrixNN &m){
 		n_ = m.n_;
 		data_ = m.data_;
 		return *this;
 	}

 	/// Read the cell at (row, col); indices are not range-checked.
 	double operator()(const int& row, const int& col)const{
 		return data_[Index(row,col)];
 	}

 	/// Writable access to the cell at (row, col); indices are not range-checked.
 	double &operator()(const int& row, const int& col){
 		return data_[Index(row,col)];
 	}

 	/// Writable access by flat row-major index; not range-checked.
 	double &operator[](const int& index){
 		return data_[index];
 	}

 	/// Read access by flat row-major index; not range-checked.
 	double operator[](const int& index)const{
 		return data_[index];
 	}
 private:
 	/// Number of cells of an n x n matrix, computed in size_t so n*n cannot overflow int.
 	static std::size_t CellCount(const int n){
 		const std::size_t side = n > 0 ? static_cast<std::size_t>(n) : 0;
 		return side * side;
 	}

 	/// Row-major position of (row, col) inside data_.
 	std::size_t Index(const int row, const int col)const{
 		return static_cast<std::size_t>(row) * static_cast<std::size_t>(n_) + static_cast<std::size_t>(col);
 	}

 	/// Dot product of row `row` of this matrix with column `col` of m.
 	/// Shared by the serial and the threaded product so both add the terms in the same order.
 	double DotRowCol(const MatrixNN &m, const int row, const int col)const{
 		double sum = 0.0;
 		for(int k = 0; k < n_; k++){
 			sum += (*this)(row,k) * m(k,col);
 		}
 		return sum;
 	}

 	std::vector<double> data_;	// n_*n_ cells, row-major
 	int n_;						// number of rows (== number of columns)
 };

 #endif //__MATRIXNN_H__
diff --git a/thread_helpers.hpp b/thread_helpers.hpp
 #ifndef __THREAD_HELPERS_H__
 #define __THREAD_HELPERS_H__

 #include <cmath>
 #include <thread>
 #include <vector>
 #include <random>
 #include <chrono>

 /// Concurrently iterate through a vector split across 1 or more concurrent threads
 ///
 /// Modified version of [1].  
 ///
 /// [1] - http://stackoverflow.com/questions/17235053/c11-alternative-to-openmp-with-clang
 /// Call func(i) once for every index i in [0, size), spread over worker threads.
 ///
 /// Worker k handles the indices k, k + T, k + 2T, ... where T is the number of
 /// workers. The call returns only after every worker has been joined. func is
 /// invoked from several threads at the same time, each holding its own copy.
 ///
 /// @param size number of indices to visit; 0 returns immediately
 /// @param func callback that receives each index exactly once
 inline void parallelFor(const unsigned int size, std::function<void(const unsigned int)> func) {
 	if (size == 0) return;

 	// hardware_concurrency() is allowed to return 0 when the value is unknown;
 	// without a fallback no worker would start and no index would be processed.
 	unsigned int nbThreads = std::thread::hardware_concurrency();
 	if (nbThreads == 0) nbThreads = 1;
 	// Never start more workers than there are indices.
 	if (nbThreads > size) nbThreads = size;

 	std::vector < std::thread > threads;
 	threads.reserve(nbThreads);
 	try {
 		for (unsigned int idThread = 0; idThread < nbThreads; idThread++) {
 			threads.emplace_back([=]() {
 				unsigned int i = idThread;
 				while (true) {
 					func(i);
 					// Stop before the increment could step past size (or wrap around).
 					if (size - i <= nbThreads) break;
 					i += nbThreads;
 				}
 			});
 		}
 	} catch (...) {
 		// Destroying a joinable std::thread terminates the program, so wait for
 		// the workers that did start before passing the error on.
 		for (auto & t : threads) t.join();
 		throw;
 	}
 	for (auto & t : threads) t.join();
 }


 /// std implementation of sleep(milliseconds)
 /// Suspend the calling thread for the given number of milliseconds,
 /// using only the standard library.
 inline void sleep(int ms){
 	std::this_thread::sleep_for(std::chrono::milliseconds(ms));
 }
 #endif //__THREAD_HELPERS_H__
	#include "matrixnn.hpp"

	#include <cmath>
	#include <thread>
	#include <vector>
	#include <cstdio>
	#include <cstring>
	#include <random>
	#include <chrono>

	using namespace std::chrono;

	// MatrixNN example usage and timing comparisons between serial and parallel processing
	//
	// Note: clang 3.1.2 has a small bug [1] when compiling threaded code: it does not define the macros that the threading headers expect.
	// Use the following to compile for clang:
	//
	// clang++ -std=c++11 -pthread -stdlib=libstdc++ -D__GCC_HAVE_SYNC_COMPARE_AND_SWAP_1 -D__GCC_HAVE_SYNC_COMPARE_AND_SWAP_2 -D__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4 -D__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8
	//
	// [1] - http://stackoverflow.com/questions/16171401/why-does-this-simple-c11-threading-example-fail-when-compiled-with-clang-3-2
	/// Benchmark driver: times the serial and the threaded matrix product for a
	/// list of matrix sizes and prints one CSV row per size.
	///
	/// With exactly one command-line argument, that argument (parsed by atoi) is
	/// the only size; otherwise a built-in list of sizes is used. The "match"
	/// column is 1 when both products compare equal cell by cell.
	///
	/// @param argc number of command-line arguments, including the program name
	/// @param argv argument strings; argv[1], when present alone, is the matrix size
	/// @return 0, the conventional exit status for a successful run
	int main(int argc, char *argv[]){
		std::vector<int> ranges;
		if(argc == 2){
			ranges.push_back(atoi(argv[1]));
		} else {
			ranges = {1,2,5,10,15,25,35};
		}
		typedef std::chrono::high_resolution_clock Clock;
		printf("matrix_size,slow_time,fast_time,match\n");
		for(auto i: ranges){
			MatrixNN a(i),b(i),c1(1),c2(1);
			a.FillRandom();
			b.FillRandom();
			duration<double> time_span_slow,time_span_fast;
			{
				// Serial reference product.
				const auto t1 = Clock::now();
				c1 = a.SlowMult(b);
				const auto t2 = Clock::now();
				time_span_slow = duration_cast<std::chrono::duration<double>>(t2 - t1);
			}

			{
				// Threaded product of the same operands.
				const auto t1 = Clock::now();
				c2 = a.QuickMult(b);
				const auto t2 = Clock::now();
				time_span_fast = duration_cast<std::chrono::duration<double>>(t2 - t1);
			}

			printf("%d,%f,%f,%d\n",i,time_span_slow.count(),time_span_fast.count(),static_cast<int>(c1==c2));
		}
		// A non-zero status would tell the calling shell or script that the run failed.
		return 0;
	}
	#ifndef __MATRIXNN_H__
	#define __MATRIXNN_H__
	#include "thread_helpers.hpp"

	#include <cstdio>
	#include <cstring>
	#include <random>
	#include <vector>

	/// Dense square (N x N) matrix of doubles kept row-major in one contiguous vector.
	///
	/// NOTE(review): the multiplication routines size the result from the right-hand
	/// operand and iterate over this matrix's dimension; operands of different
	/// dimension are not detected, so callers must pass matrices of equal size.
	class MatrixNN{
	public:
		/// Create an n x n matrix with every cell set to 0.0.
		/// A negative n is treated as 0 so the storage always holds exactly n*n cells.
		MatrixNN(const int &n)
			: data_(CellCount(n), 0.0), n_(n > 0 ? n : 0) {}

		/// Copy constructor: the new matrix gets its own copy of the cells.
		/// (The storage is copy-constructed; writing through begin() of a still
		/// empty vector would run past its end.)
		MatrixNN(const MatrixNN &m)
			: data_(m.data_), n_(m.n_) {}

		/// Set to identity matrix: ones on the main diagonal, zeros everywhere else.
		void Identity() {
			// Clear first so previously stored off-diagonal values do not survive.
			data_.assign(data_.size(), 0.0);
			for(int d = 0; d < n_; d++){
				(*this)(d,d) = 1.0;
			}
		}

		/// Fill matrix with random data drawn uniformly from [1.0, 2.0).
		/// The engine is default-constructed on every call, so every call produces
		/// the same sequence and equally sized matrices receive identical contents.
		void FillRandom(){
			std::default_random_engine generator;
			std::uniform_real_distribution<double> distribution(1.0,2.0);
			for(double &cell : data_){
				cell = distribution(generator);
			}
		}

		/// Get size of matrix (returns the number of cells, N*N)
		int get_size()const{return static_cast<int>(data_.size());}

		/// Get number of rows/cols (matrix is square)
		int get_n()const{return n_;}

		/// Multiply matrices (concurrent threading)
		///
		/// Hands one output cell per index to parallelFor; every index maps to a
		/// different cell of the result, so no two calls write the same element.
		/// @param m right-hand operand, expected to have the same dimension as this matrix
		/// @return the product (*this) * m
		MatrixNN QuickMult(const MatrixNN& m)const{
			MatrixNN tmp(m.get_n());
			auto myFunc = [&](unsigned int i){
				// Flat index -> (row, column). n_ is non-zero whenever there is an index to process.
				const int r = static_cast<int>(i) / n_;
				const int c = static_cast<int>(i) % n_;
				tmp(r,c) = DotRowCol(m,r,c);
			};
			parallelFor(static_cast<unsigned int>(data_.size()),myFunc);
			return tmp;
		}

		/// Multiply matrices (single thread)
		/// @param m right-hand operand, expected to have the same dimension as this matrix
		/// @return the product (*this) * m
		MatrixNN SlowMult(const MatrixNN& m)const{
			MatrixNN tmp(m.get_n());
			for(int r = 0; r < n_; r++){
				for(int c = 0; c < n_; c++){
					tmp(r,c) = DotRowCol(m,r,c);
				}
			}
			return tmp;
		}

		/// Multiply matrices (single thread); same result as SlowMult.
		MatrixNN operator*(const MatrixNN& m)const{
			return SlowMult(m);
		}

		/// Equality operator compares size and cells (exact floating-point comparison)
		bool operator==(const MatrixNN &rhs)const{
			return n_ == rhs.n_ && data_ == rhs.data_;
		}

		/// Copy assignment: take over dimension and cells of m.
		/// @return reference to this matrix so assignments can be chained
		MatrixNN &operator=(const MatrixNN &m){
			n_ = m.n_;
			data_ = m.data_;
			return *this;
		}

		/// Read the cell at (row, col); indices are not range-checked.
		double operator()(const int& row, const int& col)const{
			return data_[Index(row,col)];
		}

		/// Writable access to the cell at (row, col); indices are not range-checked.
		double &operator()(const int& row, const int& col){
			return data_[Index(row,col)];
		}

		/// Writable access by flat row-major index; not range-checked.
		double &operator[](const int& index){
			return data_[index];
		}

		/// Read access by flat row-major index; not range-checked.
		double operator[](const int& index)const{
			return data_[index];
		}
	private:
		/// Number of cells of an n x n matrix, computed in size_t so n*n cannot overflow int.
		static std::size_t CellCount(const int n){
			const std::size_t side = n > 0 ? static_cast<std::size_t>(n) : 0;
			return side * side;
		}

		/// Row-major position of (row, col) inside data_.
		std::size_t Index(const int row, const int col)const{
			return static_cast<std::size_t>(row) * static_cast<std::size_t>(n_) + static_cast<std::size_t>(col);
		}

		/// Dot product of row `row` of this matrix with column `col` of m.
		/// Shared by the serial and the threaded product so both add the terms in the same order.
		double DotRowCol(const MatrixNN &m, const int row, const int col)const{
			double sum = 0.0;
			for(int k = 0; k < n_; k++){
				sum += (*this)(row,k) * m(k,col);
			}
			return sum;
		}

		std::vector<double> data_;	// n_*n_ cells, row-major
		int n_;						// number of rows (== number of columns)
	};

	#endif //__MATRIXNN_H__
	#ifndef __THREAD_HELPERS_H__
	#define __THREAD_HELPERS_H__

	#include <cmath>
	#include <thread>
	#include <vector>
	#include <random>
	#include <chrono>

	/// Concurrently iterate through a vector split across 1 or more concurrent threads
	///
	/// Modified version of [1].
	///
	/// [1] - http://stackoverflow.com/questions/17235053/c11-alternative-to-openmp-with-clang
	/// Call func(i) once for every index i in [0, size), spread over worker threads.
	///
	/// Worker k handles the indices k, k + T, k + 2T, ... where T is the number of
	/// workers. The call returns only after every worker has been joined. func is
	/// invoked from several threads at the same time, each holding its own copy.
	///
	/// @param size number of indices to visit; 0 returns immediately
	/// @param func callback that receives each index exactly once
	inline void parallelFor(const unsigned int size, std::function<void(const unsigned int)> func) {
		if (size == 0) return;

		// hardware_concurrency() is allowed to return 0 when the value is unknown;
		// without a fallback no worker would start and no index would be processed.
		unsigned int nbThreads = std::thread::hardware_concurrency();
		if (nbThreads == 0) nbThreads = 1;
		// Never start more workers than there are indices.
		if (nbThreads > size) nbThreads = size;

		std::vector < std::thread > threads;
		threads.reserve(nbThreads);
		try {
			for (unsigned int idThread = 0; idThread < nbThreads; idThread++) {
				threads.emplace_back([=]() {
					unsigned int i = idThread;
					while (true) {
						func(i);
						// Stop before the increment could step past size (or wrap around).
						if (size - i <= nbThreads) break;
						i += nbThreads;
					}
				});
			}
		} catch (...) {
			// Destroying a joinable std::thread terminates the program, so wait for
			// the workers that did start before passing the error on.
			for (auto & t : threads) t.join();
			throw;
		}
		for (auto & t : threads) t.join();
	}


	/// std implementation of sleep(milliseconds)
	/// Suspend the calling thread for the given number of milliseconds,
	/// using only the standard library.
	inline void sleep(int ms){
		std::this_thread::sleep_for(std::chrono::milliseconds(ms));
	}
	#endif //__THREAD_HELPERS_H__