vfdev-5 · June 24, 2016 07:34
diff --git a/CMakeLists.txt b/CMakeLists.txt
 project(FalseParallelSharing)
 cmake_minimum_required(VERSION 2.8)
 aux_source_directory(. SRC_LIST)
 add_executable(${PROJECT_NAME} ${SRC_LIST})
diff --git a/main.cpp b/main.cpp

 // STD
 #include <iostream>
 #include <algorithm>
 #include <vector>
 #include <thread>
 #include <functional>
 #include <chrono>

 const unsigned int MAX_NB_THREADS = 8;

 //***********************************************************************************************

 void false_parallel_sharing_compute(int nbThreads, const std::vector<int> & data, 
 									std::vector<unsigned int> & results, int p)
 {
 	results[p] = 0;
 	unsigned int chunkSize = data.size() / nbThreads + 1;
 	unsigned int start = p * chunkSize;
 	unsigned int end = start + chunkSize > data.size() ? data.size() : start + chunkSize;
 	for (unsigned int i=start; i<end; i++)
 	{
 		if (data[i] % 2) 
 		{
 			// Multiple access to closely placed memory place causes performance slow down
 			++results[p];
 		}
 	}	
 }

 //***********************************************************************************************

 void better_parallel_sharing_compute(int nbThreads, const std::vector<int> & data, 
 									std::vector<unsigned int> & results, int p)
 {
 	// Here local variable avoids multiple access to results external variable
 	unsigned int counter = 0;
 	unsigned int chunkSize = data.size() / nbThreads + 1;
 	unsigned int start = p * chunkSize;
 	unsigned int end = start + chunkSize > data.size() ? data.size() : start + chunkSize;
 	for (unsigned int i=start; i<end; i++)
 	{
 		if (data[i] % 2) 
 		{
 			++counter;
 		}
 	}
 	results[p] = counter;
 }

 //***********************************************************************************************

 int main()
 {
 	// Setup data:
 	std::vector<int> data(15000000);

 	int counter = 0;
 	std::for_each(std::begin(data), std::end(data), [&counter](int & value){ value = counter++; });
 	// Print data
 	//std::cout << "data = [" << std::endl;
 	//std::for_each(std::begin(data), std::end(data), [](double value){ std::cout << value << " "; });
 	//std::cout << "]" << std::endl;

 	// Example 1: Simple parallel version (flawed)
 	int c;
 	std::cout << std::endl << "Example 1: Simple parallel version (flawed). Press any key" << std::endl;
 	c = getchar();
 	
 	for (unsigned int nbThreads=1; nbThreads<=MAX_NB_THREADS; nbThreads++)
 	{
 		std::cout << "Run example with " << nbThreads << " thread(s)" << std::endl;
 		
 		// setup the resulting memory container
 		std::vector<unsigned int> results(nbThreads, 0);
 	
 		// Start timer:
 		auto t0 = std::chrono::high_resolution_clock::now();
 		
 		std::vector<std::thread> pool;
 		for (unsigned int p=0; p<nbThreads; p++)
 		{
 			pool.push_back(std::thread(false_parallel_sharing_compute, nbThreads, std::cref(data), std::ref(results), p));
 		}
 		
 		// pool join:
 		for (unsigned int p=0; p<nbThreads; p++)
 		{
 			pool[p].join();
 		}
 		// Stop timer:
 		auto elapsed = std::chrono::high_resolution_clock::now() - t0;
 		std::cout << "Elapsed seconds : " << std::chrono::duration<float>(elapsed).count() << std::endl;
 		
 		int odds = 0;
 		std::for_each(std::begin(results), std::end(results), [&odds](unsigned int value) {
 			odds += value;
 		});
 		std::cout << "Number of odd values is " << odds << std::endl;
 	}
 		
 	// Example 2: From Zero to Hero
 	std::cout << std::endl << "Example 2: From Zero to Hero. Press any key" << std::endl;
 	c = getchar();
 	
 	for (unsigned int nbThreads=1; nbThreads<=MAX_NB_THREADS; nbThreads++)
 	{
 		std::cout << "Run example with " << nbThreads << " thread(s)" << std::endl;
 		
 		// setup the resulting memory container
 		std::vector<unsigned int> results(nbThreads, 0);
 		
 		// Start timer:
 		auto t0 = std::chrono::high_resolution_clock::now();
 		
 		std::vector<std::thread> pool;
 		for (unsigned int p=0; p<nbThreads; p++)
 		{
 			pool.push_back(std::thread(better_parallel_sharing_compute, nbThreads, std::cref(data), std::ref(results), p));
 		}
 		
 		// pool join:
 		for (unsigned int p=0; p<nbThreads; p++)
 		{
 			pool[p].join();
 		}
 		// Stop timer:
 		auto elapsed = std::chrono::high_resolution_clock::now() - t0;
 		std::cout << "Elapsed seconds : " << std::chrono::duration<float>(elapsed).count() << std::endl;
 		
 		int odds = 0;
 		std::for_each(std::begin(results), std::end(results), [&odds](unsigned int value) {
 			odds += value;
 		});
 		std::cout << "Number of odd values is " << odds << std::endl;
 	}
 	
 	return 0;
 }
	project(FalseParallelSharing)
	cmake_minimum_required(VERSION 2.8)
	aux_source_directory(. SRC_LIST)
	add_executable(${PROJECT_NAME} ${SRC_LIST})

	// STD
	#include <iostream>
	#include <algorithm>
	#include <vector>
	#include <thread>
	#include <functional>
	#include <chrono>

	const unsigned int MAX_NB_THREADS = 8;

	//***********************************************************************************************

	void false_parallel_sharing_compute(int nbThreads, const std::vector<int> & data,
	std::vector<unsigned int> & results, int p)
	{
	results[p] = 0;
	unsigned int chunkSize = data.size() / nbThreads + 1;
	unsigned int start = p * chunkSize;
	unsigned int end = start + chunkSize > data.size() ? data.size() : start + chunkSize;
	for (unsigned int i=start; i<end; i++)
	{
	if (data[i] % 2)
	{
	// Multiple access to closely placed memory place causes performance slow down
	++results[p];
	}
	}
	}

	//***********************************************************************************************

	void better_parallel_sharing_compute(int nbThreads, const std::vector<int> & data,
	std::vector<unsigned int> & results, int p)
	{
	// Here local variable avoids multiple access to results external variable
	unsigned int counter = 0;
	unsigned int chunkSize = data.size() / nbThreads + 1;
	unsigned int start = p * chunkSize;
	unsigned int end = start + chunkSize > data.size() ? data.size() : start + chunkSize;
	for (unsigned int i=start; i<end; i++)
	{
	if (data[i] % 2)
	{
	++counter;
	}
	}
	results[p] = counter;
	}

	//***********************************************************************************************

	int main()
	{
	// Setup data:
	std::vector<int> data(15000000);

	int counter = 0;
	std::for_each(std::begin(data), std::end(data), [&counter](int & value){ value = counter++; });
	// Print data
	//std::cout << "data = [" << std::endl;
	//std::for_each(std::begin(data), std::end(data), [](double value){ std::cout << value << " "; });
	//std::cout << "]" << std::endl;

	// Example 1: Simple parallel version (flawed)
	int c;
	std::cout << std::endl << "Example 1: Simple parallel version (flawed). Press any key" << std::endl;
	c = getchar();

	for (unsigned int nbThreads=1; nbThreads<=MAX_NB_THREADS; nbThreads++)
	{
	std::cout << "Run example with " << nbThreads << " thread(s)" << std::endl;

	// setup the resulting memory container
	std::vector<unsigned int> results(nbThreads, 0);

	// Start timer:
	auto t0 = std::chrono::high_resolution_clock::now();

	std::vector<std::thread> pool;
	for (unsigned int p=0; p<nbThreads; p++)
	{
	pool.push_back(std::thread(false_parallel_sharing_compute, nbThreads, std::cref(data), std::ref(results), p));
	}

	// pool join:
	for (unsigned int p=0; p<nbThreads; p++)
	{
	pool[p].join();
	}
	// Stop timer:
	auto elapsed = std::chrono::high_resolution_clock::now() - t0;
	std::cout << "Elapsed seconds : " << std::chrono::duration<float>(elapsed).count() << std::endl;

	int odds = 0;
	std::for_each(std::begin(results), std::end(results), [&odds](unsigned int value) {
	odds += value;
	});
	std::cout << "Number of odd values is " << odds << std::endl;
	}

	// Example 2: From Zero to Hero
	std::cout << std::endl << "Example 2: From Zero to Hero. Press any key" << std::endl;
	c = getchar();

	for (unsigned int nbThreads=1; nbThreads<=MAX_NB_THREADS; nbThreads++)
	{
	std::cout << "Run example with " << nbThreads << " thread(s)" << std::endl;

	// setup the resulting memory container
	std::vector<unsigned int> results(nbThreads, 0);

	// Start timer:
	auto t0 = std::chrono::high_resolution_clock::now();

	std::vector<std::thread> pool;
	for (unsigned int p=0; p<nbThreads; p++)
	{
	pool.push_back(std::thread(better_parallel_sharing_compute, nbThreads, std::cref(data), std::ref(results), p));
	}

	// pool join:
	for (unsigned int p=0; p<nbThreads; p++)
	{
	pool[p].join();
	}
	// Stop timer:
	auto elapsed = std::chrono::high_resolution_clock::now() - t0;
	std::cout << "Elapsed seconds : " << std::chrono::duration<float>(elapsed).count() << std::endl;

	int odds = 0;
	std::for_each(std::begin(results), std::end(results), [&odds](unsigned int value) {
	odds += value;
	});
	std::cout << "Number of odd values is " << odds << std::endl;
	}

	return 0;
	}