Skip to content

Instantly share code, notes, and snippets.

@coderodde
Last active September 3, 2024 06:01
Show Gist options
  • Save coderodde/39ed37ba23fbc45fbe424e8889dda50c to your computer and use it in GitHub Desktop.
Save coderodde/39ed37ba23fbc45fbe424e8889dda50c to your computer and use it in GitHub Desktop.
A sorting task for quicksort in PPC course.
#include <iostream>
#include <algorithm>
#include <chrono>
#include <omp.h>
#include <pthread.h>
#define N 50000000
using std::swap;
using std::cout;
typedef unsigned long long data_t;
static int median(data_t* data,
int a,
int b,
int c) {
if (data[a] < data[b]) {
if (data[c] < data[a]) {
return a;
}
return data[c] > data[b] ? b : c;
}
if (data[c] < data[b]) {
return b;
}
return data[c] > data[a] ? a : c;
}
typedef struct parallel_sort_arguments {
data_t* data;
int lo;
int hi;
int threads;
} parallel_sort_arguments;
static void swap(data_t* data, int i, int j) {
data_t temp = data[i];
data[i] = data[j];
data[j] = temp;
}
static int partition(data_t* data, int lo, int hi) {
int pivot = data[hi];
int i = lo - 1;
for (int j = lo; j < hi; j++) {
if (data[j] < pivot) {
i++;
swap(data, i, j);
}
}
swap(data, i + 1, hi);
return i + 1;
/*data_t pivot = data[lo];
int i = lo - 1;
int j = hi + 1;
while (true) {
do i++; while (data[i] < pivot);
do j--; while (data[j] > pivot);
if (i >= j) return j;
data_t temp = data[i];
data[i] = data[j];
data[j] = temp;
}*/
}
static void* parallel_sort_impl(void* arg) {
parallel_sort_arguments* p = (parallel_sort_arguments*) arg;
data_t* data = p->data;
int lo = p->lo;
int hi = p->hi;
int threads = p->threads;
int range_length = hi - lo + 1;
if (range_length < 2 << 16 || threads <= 1) {
std::sort(data + lo, data + hi + 1);
return nullptr;
}
int distance = range_length / 4;
int a = lo + distance;
int b = lo + distance * 2;
int c = lo + distance * 3;
int median_index = median(data, a, b, c);
swap(data, median_index, hi);
int pi = partition(data, lo, hi);
int thread_count_left = threads / 2;
int thread_count_right = threads - thread_count_left;
parallel_sort_arguments left_threads_args;
left_threads_args.data = data;
left_threads_args.lo = lo;
left_threads_args.hi = pi;
left_threads_args.threads = thread_count_left;
parallel_sort_arguments right_threads_args;
right_threads_args.data = data;
right_threads_args.lo = pi + 1;
right_threads_args.hi = hi;
right_threads_args.threads = thread_count_right;
pthread_t left_pthread_id;
pthread_t right_pthread_id;
pthread_create(&right_pthread_id,
NULL,
parallel_sort_impl,
&right_threads_args);
pthread_create(&left_pthread_id,
NULL,
parallel_sort_impl,
&left_threads_args);
pthread_join(right_pthread_id, NULL);
pthread_join(left_pthread_id, NULL);
return nullptr;
}
void pthread_sort(data_t* data, int size) {
parallel_sort_arguments args;
args.data = data;
args.lo = 0;
args.hi = size - 1;
args.threads = omp_get_num_procs();
parallel_sort_impl((void*) &args);
}
static std::chrono::milliseconds get_millis() {
return std::chrono::duration_cast<std::chrono::milliseconds>(
std::chrono::system_clock::now().time_since_epoch());
}
static void do_concurrent_sort(data_t* data,
int lo,
int pivot,
int hi,
int threads,
int right_threads);
static void sortImpl(data_t* data,
int lo,
int hi,
int threads) {
if (threads <= 1 || hi - lo < 100000) {
std::sort(data + lo, data + hi + 1);
return;
}
int pivot = partition(data, lo, hi);
int left_threads = threads / 2;
int right_threads = threads - left_threads;
/*
#pragma omp parallel
{
#pragma omp single
{
do_concurrent_sort(data,
lo,
pivot,
hi,
left_threads,
right_threads);
}
}*/
/*
#pragma omp parallel
{
#pragma omp single
{
#pragma omp task
{
sortImpl(data,
lo,
pivot,
left_threads);
}
}
}*/
#pragma omp parallel shared(data) num_threads(1)
{
sortImpl(data,
lo,
pivot,
left_threads);
}
sortImpl(data,
pivot + 1,
hi,
right_threads);
}
static void do_concurrent_sort(data_t* data,
int lo,
int pivot,
int hi,
int left_threads,
int right_threads) {
#pragma omp task
sortImpl(data,
lo,
pivot,
left_threads);
#pragma omp task
sortImpl(data,
pivot + 1,
hi,
right_threads);
}
void psort(int n, data_t *data) {
sortImpl(data,
0,
n - 1,
omp_get_max_threads());
}
static void ssort(data_t* data, int lo, int hi) {
int range_length = hi - lo + 1;
if (range_length < 10) {
std::sort(data + lo, data + hi + 1);
return;
}
int distance = range_length / 4;
int a = lo + distance;
int b = lo + distance * 2;
int c = lo + distance * 3;
int median_index = median(data, a, b, c);
swap(data, median_index, hi);
int pi = partition(data, lo, hi);
ssort(data, lo, pi);
ssort(data, pi + 1, hi);
}
void ssort(int n, data_t* data) {
ssort(data, 0, n - 1);
}
int main() {
std::cout << "Starting benchmark.\n";
data_t* arr = new data_t[N];
data_t* arr2 = new data_t[N];
data_t* arr3 = new data_t[N];
data_t* arr4 = new data_t[N];
for (int i = 0; i < N; i++) {
arr[i] = rand();
arr2[i] = arr[i];
arr3[i] = arr[i];
arr4[i] = arr[i];
}
auto st = get_millis();
pthread_sort(arr4, N);
auto et = get_millis();
std::cout << "pthread_sort duration: " << et - st << "\n";
st = get_millis();
psort(N, arr);
et = get_millis();
std::cout << "My parallel quicksort duration: " << et - st << "\n";
st = get_millis();
ssort(N, arr2);
et = get_millis();
std::cout << "My sequential quicksort duration: " << et - st << "\n";
st = get_millis();
std::sort(arr3, arr3 + N);
et = get_millis();
std::cout << "std::sort duration: " << et - st << "\n";
std::cout << "Equals: " << std::boolalpha
<< (std::equal(arr2, arr2 + N, arr) &&
std::equal(arr3, arr3 + N, arr) &&
std::equal(arr4, arr4 + N, arr)) << "\n";
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment