OpenMP Parallel Mergesort Sample
/* Iterative parallel merge sort in C, partly inspired by the
 * GeeksforGeeks article on bottom-up merge sort and adapting the
 * co-rank merge approach from university slides by
 * Dr. Steven S. Lumetta. */
// Compile with "gcc -fopenmp -O3 iterative_mergesort_openmp.c -o iterative_mergesort_openmp -lm"
/* Uploaded as a gist instead of in a proper repo since the code here
 * isn't quite up to the coding standards of what is normally uploaded
 * to my GitHub profile. */
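/* Overview: the sort below is a bottom-up (iterative) merge sort that
 * ping-pongs between two buffers. Each pass doubles the sorted run
 * length; pairs of runs are merged in parallel across the array, and
 * pairs above a size threshold are themselves merged in parallel using
 * the co-rank technique, which relies on the nested parallelism enabled
 * in main() via omp_set_max_active_levels(). */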
#include <math.h>
#include <omp.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

// CPU PARAMS
// Levels of nested OpenMP parallelism to allow (the outer loop over run
// pairs plus the inner co-rank merge teams).
#define MAX_PARALLEL_NESTING 2
// Scaling factor applied when sizing the thread team for co-rank merges.
#define SCALING_CONST 2

long min(long first, long second) {
  return (first < second ? first : second);
}
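/* Sequential two-pointer merge: repeatedly copy the smaller head element
 * of the two sorted inputs into dest_array, then copy over whichever
 * input still has elements left. The "<=" comparison keeps the merge
 * stable. */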
void merge(double* first_input_array,
           double* second_input_array,
           double* dest_array,
           long first_input_arr_len,
           long second_input_arr_len) {
  long current_index_first_arr = 0;
  long current_index_second_arr = 0;
  long current_index_dest_arr = 0;
  while (current_index_first_arr < first_input_arr_len &&
         current_index_second_arr < second_input_arr_len) {
    if (first_input_array[current_index_first_arr] <=
        second_input_array[current_index_second_arr]) {
      dest_array[current_index_dest_arr++] =
          first_input_array[current_index_first_arr++];
    } else {
      dest_array[current_index_dest_arr++] =
          second_input_array[current_index_second_arr++];
    }
  }
  if (current_index_first_arr >= first_input_arr_len) {
    for (; current_index_second_arr < second_input_arr_len;
         ++current_index_second_arr) {
      dest_array[current_index_dest_arr++] =
          second_input_array[current_index_second_arr];
    }
  } else {
    for (; current_index_first_arr < first_input_arr_len;
         ++current_index_first_arr) {
      dest_array[current_index_dest_arr++] =
          first_input_array[current_index_first_arr];
    }
  }
}
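/* Co-rank search: given an index k into the merged output, find via
 * binary search the split point i in the first input such that the first
 * k output elements are exactly the first i elements of the first input
 * merged with the first (k - i) elements of the second. For example, with
 * first = {1, 3, 5}, second = {2, 4, 6}, and k = 3, the result is i = 2:
 * output elements {1, 2, 3} come from {1, 3} and {2}. This lets threads
 * merge disjoint chunks of the output independently. */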
long determine_corank(long dest_arr_starting_index,
                      double* first_input_arr,
                      long first_input_arr_len,
                      double* second_input_arr,
                      long second_input_arr_len) {
  long low, high;
  if (dest_arr_starting_index > second_input_arr_len) {
    low = dest_arr_starting_index - second_input_arr_len;
  } else {
    low = 0;
  }
  if (dest_arr_starting_index < first_input_arr_len) {
    high = dest_arr_starting_index;
  } else {
    high = first_input_arr_len;
  }
  while (low < high) {
    long current_index_first_arr = low + (high - low) / 2;
    long current_index_second_arr =
        dest_arr_starting_index - current_index_first_arr;
    if (current_index_first_arr > 0 &&
        current_index_second_arr < second_input_arr_len &&
        first_input_arr[current_index_first_arr - 1] >
            second_input_arr[current_index_second_arr]) {
      high = current_index_first_arr - 1;
    } else if (current_index_second_arr > 0 &&
               current_index_first_arr < first_input_arr_len &&
               first_input_arr[current_index_first_arr] <=
                   second_input_arr[current_index_second_arr - 1]) {
      low = current_index_first_arr + 1;
    } else {
      return current_index_first_arr;
    }
  }
  return low;
}
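/* Parallel merge: split the destination range into equal chunks and merge
 * them concurrently; each thread runs two co-rank searches to find the
 * slices of the two inputs that produce its chunk, then merges those
 * slices sequentially. The team size is scaled by how large this merge is
 * relative to the whole array, so smaller merges get fewer threads. */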
void parallel_merge_using_corank(double* first_input_array,
                                 double* second_input_array,
                                 double* dest_array,
                                 long first_input_array_len,
                                 long second_input_array_len,
                                 long num_of_threads,
                                 long overall_arr_size) {
  long min_array_portion_len =
      min(first_input_array_len, second_input_array_len);
  if (min_array_portion_len <= 0) {
    min_array_portion_len = 1;
  }
  long total_thread_count = 1 + ((num_of_threads * SCALING_CONST - 1) /
                                 (overall_arr_size / min_array_portion_len));
#pragma omp parallel for shared(total_thread_count) \
    num_threads(total_thread_count)
  for (long thread_id = 0; thread_id < total_thread_count; ++thread_id) {
    long dest_arr_chunk_size =
        1 + ((first_input_array_len + second_input_array_len - 1) /
             total_thread_count);
    long dest_arr_starting_index = thread_id * dest_arr_chunk_size;
    dest_arr_starting_index =
        min(first_input_array_len + second_input_array_len,
            dest_arr_starting_index);
    long dest_arr_ending_index = dest_arr_starting_index + dest_arr_chunk_size;
    dest_arr_ending_index = min(first_input_array_len + second_input_array_len,
                                dest_arr_ending_index);
    long first_arr_starting_index = determine_corank(
        dest_arr_starting_index, first_input_array, first_input_array_len,
        second_input_array, second_input_array_len);
    long first_arr_ending_index = determine_corank(
        dest_arr_ending_index, first_input_array, first_input_array_len,
        second_input_array, second_input_array_len);
    long second_arr_starting_index =
        dest_arr_starting_index - first_arr_starting_index;
    long second_arr_ending_index =
        dest_arr_ending_index - first_arr_ending_index;
    merge(&first_input_array[first_arr_starting_index],
          &second_input_array[second_arr_starting_index],
          &dest_array[dest_arr_starting_index],
          first_arr_ending_index - first_arr_starting_index,
          second_arr_ending_index - second_arr_starting_index);
  }
}
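/* Bottom-up driver: each pass merges sorted runs of length curr_size from
 * the auxiliary buffer into the primary buffer, then swaps the two buffer
 * pointers so the next pass reads what the previous pass wrote. Pairs of
 * runs no longer than n / 2^ceil(log2(num_threads)) are merged serially,
 * on the theory that the outer loop over pairs already supplies enough
 * parallelism; larger pairs use the nested co-rank merge above. */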
/* Iterative parallel_merge_sort function to sort arr[0...n-1] */
void parallel_merge_sort(double** arr, double** arr_aux, long n,
                         long num_threads) {
  long curr_size;   // Current size of the subarrays to be merged;
                    // varies from 1 to n/2
  long left_start;  // Starting index of the left subarray to be merged
  double* arr_deref = *arr;
  double* arr_aux_deref = *arr_aux;
  // Seed the auxiliary buffer with the input, since the first pass
  // merges from arr_aux into arr and must read the actual data.
#pragma omp parallel for shared(arr_deref, arr_aux_deref, n)
  for (long current_index = 0; current_index < n; ++current_index) {
    arr_aux_deref[current_index] = arr_deref[current_index];
  }
  long serial_merge_threshold = (n >> ((int)ceil(log(num_threads) / log(2))));
  // Merge subarrays in bottom-up manner. First merge subarrays of
  // size 1 to create sorted subarrays of size 2, then merge subarrays
  // of size 2 to create sorted subarrays of size 4, and so on.
  for (curr_size = 1; curr_size <= n - 1; curr_size = 2 * curr_size) {
    // Pick the starting point of each pair of subarrays of the current size
#pragma omp parallel for shared(arr_deref, arr_aux_deref, curr_size, n) \
    private(left_start)
    for (left_start = 0; left_start < n - 1; left_start += 2 * curr_size) {
      // Find the ending point of the left subarray; mid+1 is the
      // starting point of the right one
      long mid = min(left_start + curr_size - 1, n - 1);
      long right_end = min(left_start + 2 * curr_size - 1, n - 1);
      long arr_left_len = mid - left_start + 1;
      long arr_right_len = right_end - mid;
      if (arr_left_len <= serial_merge_threshold &&
          arr_right_len <= serial_merge_threshold) {
        merge(&arr_aux_deref[left_start], &arr_aux_deref[mid + 1],
              &arr_deref[left_start], arr_left_len, arr_right_len);
      } else {
        parallel_merge_using_corank(&arr_aux_deref[left_start],
                                    &arr_aux_deref[mid + 1],
                                    &arr_deref[left_start], arr_left_len,
                                    arr_right_len, num_threads, n);
      }
    }
    // Swap pointers to avoid redundant copying
    double* tmp_array_ptr = *arr_aux;
    *arr_aux = *arr;
    *arr = tmp_array_ptr;
    arr_deref = *arr;
    arr_aux_deref = *arr_aux;
  }
  // Each pass's trailing swap parks the freshly merged data in *arr_aux,
  // so swap once more so the caller finds the sorted array in *arr.
  double* tmp_array_ptr = *arr_aux;
  *arr_aux = *arr;
  *arr = tmp_array_ptr;
}
/* Driver program to test the above functions */
int main() {
  omp_set_max_active_levels(MAX_PARALLEL_NESTING);
  srand(100);
  long N;
  printf("Enter array size: ");
  int blah = scanf("%ld", &N);
  // Hack to suppress the compiler warning about ignoring scanf's result
  (void)blah;
  double* arr = malloc(N * sizeof(*arr));
  double* arr_aux = malloc(N * sizeof(*arr_aux));
  for (long i = 0; i < N; i++) {
    arr[i] = rand();
  }
  long num_threads = 0;
#pragma omp parallel shared(num_threads)
#pragma omp single
  num_threads = omp_get_num_threads();
  double start_time = omp_get_wtime();
  parallel_merge_sort(&arr, &arr_aux, N, num_threads);
  double end_time = omp_get_wtime();
  bool sorted_correctly = true;
  for (long i = 1; i < N; i++) {
    if (arr[i] < arr[i - 1]) {
      printf("Sorting failed using parallel_merge_sort!\n");
      sorted_correctly = false;
      break;
    }
  }
  if (sorted_correctly) {
    printf("Sorting succeeded using parallel_merge_sort!\n");
  }
  printf(
      "Time taken to sort array of length %ld "
      "using %ld threads: %lf seconds.\n",
      N, num_threads, end_time - start_time);
  free(arr);
  free(arr_aux);
  return 0;
} |
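To try it out: build with the compile command in the header comment, run the binary, and enter an array size (e.g. 10000000) at the prompt. The program reports whether the result verified as sorted and the elapsed wall-clock time for the detected thread count.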