Created
May 4, 2025 22:37
-
-
Save GeekGawd/d4e51f009a97b0b9311c25396d18b170 to your computer and use it in GitHub Desktop.
C++ Merge Sort Test - SMT On vs OFF
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#define _GNU_SOURCE | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <pthread.h> | |
#include <unistd.h> | |
#include <sched.h> | |
#include <time.h> | |
#include <string.h> | |
typedef struct { | |
int *a; | |
int l, r; | |
int depth; | |
} Args; | |
void merge(int *a, int l, int m, int r) { | |
int n1 = m - l, n2 = r - m; | |
int *L = malloc(n1 * sizeof *L), *R = malloc(n2 * sizeof *R); | |
for(int i=0;i<n1;i++) L[i]=a[l+i]; | |
for(int j=0;j<n2;j++) R[j]=a[m+j]; | |
int i=0,j=0,k=l; | |
while(i<n1 && j<n2) a[k++] = (L[i]<R[j]?L[i++]:R[j++]); | |
while(i<n1) a[k++] = L[i++]; | |
while(j<n2) a[k++] = R[j++]; | |
free(L); free(R); | |
} | |
void merge_sort_st(int *a, int l, int r) { | |
if(r-l<2) return; | |
int m=(l+r)/2; | |
merge_sort_st(a,l,m); | |
merge_sort_st(a,m,r); | |
merge(a,l,m,r); | |
} | |
void *merge_sort_mt(void *vp) { | |
Args *arg = vp; | |
int l = arg->l, r = arg->r, depth = arg->depth; | |
int *a = arg->a; | |
if(r-l<2) return NULL; | |
int m = (l+r)/2; | |
if(depth==0) { | |
// spawn two threads | |
pthread_t t1,t2; | |
Args a1={a,l,m,depth+1}, a2={a,m,r,depth+1}; | |
pthread_create(&t1,NULL,merge_sort_mt,&a1); | |
pthread_create(&t2,NULL,merge_sort_mt,&a2); | |
pthread_join(t1,NULL); | |
pthread_join(t2,NULL); | |
} else { | |
// deeper: do serial | |
merge_sort_st(a,l,m); | |
merge_sort_st(a,m,r); | |
} | |
merge(a,l,m,r); | |
return NULL; | |
} | |
double run_test(int *orig, int n, int threads, cpu_set_t *mask) { | |
// apply affinity | |
if (sched_setaffinity(0, sizeof *mask, mask) < 0) { | |
perror("sched_setaffinity"); | |
exit(1); | |
} | |
int *a = malloc(n * sizeof *a); | |
memcpy(a, orig, n*sizeof *a); | |
struct timespec t0, t1; | |
clock_gettime(CLOCK_MONOTONIC, &t0); | |
if(threads==1) { | |
merge_sort_st(a,0,n); | |
} else { | |
Args arg={a,0,n,0}; | |
merge_sort_mt(&arg); | |
} | |
clock_gettime(CLOCK_MONOTONIC, &t1); | |
free(a); | |
return (t1.tv_sec - t0.tv_sec) + (t1.tv_nsec - t0.tv_nsec)/1e9; | |
} | |
int main(int argc, char **argv) { | |
int n = (argc>1?atoi(argv[1]):200000); | |
int *orig = malloc(n * sizeof *orig); | |
for(int i=0;i<n;i++) orig[i] = rand(); | |
// prepare CPU masks | |
cpu_set_t m0, m01; | |
CPU_ZERO(&m0); CPU_SET(0,&m0); | |
CPU_ZERO(&m01); CPU_SET(0,&m01); CPU_SET(1,&m01); | |
printf("Array size = %d\n", n); | |
double t1 = run_test(orig,n,1,&m0); | |
printf("1-thread on CPU0 only: %8.4f s\n", t1); | |
double t2 = run_test(orig,n,3,&m0); | |
printf("3-threads on CPU0 only: %8.4f s\n", t2); | |
double t3 = run_test(orig,n,3,&m01); | |
printf("3-threads on CPU0+CPU1 SMT: %8.4f s\n", t3); | |
free(orig); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment