Skip to content

Instantly share code, notes, and snippets.

@GeekGawd
Created May 4, 2025 22:37
Show Gist options
  • Save GeekGawd/d4e51f009a97b0b9311c25396d18b170 to your computer and use it in GitHub Desktop.
Save GeekGawd/d4e51f009a97b0b9311c25396d18b170 to your computer and use it in GitHub Desktop.
C++ Merge Sort Test - SMT On vs OFF
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>
#include <unistd.h>
#include <sched.h>
#include <time.h>
#include <string.h>
typedef struct {
int *a;
int l, r;
int depth;
} Args;
void merge(int *a, int l, int m, int r) {
int n1 = m - l, n2 = r - m;
int *L = malloc(n1 * sizeof *L), *R = malloc(n2 * sizeof *R);
for(int i=0;i<n1;i++) L[i]=a[l+i];
for(int j=0;j<n2;j++) R[j]=a[m+j];
int i=0,j=0,k=l;
while(i<n1 && j<n2) a[k++] = (L[i]<R[j]?L[i++]:R[j++]);
while(i<n1) a[k++] = L[i++];
while(j<n2) a[k++] = R[j++];
free(L); free(R);
}
void merge_sort_st(int *a, int l, int r) {
if(r-l<2) return;
int m=(l+r)/2;
merge_sort_st(a,l,m);
merge_sort_st(a,m,r);
merge(a,l,m,r);
}
void *merge_sort_mt(void *vp) {
Args *arg = vp;
int l = arg->l, r = arg->r, depth = arg->depth;
int *a = arg->a;
if(r-l<2) return NULL;
int m = (l+r)/2;
if(depth==0) {
// spawn two threads
pthread_t t1,t2;
Args a1={a,l,m,depth+1}, a2={a,m,r,depth+1};
pthread_create(&t1,NULL,merge_sort_mt,&a1);
pthread_create(&t2,NULL,merge_sort_mt,&a2);
pthread_join(t1,NULL);
pthread_join(t2,NULL);
} else {
// deeper: do serial
merge_sort_st(a,l,m);
merge_sort_st(a,m,r);
}
merge(a,l,m,r);
return NULL;
}
double run_test(int *orig, int n, int threads, cpu_set_t *mask) {
// apply affinity
if (sched_setaffinity(0, sizeof *mask, mask) < 0) {
perror("sched_setaffinity");
exit(1);
}
int *a = malloc(n * sizeof *a);
memcpy(a, orig, n*sizeof *a);
struct timespec t0, t1;
clock_gettime(CLOCK_MONOTONIC, &t0);
if(threads==1) {
merge_sort_st(a,0,n);
} else {
Args arg={a,0,n,0};
merge_sort_mt(&arg);
}
clock_gettime(CLOCK_MONOTONIC, &t1);
free(a);
return (t1.tv_sec - t0.tv_sec) + (t1.tv_nsec - t0.tv_nsec)/1e9;
}
int main(int argc, char **argv) {
int n = (argc>1?atoi(argv[1]):200000);
int *orig = malloc(n * sizeof *orig);
for(int i=0;i<n;i++) orig[i] = rand();
// prepare CPU masks
cpu_set_t m0, m01;
CPU_ZERO(&m0); CPU_SET(0,&m0);
CPU_ZERO(&m01); CPU_SET(0,&m01); CPU_SET(1,&m01);
printf("Array size = %d\n", n);
double t1 = run_test(orig,n,1,&m0);
printf("1-thread on CPU0 only: %8.4f s\n", t1);
double t2 = run_test(orig,n,3,&m0);
printf("3-threads on CPU0 only: %8.4f s\n", t2);
double t3 = run_test(orig,n,3,&m01);
printf("3-threads on CPU0+CPU1 SMT: %8.4f s\n", t3);
free(orig);
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment