Skip to content

Instantly share code, notes, and snippets.

@CyberShadow
Created August 29, 2022 16:36
Show Gist options
  • Select an option

  • Save CyberShadow/63e139a16b9b278fb5d449ace611e7b8 to your computer and use it in GitHub Desktop.

Select an option

Save CyberShadow/63e139a16b9b278fb5d449ace611e7b8 to your computer and use it in GitHub Desktop.
Parallel sort experiments
import std.algorithm.iteration;
import std.algorithm.mutation;
import std.algorithm.sorting;
import std.datetime.stopwatch;
import std.exception;
import std.parallelism;
import std.random;
import std.range;
import std.stdio;
import std.typecons;
enum N = 1*1024*1024;
alias T = float;
alias I = size_t;
immutable T[] data;
shared static this()
{
auto data = new T[N];
rndGen.seed = 0;
foreach (ref datum; data)
datum = uniform01!T;
.data = data.assumeUnique;
}
alias sort0 = sort!((a, b) => data[a] < data[b], SwapStrategy.stable, I[]);
void sort1(I[] order)
{
foreach (chunk; order.chunks(order.length / totalCPUs + 1).parallel)
chunk.sort0();
order.sort0();
}
enum totalCPUsCT = 48;
shared static this() { assert(totalCPUsCT == totalCPUs); }
void sort2(size_t depth = 0)(I[] order)
{
static if ((1L << depth) < totalCPUsCT)
foreach (chunk; order.chunks(order.length / 2 + 1).parallel(1))
chunk.sort2!(depth + 1)();
order.sort0();
}
void sort3(size_t depth = 0)(I[] order)
{
static if ((1L << depth) < totalCPUsCT)
foreach (chunk; order.chunks(order.length / 4 + 1).parallel(1))
chunk.sort3!(depth + 2)();
order.sort0();
}
void sort4(size_t depth = 1)(I[] order)
{
static if ((1L << depth) < totalCPUsCT)
foreach (chunk; order.chunks(order.length / 2 + 1).parallel(1))
chunk.sort4!(depth + 1)();
order.sort0();
}
void sort5(size_t depth = 0)(I[] order)
{
static if ((1L << depth) < totalCPUsCT)
{
auto taskPool = new TaskPool; // this hangs??? why?????
foreach (chunk; taskPool.parallel(order.chunks(order.length / 2 + 1), 1))
chunk.sort5!(depth + 1)();
taskPool.stop();
}
order.sort0();
}
void sort6(size_t depth = 0)(I[] order)
{
enum numChunks = (1L << depth);
static if (numChunks < totalCPUsCT)
order.sort6!(depth + 1)();
foreach (chunk; order.chunks(order.length / numChunks + 1).parallel(1))
chunk.sort0();
}
void sortWith(alias sort)()
{
__gshared I[N] arr;
foreach (I i, ref v; arr)
v = i;
sort(arr[]);
}
void main()
{
benchmark!(
sortWith!sort0,
sortWith!sort1,
sortWith!sort2,
sortWith!sort3,
sortWith!sort4,
// sortWith!sort5,
sortWith!sort6,
)(16).each!writeln;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment