Skip to content

Instantly share code, notes, and snippets.

@DBalashov
Created July 20, 2024 17:13
Show Gist options
  • Save DBalashov/4a94b0996edf1b7809b230bad0633261 to your computer and use it in GitHub Desktop.
Save DBalashov/4a94b0996edf1b7809b230bad0633261 to your computer and use it in GitHub Desktop.
using System.Runtime.CompilerServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Jobs;
using BenchmarkDotNet.Running;
#pragma warning disable CS8618
var summary = BenchmarkRunner.Run<MainTest>();
[SimpleJob(RuntimeMoniker.Net80, baseline: true)]
// [WarmupCount(2)]
// [IterationCount(2)]
[MemoryDiagnoser]
public class MainTest
{
readonly int[] nonSortedInts = init();
static int[] init()
{
var r = Enumerable.Range(0, 1048576).ToArray();
r[^1] = 0;
return r;
}
[Benchmark(Baseline = true)]
public bool WithoutSSE()
{
var n = nonSortedInts.Length;
for (var i = 1; i < n; i++)
if (nonSortedInts[i - 1] >= nonSortedInts[i])
return false;
return true;
}
[Benchmark]
public bool WithoutSSE_Optimized()
{
var n = nonSortedInts.Length;
var prev = nonSortedInts[0];
for (var i = 1; i < n; i++)
{
var curr = nonSortedInts[i];
if (prev >= curr)
return false;
prev = curr;
}
return true;
}
[Benchmark]
public bool Vector128()
{
var zeroMask = Vector128<int>.Zero;
var i = 0;
var n = nonSortedInts.Length;
if (n > 4)
{
for (i = 0; i < n - 4; i += 4)
{
var curr = Unsafe.ReadUnaligned<Vector128<int>>(ref Unsafe.As<int, byte>(ref nonSortedInts[i]));
var next = Unsafe.ReadUnaligned<Vector128<int>>(ref Unsafe.As<int, byte>(ref nonSortedInts[i + 1]));
var mask = Sse2.CompareGreaterThan(curr, next);
if (mask != zeroMask)
return false;
}
}
for (; i + 1 < n; i++)
if (nonSortedInts[i] > nonSortedInts[i + 1])
return false;
return true;
}
[Benchmark]
public bool Vector128_2()
{
var zeroMask = Vector128<int>.Zero;
var i = 0;
var n = nonSortedInts.Length;
if (n > 4)
{
var prev = Unsafe.ReadUnaligned<Vector128<int>>(ref Unsafe.As<int, byte>(ref nonSortedInts[i]));
for (i = 4; i < n; i += 4)
{
var curr = Unsafe.ReadUnaligned<Vector128<int>>(ref Unsafe.As<int, byte>(ref nonSortedInts[i]));
var mask = Sse2.CompareGreaterThan(prev, curr);
if (mask != zeroMask)
return false;
prev = curr;
}
}
for (; i + 1 < n; i++)
if (nonSortedInts[i] > nonSortedInts[i + 1])
return false;
return true;
}
[Benchmark]
public bool Vector256()
{
var zeroMask = Vector256<int>.Zero;
var i = 0;
var n = nonSortedInts.Length;
if (n > 8)
{
for (i = 0; i < n - 8; i += 8)
{
var curr = Unsafe.ReadUnaligned<Vector256<int>>(ref Unsafe.As<int, byte>(ref nonSortedInts[i]));
var next = Unsafe.ReadUnaligned<Vector256<int>>(ref Unsafe.As<int, byte>(ref nonSortedInts[i + 1]));
var mask = Avx2.CompareGreaterThan(curr, next);
if (mask != zeroMask)
return false;
}
}
for (; i + 1 < n; i++)
if (nonSortedInts[i] > nonSortedInts[i + 1])
return false;
return true;
}
[Benchmark]
public bool Vector256_2()
{
var zeroMask = Vector256<int>.Zero;
var i = 0;
var n = nonSortedInts.Length;
if (n > 8)
{
var prev = Unsafe.ReadUnaligned<Vector256<int>>(ref Unsafe.As<int, byte>(ref nonSortedInts[i]));
for (i = 8; i < n; i += 8)
{
var curr = Unsafe.ReadUnaligned<Vector256<int>>(ref Unsafe.As<int, byte>(ref nonSortedInts[i]));
var mask = Avx2.CompareGreaterThan(prev, curr);
if (mask != zeroMask)
return false;
prev = curr;
}
}
for (; i + 1 < n; i++)
if (nonSortedInts[i] > nonSortedInts[i + 1])
return false;
return true;
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment