Skip to content

Instantly share code, notes, and snippets.

@DBalashov
Created December 8, 2024 18:02
Show Gist options
  • Save DBalashov/a34a07570d4d5ea7e52958ffe2f7462f to your computer and use it in GitHub Desktop.
Save DBalashov/a34a07570d4d5ea7e52958ffe2f7462f to your computer and use it in GitHub Desktop.
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Configs;
using BenchmarkDotNet.Jobs;
using BenchmarkDotNet.Running;
// | Method | Categories | N | Mean | Error | StdDev | Median | Ratio | RatioSD |
// |------------------ |----------- |------ |--------------:|-----------:|-----------:|--------------:|------:|--------:|
// | WithoutSSE_Double | double | 64 | 39.257 ns | 1.1039 ns | 3.2548 ns | 38.762 ns | 1.01 | 0.12 |
// | Vector128_Double | double | 64 | 13.238 ns | 0.2417 ns | 0.2586 ns | 13.253 ns | 0.34 | 0.03 |
// | Vector256_Double | double | 64 | 6.716 ns | 0.1770 ns | 0.5219 ns | 6.480 ns | 0.17 | 0.02 |
// | | | | | | | | | |
// | WithoutSSE_Double | double | 512 | 252.062 ns | 7.3832 ns | 21.6538 ns | 247.740 ns | 1.01 | 0.12 |
// | Vector128_Double | double | 512 | 116.078 ns | 0.8956 ns | 0.8378 ns | 116.001 ns | 0.46 | 0.04 |
// | Vector256_Double | double | 512 | 62.180 ns | 0.4071 ns | 0.3808 ns | 62.194 ns | 0.25 | 0.02 |
// | | | | | | | | | |
// | WithoutSSE_Double | double | 4096 | 2,661.153 ns | 26.2153 ns | 24.5218 ns | 2,651.699 ns | 1.00 | 0.01 |
// | Vector128_Double | double | 4096 | 889.251 ns | 3.4527 ns | 3.0608 ns | 889.678 ns | 0.33 | 0.00 |
// | Vector256_Double | double | 4096 | 446.463 ns | 1.4764 ns | 1.3811 ns | 446.630 ns | 0.17 | 0.00 |
// | | | | | | | | | |
// | WithoutSSE_Double | double | 16384 | 10,576.170 ns | 70.1847 ns | 62.2169 ns | 10,568.991 ns | 1.00 | 0.01 |
// | Vector128_Double | double | 16384 | 3,511.621 ns | 9.5236 ns | 8.9084 ns | 3,512.726 ns | 0.33 | 0.00 |
// | Vector256_Double | double | 16384 | 1,762.894 ns | 5.6482 ns | 5.2833 ns | 1,764.942 ns | 0.17 | 0.00 |
// | | | | | | | | | |
// | WithoutSSE_Float | float | 64 | 40.477 ns | 1.0291 ns | 3.0344 ns | 40.849 ns | 1.01 | 0.11 |
// | Vector128_Float | float | 64 | 6.491 ns | 0.1553 ns | 0.4456 ns | 6.442 ns | 0.16 | 0.02 |
// | Vector256_Float | float | 64 | 3.277 ns | 0.0893 ns | 0.0877 ns | 3.255 ns | 0.08 | 0.01 |
// | | | | | | | | | |
// | WithoutSSE_Float | float | 512 | 228.998 ns | 3.2642 ns | 3.6281 ns | 227.737 ns | 1.00 | 0.02 |
// | Vector128_Float | float | 512 | 61.829 ns | 1.2437 ns | 1.3824 ns | 61.094 ns | 0.27 | 0.01 |
// | Vector256_Float | float | 512 | 36.694 ns | 0.2411 ns | 0.2255 ns | 36.623 ns | 0.16 | 0.00 |
// | | | | | | | | | |
// | WithoutSSE_Float | float | 4096 | 2,636.543 ns | 9.9251 ns | 9.2839 ns | 2,632.517 ns | 1.00 | 0.00 |
// | Vector128_Float | float | 4096 | 444.750 ns | 1.2563 ns | 1.1137 ns | 444.584 ns | 0.17 | 0.00 |
// | Vector256_Float | float | 4096 | 226.869 ns | 0.7396 ns | 0.6556 ns | 226.774 ns | 0.09 | 0.00 |
// | | | | | | | | | |
// | WithoutSSE_Float | float | 16384 | 10,552.978 ns | 26.7767 ns | 22.3598 ns | 10,554.590 ns | 1.00 | 0.00 |
// | Vector128_Float | float | 16384 | 1,760.180 ns | 5.4204 ns | 4.8051 ns | 1,760.475 ns | 0.17 | 0.00 |
// | Vector256_Float | float | 16384 | 895.210 ns | 4.3631 ns | 4.0812 ns | 893.890 ns | 0.08 | 0.00 |
// | | | | | | | | | |
// | WithoutSSE_Int | int | 64 | 34.376 ns | 0.6876 ns | 0.7061 ns | 34.257 ns | 1.00 | 0.03 |
// | Vector128_Int | int | 64 | 5.542 ns | 0.1250 ns | 0.2155 ns | 5.535 ns | 0.16 | 0.01 |
// | Vector256_Int | int | 64 | 2.718 ns | 0.0786 ns | 0.1588 ns | 2.643 ns | 0.08 | 0.00 |
// | | | | | | | | | |
// | WithoutSSE_Int | int | 512 | 226.219 ns | 1.3302 ns | 1.2443 ns | 226.018 ns | 1.00 | 0.01 |
// | Vector128_Int | int | 512 | 44.690 ns | 0.4398 ns | 0.3898 ns | 44.605 ns | 0.20 | 0.00 |
// | Vector256_Int | int | 512 | 20.286 ns | 0.3490 ns | 0.2914 ns | 20.181 ns | 0.09 | 0.00 |
// | | | | | | | | | |
// | WithoutSSE_Int | int | 4096 | 1,762.134 ns | 5.9582 ns | 4.9753 ns | 1,761.751 ns | 1.00 | 0.00 |
// | Vector128_Int | int | 4096 | 306.831 ns | 6.0554 ns | 7.6581 ns | 304.723 ns | 0.17 | 0.00 |
// | Vector256_Int | int | 4096 | 164.695 ns | 2.0061 ns | 1.7783 ns | 164.463 ns | 0.09 | 0.00 |
// | | | | | | | | | |
// | WithoutSSE_Int | int | 16384 | 7,018.468 ns | 13.2138 ns | 12.3602 ns | 7,017.032 ns | 1.00 | 0.00 |
// | Vector128_Int | int | 16384 | 1,278.140 ns | 4.8041 ns | 4.4937 ns | 1,279.285 ns | 0.18 | 0.00 |
// | Vector256_Int | int | 16384 | 692.578 ns | 4.1858 ns | 3.9154 ns | 692.450 ns | 0.10 | 0.00 |
// CS8618 is silenced from here on; the benchmark arrays in MainTest are assigned in GlobalSetup, not in a constructor.
#pragma warning disable CS8618
// Program entry point: hand MainTest to BenchmarkDotNet. The returned summary is not used, so it is discarded.
_ = BenchmarkRunner.Run<MainTest>();
/// <summary>
/// Benchmarks a "strictly ascending" check over <c>int</c>, <c>double</c> and <c>float</c> arrays:
/// a plain scalar loop (the baseline of each category) against 128-bit and 256-bit vectorized versions.
/// </summary>
/// <remarks>
/// Every variant answers the same question as the scalar loop: it returns <c>false</c> as soon as an
/// element is greater than or equal to its successor, and <c>true</c> otherwise.
/// NOTE(review): the timing table in the file header appears to predate the corrections made here
/// (adjacent-element vector comparison, float baseline reading the float array); re-run before quoting it.
/// </remarks>
[SimpleJob(RuntimeMoniker.Net90, baseline: true), GroupBenchmarksBy(BenchmarkLogicalGroupRule.ByCategory), CategoriesColumn]
// [WarmupCount(2)]
// [IterationCount(2)]
// [MemoryDiagnoser]
public class MainTest
{
    /// <summary>Number of elements in each input array.</summary>
    [Params(64, 512, 4096, 16384)]
    public int N { get; set; }

    int[] nonSortedInts;
    double[] nonSortedDoubles;
    float[] nonSortedFloats;

    /// <summary>
    /// Fills each array with 0..N-1 and then overwrites the last element with 0, so the only
    /// ordering violation sits at the very end and every benchmark has to scan the whole array.
    /// </summary>
    [GlobalSetup]
    public void GlobalSetup()
    {
        nonSortedInts = Enumerable.Range(0, N).ToArray();
        nonSortedInts[^1] = 0;

        nonSortedDoubles = Enumerable.Range(0, N).Select(p => (double) p).ToArray();
        nonSortedDoubles[^1] = 0;

        nonSortedFloats = Enumerable.Range(0, N).Select(p => (float) p).ToArray();
        nonSortedFloats[^1] = 0;
    }

    /// <summary>Scalar baseline for <c>int</c>.</summary>
    /// <returns><c>false</c> if any element is &gt;= its successor; otherwise <c>true</c>.</returns>
    [Benchmark(Baseline = true), BenchmarkCategory("int")]
    public bool WithoutSSE_Int()
    {
        var n = nonSortedInts.Length;
        for (var i = 1; i < n; i++)
        {
            if (nonSortedInts[i - 1] >= nonSortedInts[i])
            {
                return false;
            }
        }

        return true;
    }

    /// <summary>128-bit vectorized check over the <c>int</c> array.</summary>
    [Benchmark, BenchmarkCategory("int")]
    public bool Vector128_Int() => IsStrictlyAscending128(nonSortedInts);

    /// <summary>256-bit vectorized check over the <c>int</c> array.</summary>
    [Benchmark, BenchmarkCategory("int")]
    public bool Vector256_Int() => IsStrictlyAscending256(nonSortedInts);

    /// <summary>Scalar baseline for <c>double</c>.</summary>
    /// <returns><c>false</c> if any element is &gt;= its successor; otherwise <c>true</c>.</returns>
    [Benchmark(Baseline = true), BenchmarkCategory("double")]
    public bool WithoutSSE_Double()
    {
        var n = nonSortedDoubles.Length;
        for (var i = 1; i < n; i++)
        {
            if (nonSortedDoubles[i - 1] >= nonSortedDoubles[i])
            {
                return false;
            }
        }

        return true;
    }

    /// <summary>128-bit vectorized check over the <c>double</c> array.</summary>
    [Benchmark, BenchmarkCategory("double")]
    public bool Vector128_Double() => IsStrictlyAscending128(nonSortedDoubles);

    /// <summary>256-bit vectorized check over the <c>double</c> array.</summary>
    [Benchmark, BenchmarkCategory("double")]
    public bool Vector256_Double() => IsStrictlyAscending256(nonSortedDoubles);

    /// <summary>Scalar baseline for <c>float</c>.</summary>
    /// <returns><c>false</c> if any element is &gt;= its successor; otherwise <c>true</c>.</returns>
    [Benchmark(Baseline = true), BenchmarkCategory("float")]
    public bool WithoutSSE_Float()
    {
        // Must read the float array: this is the baseline the float vector variants are compared against.
        var n = nonSortedFloats.Length;
        for (var i = 1; i < n; i++)
        {
            if (nonSortedFloats[i - 1] >= nonSortedFloats[i])
            {
                return false;
            }
        }

        return true;
    }

    /// <summary>128-bit vectorized check over the <c>float</c> array.</summary>
    [Benchmark, BenchmarkCategory("float")]
    public bool Vector128_Float() => IsStrictlyAscending128(nonSortedFloats);

    /// <summary>256-bit vectorized check over the <c>float</c> array.</summary>
    [Benchmark, BenchmarkCategory("float")]
    public bool Vector256_Float() => IsStrictlyAscending256(nonSortedFloats);

    /// <summary>
    /// Checks that <paramref name="values"/> is strictly ascending, comparing
    /// <c>Vector128&lt;T&gt;.Count</c> adjacent pairs per iteration.
    /// </summary>
    /// <param name="values">Array to inspect; empty and single-element arrays count as ascending.</param>
    /// <returns><c>false</c> if any element is &gt;= its successor; otherwise <c>true</c>.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    static bool IsStrictlyAscending128<T>(T[] values)
        where T : struct, System.Numerics.IComparisonOperators<T, T, bool>
    {
        var lanes = Vector128<T>.Count;
        var pairCount = values.Length - 1; // adjacent pairs are (i, i + 1) for i in [0, pairCount)
        var i = 0;

        if (Vector128.IsHardwareAccelerated && pairCount >= lanes)
        {
            ref var start = ref MemoryMarshal.GetArrayDataReference(values);

            // Two overlapping loads, one element apart: lane k compares values[i + k] with values[i + k + 1].
            // The bound keeps the second load (which ends at index i + lanes) inside the array.
            for (; i <= pairCount - lanes; i += lanes)
            {
                var current = Vector128.LoadUnsafe(ref start, (nuint) i);
                var next = Vector128.LoadUnsafe(ref start, (nuint) (i + 1));
                if (Vector128.GreaterThanOrEqualAny(current, next))
                {
                    return false;
                }
            }
        }

        // Scalar tail: the pairs that do not fill a whole vector (or everything, without hardware acceleration).
        for (; i < pairCount; i++)
        {
            if (values[i] >= values[i + 1])
            {
                return false;
            }
        }

        return true;
    }

    /// <summary>
    /// Checks that <paramref name="values"/> is strictly ascending, comparing
    /// <c>Vector256&lt;T&gt;.Count</c> adjacent pairs per iteration.
    /// </summary>
    /// <param name="values">Array to inspect; empty and single-element arrays count as ascending.</param>
    /// <returns><c>false</c> if any element is &gt;= its successor; otherwise <c>true</c>.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    static bool IsStrictlyAscending256<T>(T[] values)
        where T : struct, System.Numerics.IComparisonOperators<T, T, bool>
    {
        var lanes = Vector256<T>.Count;
        var pairCount = values.Length - 1; // adjacent pairs are (i, i + 1) for i in [0, pairCount)
        var i = 0;

        if (Vector256.IsHardwareAccelerated && pairCount >= lanes)
        {
            ref var start = ref MemoryMarshal.GetArrayDataReference(values);

            // Two overlapping loads, one element apart: lane k compares values[i + k] with values[i + k + 1].
            // The bound keeps the second load (which ends at index i + lanes) inside the array.
            for (; i <= pairCount - lanes; i += lanes)
            {
                var current = Vector256.LoadUnsafe(ref start, (nuint) i);
                var next = Vector256.LoadUnsafe(ref start, (nuint) (i + 1));
                if (Vector256.GreaterThanOrEqualAny(current, next))
                {
                    return false;
                }
            }
        }

        // Scalar tail: the pairs that do not fill a whole vector (or everything, without hardware acceleration).
        for (; i < pairCount; i++)
        {
            if (values[i] >= values[i + 1])
            {
                return false;
            }
        }

        return true;
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment