Created
December 8, 2024 18:02
-
-
Save DBalashov/a34a07570d4d5ea7e52958ffe2f7462f to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System.Runtime.CompilerServices; | |
using System.Runtime.InteropServices; | |
using System.Runtime.Intrinsics; | |
using System.Runtime.Intrinsics.X86; | |
using BenchmarkDotNet.Attributes; | |
using BenchmarkDotNet.Configs; | |
using BenchmarkDotNet.Jobs; | |
using BenchmarkDotNet.Running; | |
// | Method | Categories | N | Mean | Error | StdDev | Median | Ratio | RatioSD | | |
// |------------------ |----------- |------ |--------------:|-----------:|-----------:|--------------:|------:|--------:| | |
// | WithoutSSE_Double | double | 64 | 39.257 ns | 1.1039 ns | 3.2548 ns | 38.762 ns | 1.01 | 0.12 | | |
// | Vector128_Double | double | 64 | 13.238 ns | 0.2417 ns | 0.2586 ns | 13.253 ns | 0.34 | 0.03 | | |
// | Vector256_Double | double | 64 | 6.716 ns | 0.1770 ns | 0.5219 ns | 6.480 ns | 0.17 | 0.02 | | |
// | | | | | | | | | | | |
// | WithoutSSE_Double | double | 512 | 252.062 ns | 7.3832 ns | 21.6538 ns | 247.740 ns | 1.01 | 0.12 | | |
// | Vector128_Double | double | 512 | 116.078 ns | 0.8956 ns | 0.8378 ns | 116.001 ns | 0.46 | 0.04 | | |
// | Vector256_Double | double | 512 | 62.180 ns | 0.4071 ns | 0.3808 ns | 62.194 ns | 0.25 | 0.02 | | |
// | | | | | | | | | | | |
// | WithoutSSE_Double | double | 4096 | 2,661.153 ns | 26.2153 ns | 24.5218 ns | 2,651.699 ns | 1.00 | 0.01 | | |
// | Vector128_Double | double | 4096 | 889.251 ns | 3.4527 ns | 3.0608 ns | 889.678 ns | 0.33 | 0.00 | | |
// | Vector256_Double | double | 4096 | 446.463 ns | 1.4764 ns | 1.3811 ns | 446.630 ns | 0.17 | 0.00 | | |
// | | | | | | | | | | | |
// | WithoutSSE_Double | double | 16384 | 10,576.170 ns | 70.1847 ns | 62.2169 ns | 10,568.991 ns | 1.00 | 0.01 | | |
// | Vector128_Double | double | 16384 | 3,511.621 ns | 9.5236 ns | 8.9084 ns | 3,512.726 ns | 0.33 | 0.00 | | |
// | Vector256_Double | double | 16384 | 1,762.894 ns | 5.6482 ns | 5.2833 ns | 1,764.942 ns | 0.17 | 0.00 | | |
// | | | | | | | | | | | |
// | WithoutSSE_Float | float | 64 | 40.477 ns | 1.0291 ns | 3.0344 ns | 40.849 ns | 1.01 | 0.11 | | |
// | Vector128_Float | float | 64 | 6.491 ns | 0.1553 ns | 0.4456 ns | 6.442 ns | 0.16 | 0.02 | | |
// | Vector256_Float | float | 64 | 3.277 ns | 0.0893 ns | 0.0877 ns | 3.255 ns | 0.08 | 0.01 | | |
// | | | | | | | | | | | |
// | WithoutSSE_Float | float | 512 | 228.998 ns | 3.2642 ns | 3.6281 ns | 227.737 ns | 1.00 | 0.02 | | |
// | Vector128_Float | float | 512 | 61.829 ns | 1.2437 ns | 1.3824 ns | 61.094 ns | 0.27 | 0.01 | | |
// | Vector256_Float | float | 512 | 36.694 ns | 0.2411 ns | 0.2255 ns | 36.623 ns | 0.16 | 0.00 | | |
// | | | | | | | | | | | |
// | WithoutSSE_Float | float | 4096 | 2,636.543 ns | 9.9251 ns | 9.2839 ns | 2,632.517 ns | 1.00 | 0.00 | | |
// | Vector128_Float | float | 4096 | 444.750 ns | 1.2563 ns | 1.1137 ns | 444.584 ns | 0.17 | 0.00 | | |
// | Vector256_Float | float | 4096 | 226.869 ns | 0.7396 ns | 0.6556 ns | 226.774 ns | 0.09 | 0.00 | | |
// | | | | | | | | | | | |
// | WithoutSSE_Float | float | 16384 | 10,552.978 ns | 26.7767 ns | 22.3598 ns | 10,554.590 ns | 1.00 | 0.00 | | |
// | Vector128_Float | float | 16384 | 1,760.180 ns | 5.4204 ns | 4.8051 ns | 1,760.475 ns | 0.17 | 0.00 | | |
// | Vector256_Float | float | 16384 | 895.210 ns | 4.3631 ns | 4.0812 ns | 893.890 ns | 0.08 | 0.00 | | |
// | | | | | | | | | | | |
// | WithoutSSE_Int | int | 64 | 34.376 ns | 0.6876 ns | 0.7061 ns | 34.257 ns | 1.00 | 0.03 | | |
// | Vector128_Int | int | 64 | 5.542 ns | 0.1250 ns | 0.2155 ns | 5.535 ns | 0.16 | 0.01 | | |
// | Vector256_Int | int | 64 | 2.718 ns | 0.0786 ns | 0.1588 ns | 2.643 ns | 0.08 | 0.00 | | |
// | | | | | | | | | | | |
// | WithoutSSE_Int | int | 512 | 226.219 ns | 1.3302 ns | 1.2443 ns | 226.018 ns | 1.00 | 0.01 | | |
// | Vector128_Int | int | 512 | 44.690 ns | 0.4398 ns | 0.3898 ns | 44.605 ns | 0.20 | 0.00 | | |
// | Vector256_Int | int | 512 | 20.286 ns | 0.3490 ns | 0.2914 ns | 20.181 ns | 0.09 | 0.00 | | |
// | | | | | | | | | | | |
// | WithoutSSE_Int | int | 4096 | 1,762.134 ns | 5.9582 ns | 4.9753 ns | 1,761.751 ns | 1.00 | 0.00 | | |
// | Vector128_Int | int | 4096 | 306.831 ns | 6.0554 ns | 7.6581 ns | 304.723 ns | 0.17 | 0.00 | | |
// | Vector256_Int | int | 4096 | 164.695 ns | 2.0061 ns | 1.7783 ns | 164.463 ns | 0.09 | 0.00 | | |
// | | | | | | | | | | | |
// | WithoutSSE_Int | int | 16384 | 7,018.468 ns | 13.2138 ns | 12.3602 ns | 7,017.032 ns | 1.00 | 0.00 | | |
// | Vector128_Int | int | 16384 | 1,278.140 ns | 4.8041 ns | 4.4937 ns | 1,279.285 ns | 0.18 | 0.00 | | |
// | Vector256_Int | int | 16384 | 692.578 ns | 4.1858 ns | 3.9154 ns | 692.450 ns | 0.10 | 0.00 | | |
#pragma warning disable CS8618 | |
// Entry point: run every benchmark declared on MainTest. The returned summary is not used here.
_ = BenchmarkRunner.Run<MainTest>();
/// <summary>
/// Benchmarks a "strictly ascending" check over <c>int</c>, <c>double</c> and <c>float</c> arrays,
/// comparing a plain scalar loop against 128-bit and 256-bit vectorized versions.
/// </summary>
/// <remarks>
/// All benchmarks apply the same rule: the array is ascending only when every element is strictly
/// smaller than its successor (equal neighbours count as "not sorted"). The vectorized versions
/// compare the window starting at <c>i</c> with the window starting at <c>i + 1</c>, so each lane
/// tests one adjacent pair, and any leftover elements are checked with the scalar rule.
/// </remarks>
[SimpleJob(RuntimeMoniker.Net90, baseline: true), GroupBenchmarksBy(BenchmarkLogicalGroupRule.ByCategory), CategoriesColumn]
// [WarmupCount(2)]
// [IterationCount(2)]
// [MemoryDiagnoser]
public class MainTest
{
    /// <summary>Number of elements in each input array.</summary>
    [Params(64, 512, 4096, 16384)]
    public int N { get; set; }

    int[]    nonSortedInts;
    double[] nonSortedDoubles;
    float[]  nonSortedFloats;

    /// <summary>
    /// Fills each array with 0, 1, …, N-1 and then overwrites the last element with 0.
    /// The only out-of-order pair is therefore the final one, which forces every check
    /// to scan the whole array before it can return <c>false</c>.
    /// </summary>
    [GlobalSetup]
    public void GlobalSetup()
    {
        nonSortedInts     = Enumerable.Range(0, N).ToArray();
        nonSortedInts[^1] = 0;

        nonSortedDoubles     = Enumerable.Range(0, N).Select(p => (double) p).ToArray();
        nonSortedDoubles[^1] = 0;

        nonSortedFloats     = Enumerable.Range(0, N).Select(p => (float) p).ToArray();
        nonSortedFloats[^1] = 0;
    }

    /// <summary>Scalar baseline for <c>int</c>.</summary>
    /// <returns><c>true</c> when every element is strictly smaller than the next one.</returns>
    [Benchmark(Baseline = true), BenchmarkCategory("int")]
    public bool WithoutSSE_Int()
    {
        var n = nonSortedInts.Length;
        for (var i = 1; i < n; i++)
            if (nonSortedInts[i - 1] >= nonSortedInts[i])
                return false;
        return true;
    }

    /// <summary>128-bit vectorized check for <c>int</c>.</summary>
    /// <returns><c>true</c> when every element is strictly smaller than the next one.</returns>
    [Benchmark, BenchmarkCategory("int")]
    public bool Vector128_Int() => IsStrictlyAscending128<int>(nonSortedInts);

    /// <summary>256-bit vectorized check for <c>int</c>.</summary>
    /// <returns><c>true</c> when every element is strictly smaller than the next one.</returns>
    [Benchmark, BenchmarkCategory("int")]
    public bool Vector256_Int() => IsStrictlyAscending256<int>(nonSortedInts);

    /// <summary>Scalar baseline for <c>double</c>.</summary>
    /// <returns><c>true</c> when every element is strictly smaller than the next one.</returns>
    [Benchmark(Baseline = true), BenchmarkCategory("double")]
    public bool WithoutSSE_Double()
    {
        var n = nonSortedDoubles.Length;
        for (var i = 1; i < n; i++)
            if (nonSortedDoubles[i - 1] >= nonSortedDoubles[i])
                return false;
        return true;
    }

    /// <summary>128-bit vectorized check for <c>double</c>.</summary>
    /// <returns><c>true</c> when every element is strictly smaller than the next one.</returns>
    [Benchmark, BenchmarkCategory("double")]
    public bool Vector128_Double() => IsStrictlyAscending128<double>(nonSortedDoubles);

    /// <summary>256-bit vectorized check for <c>double</c>.</summary>
    /// <returns><c>true</c> when every element is strictly smaller than the next one.</returns>
    [Benchmark, BenchmarkCategory("double")]
    public bool Vector256_Double() => IsStrictlyAscending256<double>(nonSortedDoubles);

    /// <summary>Scalar baseline for <c>float</c>.</summary>
    /// <returns><c>true</c> when every element is strictly smaller than the next one.</returns>
    [Benchmark(Baseline = true), BenchmarkCategory("float")]
    public bool WithoutSSE_Float()
    {
        // Must read the float array: the "float" category is compared against this baseline.
        var n = nonSortedFloats.Length;
        for (var i = 1; i < n; i++)
            if (nonSortedFloats[i - 1] >= nonSortedFloats[i])
                return false;
        return true;
    }

    /// <summary>128-bit vectorized check for <c>float</c>.</summary>
    /// <returns><c>true</c> when every element is strictly smaller than the next one.</returns>
    [Benchmark, BenchmarkCategory("float")]
    public bool Vector128_Float() => IsStrictlyAscending128<float>(nonSortedFloats);

    /// <summary>256-bit vectorized check for <c>float</c>.</summary>
    /// <returns><c>true</c> when every element is strictly smaller than the next one.</returns>
    [Benchmark, BenchmarkCategory("float")]
    public bool Vector256_Float() => IsStrictlyAscending256<float>(nonSortedFloats);

    /// <summary>
    /// Checks adjacent pairs <c>Vector128&lt;T&gt;.Count</c> at a time, then finishes the tail with the scalar rule.
    /// </summary>
    /// <param name="values">Elements to check; may be empty or shorter than one vector.</param>
    /// <returns><c>true</c> when every element is strictly smaller than the next one.</returns>
    static bool IsStrictlyAscending128<T>(ReadOnlySpan<T> values)
        where T : struct, System.Numerics.IComparisonOperators<T, T, bool>
    {
        ref T origin = ref MemoryMarshal.GetReference(values);

        var lanes = Vector128<T>.Count;
        // The shifted load reads elements i+1 … i+lanes, so i must stay below Length - lanes.
        var end = values.Length - lanes;
        var i   = 0;
        for (; i < end; i += lanes)
        {
            var left  = Vector128.LoadUnsafe(ref origin, (nuint) i);
            var right = Vector128.LoadUnsafe(ref origin, (nuint) (i + 1));

            // Lane k compares values[i + k] with values[i + k + 1].
            if (Vector128.GreaterThanOrEqualAny(left, right))
                return false;
        }

        // Pairs starting at index i or later have not been looked at yet.
        return IsStrictlyAscendingScalar(values.Slice(i));
    }

    /// <summary>
    /// Checks adjacent pairs <c>Vector256&lt;T&gt;.Count</c> at a time, then finishes the tail with the scalar rule.
    /// </summary>
    /// <param name="values">Elements to check; may be empty or shorter than one vector.</param>
    /// <returns><c>true</c> when every element is strictly smaller than the next one.</returns>
    static bool IsStrictlyAscending256<T>(ReadOnlySpan<T> values)
        where T : struct, System.Numerics.IComparisonOperators<T, T, bool>
    {
        ref T origin = ref MemoryMarshal.GetReference(values);

        var lanes = Vector256<T>.Count;
        // The shifted load reads elements i+1 … i+lanes, so i must stay below Length - lanes.
        var end = values.Length - lanes;
        var i   = 0;
        for (; i < end; i += lanes)
        {
            var left  = Vector256.LoadUnsafe(ref origin, (nuint) i);
            var right = Vector256.LoadUnsafe(ref origin, (nuint) (i + 1));

            // Lane k compares values[i + k] with values[i + k + 1].
            if (Vector256.GreaterThanOrEqualAny(left, right))
                return false;
        }

        // Pairs starting at index i or later have not been looked at yet.
        return IsStrictlyAscendingScalar(values.Slice(i));
    }

    /// <summary>Scalar form of the check, used for the elements that do not fill a whole vector step.</summary>
    /// <param name="values">Elements to check; zero or one element is trivially ascending.</param>
    /// <returns><c>true</c> when every element is strictly smaller than the next one.</returns>
    static bool IsStrictlyAscendingScalar<T>(ReadOnlySpan<T> values)
        where T : struct, System.Numerics.IComparisonOperators<T, T, bool>
    {
        for (var i = 1; i < values.Length; i++)
            if (values[i - 1] >= values[i])
                return false;
        return true;
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment