Created
December 7, 2024 08:07
-
-
Save DBalashov/caaf2c2da4b2de29f8b827625a69a640 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System.Runtime.InteropServices; | |
using System.Runtime.Intrinsics; | |
using System.Runtime.Intrinsics.X86; | |
using BenchmarkDotNet.Attributes; | |
using BenchmarkDotNet.Configs; | |
using BenchmarkDotNet.Jobs; | |
using BenchmarkDotNet.Running; | |
#pragma warning disable CS8618 | |
var summary = BenchmarkRunner.Run<MainTest>(); | |
// | Method | Categories | N | Mean | Error | StdDev | Ratio | | |
// |---------------------------- |----------- |------- |--------------:|-------------:|-------------:|------:| | |
// | pearson_simple | double | 256 | 320.86 ns | 0.849 ns | 0.752 ns | 1.00 | | |
// | pearson_SIMD | double | 256 | 57.10 ns | 0.206 ns | 0.192 ns | 0.18 | | |
// | | | | | | | | | |
// | pearson_simple | double | 1024 | 1,305.25 ns | 4.063 ns | 3.393 ns | 1.00 | | |
// | pearson_SIMD | double | 1024 | 199.72 ns | 0.637 ns | 0.564 ns | 0.15 | | |
// | | | | | | | | | |
// | pearson_simple | double | 262144 | 341,012.70 ns | 871.292 ns | 727.569 ns | 1.00 | | |
// | pearson_SIMD | double | 262144 | 48,293.10 ns | 518.038 ns | 484.573 ns | 0.14 | | |
// | | | | | | | | | |
// | pearson_simple_float | float | 256 | 320.73 ns | 0.802 ns | 0.626 ns | 1.00 | | |
// | pearson_SIMD_float | float | 256 | 33.36 ns | 0.549 ns | 0.514 ns | 0.10 | | |
// | | | | | | | | | |
// | pearson_simple_float | float | 1024 | 1,302.41 ns | 3.174 ns | 2.478 ns | 1.00 | | |
// | pearson_SIMD_float | float | 1024 | 104.83 ns | 0.183 ns | 0.171 ns | 0.08 | | |
// | | | | | | | | | |
// | pearson_simple_float | float | 262144 | 335,612.23 ns | 586.867 ns | 548.956 ns | 1.00 | | |
// | pearson_SIMD_float | float | 262144 | 25,443.75 ns | 61.841 ns | 51.640 ns | 0.08 | | |
// | | | | | | | | | |
// | pearson_simple_short | short | 256 | 527.25 ns | 1.053 ns | 0.934 ns | 1.00 | | |
// | pearson_SIMD_shortViaDouble | short | 256 | 81.10 ns | 0.176 ns | 0.147 ns | 0.15 | | |
// | pearson_SIMD_shortViaFloat | short | 256 | 49.76 ns | 0.496 ns | 0.464 ns | 0.09 | | |
// | | | | | | | | | |
// | pearson_simple_short | short | 1024 | 2,102.78 ns | 6.821 ns | 6.380 ns | 1.00 | | |
// | pearson_SIMD_shortViaDouble | short | 1024 | 293.92 ns | 0.794 ns | 0.703 ns | 0.14 | | |
// | pearson_SIMD_shortViaFloat | short | 1024 | 156.53 ns | 0.485 ns | 0.454 ns | 0.07 | | |
// | | | | | | | | | |
// | pearson_simple_short | short | 262144 | 538,693.84 ns | 1,367.170 ns | 1,278.852 ns | 1.00 | | |
// | pearson_SIMD_shortViaDouble | short | 262144 | 72,391.89 ns | 155.975 ns | 138.268 ns | 0.13 | | |
// | pearson_SIMD_shortViaFloat | short | 262144 | 37,498.45 ns | 170.257 ns | 159.258 ns | 0.07 | | |
[SimpleJob(RuntimeMoniker.Net90), GroupBenchmarksBy(BenchmarkLogicalGroupRule.ByCategory), CategoriesColumn] | |
// [WarmupCount(2)] | |
// [IterationCount(2)] | |
// [MemoryDiagnoser] | |
public class MainTest | |
{ | |
[Params(256, 1024, 256 * 1024)] | |
public int N { get; set; } | |
[GlobalSetup] | |
public void GlobalSetup() | |
{ | |
arr1 = Enumerable.Range(0, N).Select(_ => Random.Shared.NextDouble()).ToArray(); | |
arr2 = Enumerable.Range(0, N).Select(_ => Random.Shared.NextDouble()).ToArray(); | |
arr1f = Enumerable.Range(0, N).Select(_ => (float) Random.Shared.NextDouble()).ToArray(); | |
arr2f = Enumerable.Range(0, N).Select(_ => (float) Random.Shared.NextDouble()).ToArray(); | |
arr1s = Enumerable.Range(0, N).Select(_ => (short) Random.Shared.Next(short.MaxValue)).ToArray(); | |
arr2s = Enumerable.Range(0, N).Select(_ => (short) Random.Shared.Next(short.MaxValue)).ToArray(); | |
} | |
double[] arr1; | |
double[] arr2; | |
float[] arr1f; | |
float[] arr2f; | |
short[] arr1s; | |
short[] arr2s; | |
#region double | |
[Benchmark(Baseline = true), BenchmarkCategory("double")] | |
public double pearson_simple() => pearson_simple(arr1, arr2); | |
[Benchmark, BenchmarkCategory("double")] | |
public double pearson_SIMD() => pearson_SIMD(arr1, arr2); | |
static double pearson_simple(double[] x, double[] y) | |
{ | |
ArgumentNullException.ThrowIfNull(x); | |
ArgumentNullException.ThrowIfNull(y); | |
if (x.Length != y.Length) | |
throw new ArgumentException("Arrays must have the same length"); | |
var sumX = 0.0; | |
var sumY = 0.0; | |
for (var i = 0; i < x.Length; i++) | |
{ | |
sumX += x[i]; | |
sumY += y[i]; | |
} | |
var sumSquaredXY = 0.0; | |
var sumSquaredX = 0.0; | |
var sumSquaredY = 0.0; | |
for (var i = 0; i < x.Length; i++) | |
{ | |
sumSquaredXY += x[i] * y[i]; | |
sumSquaredX += x[i] * x[i]; | |
sumSquaredY += y[i] * y[i]; | |
} | |
var r1 = (x.Length * sumSquaredXY - sumX * sumY); | |
var r2 = Math.Sqrt((x.Length * sumSquaredX - sumX * sumX) * (x.Length * sumSquaredY - sumY * sumY)); | |
return r1 / r2; | |
} | |
static double pearson_SIMD(double[] x, double[] y) | |
{ | |
ArgumentNullException.ThrowIfNull(x); | |
ArgumentNullException.ThrowIfNull(y); | |
if (x.Length != y.Length) | |
throw new ArgumentException("Arrays must have the same length"); | |
var vx = MemoryMarshal.Cast<double, Vector256<double>>(x); | |
var vy = MemoryMarshal.Cast<double, Vector256<double>>(y); | |
var vectorSumX = Vector256<double>.Zero; | |
var vectorSumY = Vector256<double>.Zero; | |
var vectorSumSquaredXY = Vector256<double>.Zero; | |
var vectorSumSquaredX = Vector256<double>.Zero; | |
var vectorSumSquaredY = Vector256<double>.Zero; | |
for (var i = 0; i < vx.Length; i++) | |
{ | |
var vectorX = vx[i]; | |
var vectorY = vy[i]; | |
vectorSumX += vectorX; | |
vectorSumY += vectorY; | |
vectorSumSquaredXY += vectorX * vectorY; | |
vectorSumSquaredX += vectorX * vectorX; | |
vectorSumSquaredY += vectorY * vectorY; | |
} | |
var sumX = vectorSumX[0] + vectorSumX[1] + vectorSumX[2] + vectorSumX[3]; | |
var sumY = vectorSumY[0] + vectorSumY[1] + vectorSumY[2] + vectorSumY[3]; | |
var sumSquaredXY = vectorSumSquaredXY[0] + vectorSumSquaredXY[1] + vectorSumSquaredXY[2] + vectorSumSquaredXY[3]; | |
var sumSquaredX = vectorSumSquaredX[0] + vectorSumSquaredX[1] + vectorSumSquaredX[2] + vectorSumSquaredX[3]; | |
var sumSquaredY = vectorSumSquaredY[0] + vectorSumSquaredY[1] + vectorSumSquaredY[2] + vectorSumSquaredY[3]; | |
for (var i = vx.Length * 4; i < x.Length; i++) | |
{ | |
sumX += x[i]; | |
sumY += y[i]; | |
sumSquaredXY += x[i] * y[i]; | |
sumSquaredX += x[i] * x[i]; | |
sumSquaredY += y[i] * y[i]; | |
} | |
var r1 = (x.Length * sumSquaredXY - sumX * sumY); | |
var r2 = Math.Sqrt((x.Length * sumSquaredX - sumX * sumX) * (x.Length * sumSquaredY - sumY * sumY)); | |
return r1 / r2; | |
} | |
#endregion | |
#region float | |
[Benchmark(Baseline = true), BenchmarkCategory("float")] | |
public double pearson_simple_float() => pearson_simple_float(arr1f, arr2f); | |
[Benchmark, BenchmarkCategory("float")] | |
public double pearson_SIMD_float() => pearson_SIMD_float(arr1f, arr2f); | |
static float pearson_simple_float(float[] x, float[] y) | |
{ | |
ArgumentNullException.ThrowIfNull(x); | |
ArgumentNullException.ThrowIfNull(y); | |
if (x.Length != y.Length) | |
throw new ArgumentException("Arrays must have the same length"); | |
var sumX = 0.0f; | |
var sumY = 0.0f; | |
for (var i = 0; i < x.Length; i++) | |
{ | |
sumX += x[i]; | |
sumY += y[i]; | |
} | |
var sumSquaredXY = 0.0f; | |
var sumSquaredX = 0.0f; | |
var sumSquaredY = 0.0f; | |
for (var i = 0; i < x.Length; i++) | |
{ | |
sumSquaredXY += x[i] * y[i]; | |
sumSquaredX += x[i] * x[i]; | |
sumSquaredY += y[i] * y[i]; | |
} | |
var r1 = (x.Length * sumSquaredXY - sumX * sumY); | |
var r2 = MathF.Sqrt((x.Length * sumSquaredX - sumX * sumX) * (x.Length * sumSquaredY - sumY * sumY)); | |
return r1 / r2; | |
} | |
static double pearson_SIMD_float(float[] x, float[] y) | |
{ | |
ArgumentNullException.ThrowIfNull(x); | |
ArgumentNullException.ThrowIfNull(y); | |
if (x.Length != y.Length) | |
throw new ArgumentException("Arrays must have the same length"); | |
var vx = MemoryMarshal.Cast<float, Vector256<float>>(x); | |
var vy = MemoryMarshal.Cast<float, Vector256<float>>(y); | |
var vectorSumX = Vector256<float>.Zero; | |
var vectorSumY = Vector256<float>.Zero; | |
var vectorSumSquaredXY = Vector256<float>.Zero; | |
var vectorSumSquaredX = Vector256<float>.Zero; | |
var vectorSumSquaredY = Vector256<float>.Zero; | |
for (var i = 0; i < vx.Length; i++) | |
{ | |
var vectorX = vx[i]; | |
var vectorY = vy[i]; | |
vectorSumX += vectorX; | |
vectorSumY += vectorY; | |
vectorSumSquaredXY += vectorX * vectorY; | |
vectorSumSquaredX += vectorX * vectorX; | |
vectorSumSquaredY += vectorY * vectorY; | |
} | |
var sumX = vectorSumX[0] + vectorSumX[1] + vectorSumX[2] + vectorSumX[3]; | |
var sumY = vectorSumY[0] + vectorSumY[1] + vectorSumY[2] + vectorSumY[3]; | |
var sumSquaredXY = vectorSumSquaredXY[0] + vectorSumSquaredXY[1] + vectorSumSquaredXY[2] + vectorSumSquaredXY[3]; | |
var sumSquaredX = vectorSumSquaredX[0] + vectorSumSquaredX[1] + vectorSumSquaredX[2] + vectorSumSquaredX[3]; | |
var sumSquaredY = vectorSumSquaredY[0] + vectorSumSquaredY[1] + vectorSumSquaredY[2] + vectorSumSquaredY[3]; | |
for (var i = vx.Length * 8; i < x.Length; i++) | |
{ | |
sumX += x[i]; | |
sumY += y[i]; | |
sumSquaredXY += x[i] * y[i]; | |
sumSquaredX += x[i] * x[i]; | |
sumSquaredY += y[i] * y[i]; | |
} | |
var r1 = (x.Length * sumSquaredXY - sumX * sumY); | |
var r2 = Math.Sqrt((x.Length * sumSquaredX - sumX * sumX) * (x.Length * sumSquaredY - sumY * sumY)); | |
return r1 / r2; | |
} | |
#endregion | |
#region short to double/float | |
[Benchmark(Baseline = true), BenchmarkCategory("short")] | |
public double pearson_simple_short() => pearson_simple_short(arr1s, arr2s); | |
[Benchmark, BenchmarkCategory("short")] | |
public double pearson_SIMD_shortViaDouble() => pearson_SIMD_shortViaDouble(arr1s, arr2s); | |
[Benchmark, BenchmarkCategory("short")] | |
public double pearson_SIMD_shortViaFloat() => pearson_SIMD_shortViaFloat(arr1s, arr2s); | |
static double pearson_simple_short(short[] x, short[] y) | |
{ | |
ArgumentNullException.ThrowIfNull(x); | |
ArgumentNullException.ThrowIfNull(y); | |
if (x.Length != y.Length) | |
throw new ArgumentException("Arrays must have the same length"); | |
var sumX = 0.0; | |
var sumY = 0.0; | |
for (var i = 0; i < x.Length; i++) | |
{ | |
sumX += x[i]; | |
sumY += y[i]; | |
} | |
var sumSquaredXY = 0.0; | |
var sumSquaredX = 0.0; | |
var sumSquaredY = 0.0; | |
for (var i = 0; i < x.Length; i++) | |
{ | |
sumSquaredXY += x[i] * y[i]; | |
sumSquaredX += x[i] * x[i]; | |
sumSquaredY += y[i] * y[i]; | |
} | |
var r1 = (x.Length * sumSquaredXY - sumX * sumY); | |
var r2 = Math.Sqrt((x.Length * sumSquaredX - sumX * sumX) * (x.Length * sumSquaredY - sumY * sumY)); | |
return r1 / r2; | |
} | |
static double pearson_SIMD_shortViaDouble(short[] x, short[] y) | |
{ | |
ArgumentNullException.ThrowIfNull(x); | |
ArgumentNullException.ThrowIfNull(y); | |
if (x.Length != y.Length) | |
throw new ArgumentException("Arrays must have the same length"); | |
var vx = MemoryMarshal.Cast<short, Vector128<short>>(x); | |
var vy = MemoryMarshal.Cast<short, Vector128<short>>(y); | |
var vectorSumX = Vector256<double>.Zero; | |
var vectorSumY = Vector256<double>.Zero; | |
var vectorSumSquaredXY = Vector256<double>.Zero; | |
var vectorSumSquaredX = Vector256<double>.Zero; | |
var vectorSumSquaredY = Vector256<double>.Zero; | |
for (var i = 0; i < vx.Length; i++) | |
{ | |
var vectorX = vx[i]; | |
var vectorY = vy[i]; | |
var lowPartX = Avx.ConvertToVector256Double(Sse41.ConvertToVector128Int32(vectorX)); | |
var highPartX = Avx.ConvertToVector256Double(Sse41.ConvertToVector128Int32(Sse2.ShiftRightLogical128BitLane(vectorX, 8))); | |
vectorSumX += lowPartX + highPartX; | |
var lowPartY = Avx.ConvertToVector256Double(Sse41.ConvertToVector128Int32(vectorY)); | |
var highPartY = Avx.ConvertToVector256Double(Sse41.ConvertToVector128Int32(Sse2.ShiftRightLogical128BitLane(vectorY, 8))); | |
vectorSumY += lowPartY + highPartY; | |
vectorSumSquaredXY += lowPartX * lowPartY + highPartX * highPartY; | |
vectorSumSquaredX += lowPartX * lowPartX + highPartX * highPartX; | |
vectorSumSquaredY += lowPartY * lowPartY + highPartY * highPartY; | |
} | |
var sumX = vectorSumX[0] + vectorSumX[1] + vectorSumX[2] + vectorSumX[3]; | |
var sumY = vectorSumY[0] + vectorSumY[1] + vectorSumY[2] + vectorSumY[3]; | |
var sumSquaredXY = vectorSumSquaredXY[0] + vectorSumSquaredXY[1] + vectorSumSquaredXY[2] + vectorSumSquaredXY[3]; | |
var sumSquaredX = vectorSumSquaredX[0] + vectorSumSquaredX[1] + vectorSumSquaredX[2] + vectorSumSquaredX[3]; | |
var sumSquaredY = vectorSumSquaredY[0] + vectorSumSquaredY[1] + vectorSumSquaredY[2] + vectorSumSquaredY[3]; | |
for (var i = vx.Length * 8; i < x.Length; i++) | |
{ | |
sumX += x[i]; | |
sumY += y[i]; | |
sumSquaredXY += x[i] * y[i]; | |
sumSquaredX += x[i] * x[i]; | |
sumSquaredY += y[i] * y[i]; | |
} | |
var r1 = (x.Length * sumSquaredXY - sumX * sumY); | |
var r2 = Math.Sqrt((x.Length * sumSquaredX - sumX * sumX) * (x.Length * sumSquaredY - sumY * sumY)); | |
return r1 / r2; | |
} | |
static double pearson_SIMD_shortViaFloat(short[] x, short[] y) | |
{ | |
ArgumentNullException.ThrowIfNull(x); | |
ArgumentNullException.ThrowIfNull(y); | |
if (x.Length != y.Length) | |
throw new ArgumentException("Arrays must have the same length"); | |
var vx = MemoryMarshal.Cast<short, Vector256<short>>(x); | |
var vy = MemoryMarshal.Cast<short, Vector256<short>>(y); | |
var vectorSumX = Vector256<float>.Zero; | |
var vectorSumY = Vector256<float>.Zero; | |
var vectorSumSquaredXY = Vector256<float>.Zero; | |
var vectorSumSquaredX = Vector256<float>.Zero; | |
var vectorSumSquaredY = Vector256<float>.Zero; | |
for (var i = 0; i < vx.Length; i++) | |
{ | |
var vectorX = vx[i]; | |
var vectorY = vy[i]; | |
var lowPartX = Avx.ConvertToVector256Single(Avx2.ConvertToVector256Int32(vectorX.GetLower())); | |
var highPartX = Avx.ConvertToVector256Single(Avx2.ConvertToVector256Int32(vectorX.GetUpper())); | |
vectorSumX += lowPartX + highPartX; | |
var lowPartY = Avx.ConvertToVector256Single(Avx2.ConvertToVector256Int32(vectorY.GetLower())); | |
var highPartY = Avx.ConvertToVector256Single(Avx2.ConvertToVector256Int32(vectorY.GetUpper())); | |
vectorSumY += lowPartY + highPartY; | |
vectorSumSquaredXY += lowPartX * lowPartY + highPartX * highPartY; | |
vectorSumSquaredX += lowPartX * lowPartX + highPartX * highPartX; | |
vectorSumSquaredY += lowPartY * lowPartY + highPartY * highPartY; | |
} | |
var sumX = vectorSumX[0] + vectorSumX[1] + vectorSumX[2] + vectorSumX[3]; | |
var sumY = vectorSumY[0] + vectorSumY[1] + vectorSumY[2] + vectorSumY[3]; | |
var sumSquaredXY = vectorSumSquaredXY[0] + vectorSumSquaredXY[1] + vectorSumSquaredXY[2] + vectorSumSquaredXY[3]; | |
var sumSquaredX = vectorSumSquaredX[0] + vectorSumSquaredX[1] + vectorSumSquaredX[2] + vectorSumSquaredX[3]; | |
var sumSquaredY = vectorSumSquaredY[0] + vectorSumSquaredY[1] + vectorSumSquaredY[2] + vectorSumSquaredY[3]; | |
for (var i = vx.Length * 16; i < x.Length; i++) | |
{ | |
sumX += x[i]; | |
sumY += y[i]; | |
sumSquaredXY += x[i] * y[i]; | |
sumSquaredX += x[i] * x[i]; | |
sumSquaredY += y[i] * y[i]; | |
} | |
var r1 = (x.Length * sumSquaredXY - sumX * sumY); | |
var r2 = Math.Sqrt((x.Length * sumSquaredX - sumX * sumX) * (x.Length * sumSquaredY - sumY * sumY)); | |
return r1 / r2; | |
} | |
#endregion | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment