Skip to content

Instantly share code, notes, and snippets.

@DBalashov
Created December 7, 2024 08:07
Show Gist options
  • Save DBalashov/caaf2c2da4b2de29f8b827625a69a640 to your computer and use it in GitHub Desktop.
Save DBalashov/caaf2c2da4b2de29f8b827625a69a640 to your computer and use it in GitHub Desktop.
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Configs;
using BenchmarkDotNet.Jobs;
using BenchmarkDotNet.Running;
#pragma warning disable CS8618
var summary = BenchmarkRunner.Run<MainTest>();
// | Method | Categories | N | Mean | Error | StdDev | Ratio |
// |---------------------------- |----------- |------- |--------------:|-------------:|-------------:|------:|
// | pearson_simple | double | 256 | 320.86 ns | 0.849 ns | 0.752 ns | 1.00 |
// | pearson_SIMD | double | 256 | 57.10 ns | 0.206 ns | 0.192 ns | 0.18 |
// | | | | | | | |
// | pearson_simple | double | 1024 | 1,305.25 ns | 4.063 ns | 3.393 ns | 1.00 |
// | pearson_SIMD | double | 1024 | 199.72 ns | 0.637 ns | 0.564 ns | 0.15 |
// | | | | | | | |
// | pearson_simple | double | 262144 | 341,012.70 ns | 871.292 ns | 727.569 ns | 1.00 |
// | pearson_SIMD | double | 262144 | 48,293.10 ns | 518.038 ns | 484.573 ns | 0.14 |
// | | | | | | | |
// | pearson_simple_float | float | 256 | 320.73 ns | 0.802 ns | 0.626 ns | 1.00 |
// | pearson_SIMD_float | float | 256 | 33.36 ns | 0.549 ns | 0.514 ns | 0.10 |
// | | | | | | | |
// | pearson_simple_float | float | 1024 | 1,302.41 ns | 3.174 ns | 2.478 ns | 1.00 |
// | pearson_SIMD_float | float | 1024 | 104.83 ns | 0.183 ns | 0.171 ns | 0.08 |
// | | | | | | | |
// | pearson_simple_float | float | 262144 | 335,612.23 ns | 586.867 ns | 548.956 ns | 1.00 |
// | pearson_SIMD_float | float | 262144 | 25,443.75 ns | 61.841 ns | 51.640 ns | 0.08 |
// | | | | | | | |
// | pearson_simple_short | short | 256 | 527.25 ns | 1.053 ns | 0.934 ns | 1.00 |
// | pearson_SIMD_shortViaDouble | short | 256 | 81.10 ns | 0.176 ns | 0.147 ns | 0.15 |
// | pearson_SIMD_shortViaFloat | short | 256 | 49.76 ns | 0.496 ns | 0.464 ns | 0.09 |
// | | | | | | | |
// | pearson_simple_short | short | 1024 | 2,102.78 ns | 6.821 ns | 6.380 ns | 1.00 |
// | pearson_SIMD_shortViaDouble | short | 1024 | 293.92 ns | 0.794 ns | 0.703 ns | 0.14 |
// | pearson_SIMD_shortViaFloat | short | 1024 | 156.53 ns | 0.485 ns | 0.454 ns | 0.07 |
// | | | | | | | |
// | pearson_simple_short | short | 262144 | 538,693.84 ns | 1,367.170 ns | 1,278.852 ns | 1.00 |
// | pearson_SIMD_shortViaDouble | short | 262144 | 72,391.89 ns | 155.975 ns | 138.268 ns | 0.13 |
// | pearson_SIMD_shortViaFloat | short | 262144 | 37,498.45 ns | 170.257 ns | 159.258 ns | 0.07 |
[SimpleJob(RuntimeMoniker.Net90), GroupBenchmarksBy(BenchmarkLogicalGroupRule.ByCategory), CategoriesColumn]
// [WarmupCount(2)]
// [IterationCount(2)]
// [MemoryDiagnoser]
public class MainTest
{
[Params(256, 1024, 256 * 1024)]
public int N { get; set; }
[GlobalSetup]
public void GlobalSetup()
{
arr1 = Enumerable.Range(0, N).Select(_ => Random.Shared.NextDouble()).ToArray();
arr2 = Enumerable.Range(0, N).Select(_ => Random.Shared.NextDouble()).ToArray();
arr1f = Enumerable.Range(0, N).Select(_ => (float) Random.Shared.NextDouble()).ToArray();
arr2f = Enumerable.Range(0, N).Select(_ => (float) Random.Shared.NextDouble()).ToArray();
arr1s = Enumerable.Range(0, N).Select(_ => (short) Random.Shared.Next(short.MaxValue)).ToArray();
arr2s = Enumerable.Range(0, N).Select(_ => (short) Random.Shared.Next(short.MaxValue)).ToArray();
}
double[] arr1;
double[] arr2;
float[] arr1f;
float[] arr2f;
short[] arr1s;
short[] arr2s;
#region double
[Benchmark(Baseline = true), BenchmarkCategory("double")]
public double pearson_simple() => pearson_simple(arr1, arr2);
[Benchmark, BenchmarkCategory("double")]
public double pearson_SIMD() => pearson_SIMD(arr1, arr2);
static double pearson_simple(double[] x, double[] y)
{
ArgumentNullException.ThrowIfNull(x);
ArgumentNullException.ThrowIfNull(y);
if (x.Length != y.Length)
throw new ArgumentException("Arrays must have the same length");
var sumX = 0.0;
var sumY = 0.0;
for (var i = 0; i < x.Length; i++)
{
sumX += x[i];
sumY += y[i];
}
var sumSquaredXY = 0.0;
var sumSquaredX = 0.0;
var sumSquaredY = 0.0;
for (var i = 0; i < x.Length; i++)
{
sumSquaredXY += x[i] * y[i];
sumSquaredX += x[i] * x[i];
sumSquaredY += y[i] * y[i];
}
var r1 = (x.Length * sumSquaredXY - sumX * sumY);
var r2 = Math.Sqrt((x.Length * sumSquaredX - sumX * sumX) * (x.Length * sumSquaredY - sumY * sumY));
return r1 / r2;
}
static double pearson_SIMD(double[] x, double[] y)
{
ArgumentNullException.ThrowIfNull(x);
ArgumentNullException.ThrowIfNull(y);
if (x.Length != y.Length)
throw new ArgumentException("Arrays must have the same length");
var vx = MemoryMarshal.Cast<double, Vector256<double>>(x);
var vy = MemoryMarshal.Cast<double, Vector256<double>>(y);
var vectorSumX = Vector256<double>.Zero;
var vectorSumY = Vector256<double>.Zero;
var vectorSumSquaredXY = Vector256<double>.Zero;
var vectorSumSquaredX = Vector256<double>.Zero;
var vectorSumSquaredY = Vector256<double>.Zero;
for (var i = 0; i < vx.Length; i++)
{
var vectorX = vx[i];
var vectorY = vy[i];
vectorSumX += vectorX;
vectorSumY += vectorY;
vectorSumSquaredXY += vectorX * vectorY;
vectorSumSquaredX += vectorX * vectorX;
vectorSumSquaredY += vectorY * vectorY;
}
var sumX = vectorSumX[0] + vectorSumX[1] + vectorSumX[2] + vectorSumX[3];
var sumY = vectorSumY[0] + vectorSumY[1] + vectorSumY[2] + vectorSumY[3];
var sumSquaredXY = vectorSumSquaredXY[0] + vectorSumSquaredXY[1] + vectorSumSquaredXY[2] + vectorSumSquaredXY[3];
var sumSquaredX = vectorSumSquaredX[0] + vectorSumSquaredX[1] + vectorSumSquaredX[2] + vectorSumSquaredX[3];
var sumSquaredY = vectorSumSquaredY[0] + vectorSumSquaredY[1] + vectorSumSquaredY[2] + vectorSumSquaredY[3];
for (var i = vx.Length * 4; i < x.Length; i++)
{
sumX += x[i];
sumY += y[i];
sumSquaredXY += x[i] * y[i];
sumSquaredX += x[i] * x[i];
sumSquaredY += y[i] * y[i];
}
var r1 = (x.Length * sumSquaredXY - sumX * sumY);
var r2 = Math.Sqrt((x.Length * sumSquaredX - sumX * sumX) * (x.Length * sumSquaredY - sumY * sumY));
return r1 / r2;
}
#endregion
#region float
[Benchmark(Baseline = true), BenchmarkCategory("float")]
public double pearson_simple_float() => pearson_simple_float(arr1f, arr2f);
[Benchmark, BenchmarkCategory("float")]
public double pearson_SIMD_float() => pearson_SIMD_float(arr1f, arr2f);
static float pearson_simple_float(float[] x, float[] y)
{
ArgumentNullException.ThrowIfNull(x);
ArgumentNullException.ThrowIfNull(y);
if (x.Length != y.Length)
throw new ArgumentException("Arrays must have the same length");
var sumX = 0.0f;
var sumY = 0.0f;
for (var i = 0; i < x.Length; i++)
{
sumX += x[i];
sumY += y[i];
}
var sumSquaredXY = 0.0f;
var sumSquaredX = 0.0f;
var sumSquaredY = 0.0f;
for (var i = 0; i < x.Length; i++)
{
sumSquaredXY += x[i] * y[i];
sumSquaredX += x[i] * x[i];
sumSquaredY += y[i] * y[i];
}
var r1 = (x.Length * sumSquaredXY - sumX * sumY);
var r2 = MathF.Sqrt((x.Length * sumSquaredX - sumX * sumX) * (x.Length * sumSquaredY - sumY * sumY));
return r1 / r2;
}
static double pearson_SIMD_float(float[] x, float[] y)
{
ArgumentNullException.ThrowIfNull(x);
ArgumentNullException.ThrowIfNull(y);
if (x.Length != y.Length)
throw new ArgumentException("Arrays must have the same length");
var vx = MemoryMarshal.Cast<float, Vector256<float>>(x);
var vy = MemoryMarshal.Cast<float, Vector256<float>>(y);
var vectorSumX = Vector256<float>.Zero;
var vectorSumY = Vector256<float>.Zero;
var vectorSumSquaredXY = Vector256<float>.Zero;
var vectorSumSquaredX = Vector256<float>.Zero;
var vectorSumSquaredY = Vector256<float>.Zero;
for (var i = 0; i < vx.Length; i++)
{
var vectorX = vx[i];
var vectorY = vy[i];
vectorSumX += vectorX;
vectorSumY += vectorY;
vectorSumSquaredXY += vectorX * vectorY;
vectorSumSquaredX += vectorX * vectorX;
vectorSumSquaredY += vectorY * vectorY;
}
var sumX = vectorSumX[0] + vectorSumX[1] + vectorSumX[2] + vectorSumX[3];
var sumY = vectorSumY[0] + vectorSumY[1] + vectorSumY[2] + vectorSumY[3];
var sumSquaredXY = vectorSumSquaredXY[0] + vectorSumSquaredXY[1] + vectorSumSquaredXY[2] + vectorSumSquaredXY[3];
var sumSquaredX = vectorSumSquaredX[0] + vectorSumSquaredX[1] + vectorSumSquaredX[2] + vectorSumSquaredX[3];
var sumSquaredY = vectorSumSquaredY[0] + vectorSumSquaredY[1] + vectorSumSquaredY[2] + vectorSumSquaredY[3];
for (var i = vx.Length * 8; i < x.Length; i++)
{
sumX += x[i];
sumY += y[i];
sumSquaredXY += x[i] * y[i];
sumSquaredX += x[i] * x[i];
sumSquaredY += y[i] * y[i];
}
var r1 = (x.Length * sumSquaredXY - sumX * sumY);
var r2 = Math.Sqrt((x.Length * sumSquaredX - sumX * sumX) * (x.Length * sumSquaredY - sumY * sumY));
return r1 / r2;
}
#endregion
#region short to double/float
[Benchmark(Baseline = true), BenchmarkCategory("short")]
public double pearson_simple_short() => pearson_simple_short(arr1s, arr2s);
[Benchmark, BenchmarkCategory("short")]
public double pearson_SIMD_shortViaDouble() => pearson_SIMD_shortViaDouble(arr1s, arr2s);
[Benchmark, BenchmarkCategory("short")]
public double pearson_SIMD_shortViaFloat() => pearson_SIMD_shortViaFloat(arr1s, arr2s);
static double pearson_simple_short(short[] x, short[] y)
{
ArgumentNullException.ThrowIfNull(x);
ArgumentNullException.ThrowIfNull(y);
if (x.Length != y.Length)
throw new ArgumentException("Arrays must have the same length");
var sumX = 0.0;
var sumY = 0.0;
for (var i = 0; i < x.Length; i++)
{
sumX += x[i];
sumY += y[i];
}
var sumSquaredXY = 0.0;
var sumSquaredX = 0.0;
var sumSquaredY = 0.0;
for (var i = 0; i < x.Length; i++)
{
sumSquaredXY += x[i] * y[i];
sumSquaredX += x[i] * x[i];
sumSquaredY += y[i] * y[i];
}
var r1 = (x.Length * sumSquaredXY - sumX * sumY);
var r2 = Math.Sqrt((x.Length * sumSquaredX - sumX * sumX) * (x.Length * sumSquaredY - sumY * sumY));
return r1 / r2;
}
static double pearson_SIMD_shortViaDouble(short[] x, short[] y)
{
ArgumentNullException.ThrowIfNull(x);
ArgumentNullException.ThrowIfNull(y);
if (x.Length != y.Length)
throw new ArgumentException("Arrays must have the same length");
var vx = MemoryMarshal.Cast<short, Vector128<short>>(x);
var vy = MemoryMarshal.Cast<short, Vector128<short>>(y);
var vectorSumX = Vector256<double>.Zero;
var vectorSumY = Vector256<double>.Zero;
var vectorSumSquaredXY = Vector256<double>.Zero;
var vectorSumSquaredX = Vector256<double>.Zero;
var vectorSumSquaredY = Vector256<double>.Zero;
for (var i = 0; i < vx.Length; i++)
{
var vectorX = vx[i];
var vectorY = vy[i];
var lowPartX = Avx.ConvertToVector256Double(Sse41.ConvertToVector128Int32(vectorX));
var highPartX = Avx.ConvertToVector256Double(Sse41.ConvertToVector128Int32(Sse2.ShiftRightLogical128BitLane(vectorX, 8)));
vectorSumX += lowPartX + highPartX;
var lowPartY = Avx.ConvertToVector256Double(Sse41.ConvertToVector128Int32(vectorY));
var highPartY = Avx.ConvertToVector256Double(Sse41.ConvertToVector128Int32(Sse2.ShiftRightLogical128BitLane(vectorY, 8)));
vectorSumY += lowPartY + highPartY;
vectorSumSquaredXY += lowPartX * lowPartY + highPartX * highPartY;
vectorSumSquaredX += lowPartX * lowPartX + highPartX * highPartX;
vectorSumSquaredY += lowPartY * lowPartY + highPartY * highPartY;
}
var sumX = vectorSumX[0] + vectorSumX[1] + vectorSumX[2] + vectorSumX[3];
var sumY = vectorSumY[0] + vectorSumY[1] + vectorSumY[2] + vectorSumY[3];
var sumSquaredXY = vectorSumSquaredXY[0] + vectorSumSquaredXY[1] + vectorSumSquaredXY[2] + vectorSumSquaredXY[3];
var sumSquaredX = vectorSumSquaredX[0] + vectorSumSquaredX[1] + vectorSumSquaredX[2] + vectorSumSquaredX[3];
var sumSquaredY = vectorSumSquaredY[0] + vectorSumSquaredY[1] + vectorSumSquaredY[2] + vectorSumSquaredY[3];
for (var i = vx.Length * 8; i < x.Length; i++)
{
sumX += x[i];
sumY += y[i];
sumSquaredXY += x[i] * y[i];
sumSquaredX += x[i] * x[i];
sumSquaredY += y[i] * y[i];
}
var r1 = (x.Length * sumSquaredXY - sumX * sumY);
var r2 = Math.Sqrt((x.Length * sumSquaredX - sumX * sumX) * (x.Length * sumSquaredY - sumY * sumY));
return r1 / r2;
}
static double pearson_SIMD_shortViaFloat(short[] x, short[] y)
{
ArgumentNullException.ThrowIfNull(x);
ArgumentNullException.ThrowIfNull(y);
if (x.Length != y.Length)
throw new ArgumentException("Arrays must have the same length");
var vx = MemoryMarshal.Cast<short, Vector256<short>>(x);
var vy = MemoryMarshal.Cast<short, Vector256<short>>(y);
var vectorSumX = Vector256<float>.Zero;
var vectorSumY = Vector256<float>.Zero;
var vectorSumSquaredXY = Vector256<float>.Zero;
var vectorSumSquaredX = Vector256<float>.Zero;
var vectorSumSquaredY = Vector256<float>.Zero;
for (var i = 0; i < vx.Length; i++)
{
var vectorX = vx[i];
var vectorY = vy[i];
var lowPartX = Avx.ConvertToVector256Single(Avx2.ConvertToVector256Int32(vectorX.GetLower()));
var highPartX = Avx.ConvertToVector256Single(Avx2.ConvertToVector256Int32(vectorX.GetUpper()));
vectorSumX += lowPartX + highPartX;
var lowPartY = Avx.ConvertToVector256Single(Avx2.ConvertToVector256Int32(vectorY.GetLower()));
var highPartY = Avx.ConvertToVector256Single(Avx2.ConvertToVector256Int32(vectorY.GetUpper()));
vectorSumY += lowPartY + highPartY;
vectorSumSquaredXY += lowPartX * lowPartY + highPartX * highPartY;
vectorSumSquaredX += lowPartX * lowPartX + highPartX * highPartX;
vectorSumSquaredY += lowPartY * lowPartY + highPartY * highPartY;
}
var sumX = vectorSumX[0] + vectorSumX[1] + vectorSumX[2] + vectorSumX[3];
var sumY = vectorSumY[0] + vectorSumY[1] + vectorSumY[2] + vectorSumY[3];
var sumSquaredXY = vectorSumSquaredXY[0] + vectorSumSquaredXY[1] + vectorSumSquaredXY[2] + vectorSumSquaredXY[3];
var sumSquaredX = vectorSumSquaredX[0] + vectorSumSquaredX[1] + vectorSumSquaredX[2] + vectorSumSquaredX[3];
var sumSquaredY = vectorSumSquaredY[0] + vectorSumSquaredY[1] + vectorSumSquaredY[2] + vectorSumSquaredY[3];
for (var i = vx.Length * 16; i < x.Length; i++)
{
sumX += x[i];
sumY += y[i];
sumSquaredXY += x[i] * y[i];
sumSquaredX += x[i] * x[i];
sumSquaredY += y[i] * y[i];
}
var r1 = (x.Length * sumSquaredXY - sumX * sumY);
var r2 = Math.Sqrt((x.Length * sumSquaredX - sumX * sumX) * (x.Length * sumSquaredY - sumY * sumY));
return r1 / r2;
}
#endregion
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment