Skip to content

Instantly share code, notes, and snippets.

@DBalashov
Created December 7, 2024 11:47
Show Gist options
  • Save DBalashov/f7be9ba4468cd6b1714db48020083da4 to your computer and use it in GitHub Desktop.
Save DBalashov/f7be9ba4468cd6b1714db48020083da4 to your computer and use it in GitHub Desktop.
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Configs;
using BenchmarkDotNet.Jobs;
using BenchmarkDotNet.Running;
// | Method | Categories | N | Mean | Error | StdDev | Ratio |
// |----------------- |----------- |------- |--------------:|-------------:|-----------:|------:|
// | r2_double_simple | double | 256 | 328.51 ns | 0.878 ns | 0.733 ns | 1.00 |
// | r2_double_SIMD | double | 256 | 84.99 ns | 0.234 ns | 0.195 ns | 0.26 |
// | | | | | | | |
// | r2_double_simple | double | 1024 | 1,303.87 ns | 1.334 ns | 1.042 ns | 1.00 |
// | r2_double_SIMD | double | 1024 | 334.40 ns | 0.949 ns | 0.888 ns | 0.26 |
// | | | | | | | |
// | r2_double_simple | double | 262144 | 336,448.25 ns | 606.907 ns | 567.701 ns | 1.00 |
// | r2_double_SIMD | double | 262144 | 86,994.17 ns | 306.840 ns | 287.018 ns | 0.26 |
// | | | | | | | |
// | r2_simple_float | float | 256 | 331.08 ns | 0.350 ns | 0.327 ns | 1.00 |
// | r2_SIMD_float | float | 256 | 48.68 ns | 0.132 ns | 0.110 ns | 0.15 |
// | | | | | | | |
// | r2_simple_float | float | 1024 | 1,322.64 ns | 11.351 ns | 10.618 ns | 1.00 |
// | r2_SIMD_float | float | 1024 | 174.04 ns | 0.270 ns | 0.253 ns | 0.13 |
// | | | | | | | |
// | r2_simple_float | float | 262144 | 336,838.97 ns | 1,039.619 ns | 972.460 ns | 1.00 |
// | r2_SIMD_float | float | 262144 | 43,231.15 ns | 150.809 ns | 141.067 ns | 0.13 |
#pragma warning disable CS8618
// Entry point: run every benchmark declared on MainTest. The returned summary object is not used.
_ = BenchmarkRunner.Run<MainTest>();
[SimpleJob(RuntimeMoniker.Net90), GroupBenchmarksBy(BenchmarkLogicalGroupRule.ByCategory), CategoriesColumn]
// [WarmupCount(2)]
// [IterationCount(2)]
// [MemoryDiagnoser]
public class MainTest
{
/// <summary>
/// Number of elements in each input array built by <see cref="GlobalSetup"/>.
/// The values listed in <c>Params</c> are the sizes the benchmarks are run with.
/// </summary>
[Params(256, 1024, 256 * 1024)]
public int N { get; set; }
/// <summary>
/// Allocates the four benchmark input arrays, each holding <see cref="N"/> values drawn from
/// <see cref="Random.Shared"/>. The float arrays are produced by narrowing a random double.
/// </summary>
[GlobalSetup]
public void GlobalSetup()
{
    // Generators ignore the index; it only drives how many values are produced.
    static double NextDouble(int _) => Random.Shared.NextDouble();
    static float NextSingle(int _) => (float) Random.Shared.NextDouble();

    var indices = Enumerable.Range(0, N);

    // Arrays are filled one after another, in this order, so the random stream is consumed
    // exactly as before: arr1, arr2, arr1f, arr2f.
    arr1 = indices.Select(NextDouble).ToArray();
    arr2 = indices.Select(NextDouble).ToArray();
    arr1f = indices.Select(NextSingle).ToArray();
    arr2f = indices.Select(NextSingle).ToArray();
}
// Benchmark inputs. All four are assigned in GlobalSetup (hence no initializers here),
// each with N random elements.
double[] arr1;  // passed as "fact" to the double-precision benchmarks
double[] arr2;  // passed as "prog" to the double-precision benchmarks
float[] arr1f;  // passed as "fact" to the single-precision benchmarks
float[] arr2f;  // passed as "prog" to the single-precision benchmarks
#region double
/// <summary>
/// Baseline of the "double" category: scalar R² over the double-precision input arrays.
/// </summary>
[Benchmark(Baseline = true)]
[BenchmarkCategory("double")]
public double r2_double_simple()
{
    return r2_simple_double(arr1, arr2);
}
/// <summary>
/// "double" category: Vector256-based R² over the double-precision input arrays.
/// </summary>
[Benchmark]
[BenchmarkCategory("double")]
public double r2_double_SIMD()
{
    return r2_SIMD_double(arr1, arr2);
}
/// <summary>
/// Scalar reference implementation of the coefficient of determination (R²):
/// <c>1 - SSres / SStot</c>, where SStot is the sum of squared deviations of
/// <paramref name="fact"/> from its mean and SSres is the sum of squared differences
/// between <paramref name="fact"/> and <paramref name="prog"/>.
/// </summary>
/// <param name="fact">Values whose mean defines the total sum of squares.</param>
/// <param name="prog">Values compared element-wise against <paramref name="fact"/>; same length required.</param>
/// <returns>The R² value; NaN or an infinity when the total sum of squares is zero (including empty input).</returns>
/// <exception cref="ArgumentNullException">Either array is <c>null</c>.</exception>
/// <exception cref="ArgumentException">The arrays differ in length.</exception>
static double r2_simple_double(double[] fact, double[] prog)
{
    ArgumentNullException.ThrowIfNull(fact);
    ArgumentNullException.ThrowIfNull(prog);

    int count = fact.Length;
    if (count != prog.Length)
    {
        throw new ArgumentException("Arrays must have the same length");
    }

    // Pass 1: arithmetic mean of fact.
    double mean = 0.0;
    for (int k = 0; k < count; k++)
    {
        mean += fact[k];
    }
    mean /= count;

    // Pass 2: total and residual sums of squares, accumulated in element order.
    double totalSquares = 0.0;
    double residualSquares = 0.0;
    for (int k = 0; k < count; k++)
    {
        double observed = fact[k];
        double fromMean = observed - mean;
        double fromPrediction = observed - prog[k];
        totalSquares += fromMean * fromMean;
        residualSquares += fromPrediction * fromPrediction;
    }

    return 1 - residualSquares / totalSquares;
}
/// <summary>
/// Vectorized R² (<c>1 - SSres / SStot</c>) for double-precision data. The arrays are viewed as
/// runs of <see cref="Vector256{T}"/> blocks (4 doubles each); elements that do not fill a whole
/// block are handled by scalar tail loops.
/// </summary>
/// <param name="fact">Values whose mean defines the total sum of squares.</param>
/// <param name="prog">Values compared element-wise against <paramref name="fact"/>; same length required.</param>
/// <returns>The R² value. Because partial sums are kept per lane, the result may differ from the scalar version in the last bits.</returns>
/// <exception cref="ArgumentNullException">Either array is <c>null</c>.</exception>
/// <exception cref="ArgumentException">The arrays differ in length.</exception>
static double r2_SIMD_double(double[] fact, double[] prog)
{
    ArgumentNullException.ThrowIfNull(fact);
    ArgumentNullException.ThrowIfNull(prog);
    if (fact.Length != prog.Length)
    {
        throw new ArgumentException("Arrays must have the same length");
    }

    // Horizontal reduction: adds the four lanes strictly left to right.
    static double AddLanes(Vector256<double> v) =>
        v.GetElement(0) + v.GetElement(1) + v.GetElement(2) + v.GetElement(3);

    // Reinterpret the arrays in place as whole 256-bit blocks; any remainder is not part of these spans.
    var factBlocks = MemoryMarshal.Cast<double, Vector256<double>>(fact);
    var progBlocks = MemoryMarshal.Cast<double, Vector256<double>>(prog);
    int blockCount = factBlocks.Length;
    int tailStart = blockCount * Vector256<double>.Count; // first index not covered by a block

    // Mean: per-lane sums over the blocks, then lanes, then the scalar tail.
    var laneSums = Vector256<double>.Zero;
    for (int b = 0; b < blockCount; b++)
    {
        laneSums += factBlocks[b];
    }

    double mean = AddLanes(laneSums);
    for (int k = tailStart; k < fact.Length; k++)
    {
        mean += fact[k];
    }
    mean /= fact.Length;

    // Sums of squares over the blocks, with the mean broadcast to every lane.
    var meanBroadcast = Vector256.Create(mean);
    var laneTotal = Vector256<double>.Zero;
    var laneResidual = Vector256<double>.Zero;
    for (int b = 0; b < blockCount; b++)
    {
        var observed = factBlocks[b];
        var fromMean = observed - meanBroadcast;
        var fromPrediction = observed - progBlocks[b];
        laneTotal += fromMean * fromMean;
        laneResidual += fromPrediction * fromPrediction;
    }

    // Collapse the lanes and finish with the scalar tail.
    double totalSquares = AddLanes(laneTotal);
    double residualSquares = AddLanes(laneResidual);
    for (int k = tailStart; k < fact.Length; k++)
    {
        double fromMean = fact[k] - mean;
        double fromPrediction = fact[k] - prog[k];
        totalSquares += fromMean * fromMean;
        residualSquares += fromPrediction * fromPrediction;
    }

    return 1 - residualSquares / totalSquares;
}
#endregion
#region float
/// <summary>
/// Baseline of the "float" category: scalar R² over the single-precision input arrays.
/// </summary>
[Benchmark(Baseline = true)]
[BenchmarkCategory("float")]
public double r2_simple_float()
{
    return r2_simple_float(arr1f, arr2f);
}
/// <summary>
/// "float" category: Vector256-based R² over the single-precision input arrays.
/// </summary>
[Benchmark]
[BenchmarkCategory("float")]
public double r2_SIMD_float()
{
    return r2_SIMD_float(arr1f, arr2f);
}
/// <summary>
/// Scalar reference implementation of the coefficient of determination (R²) for
/// single-precision input: <c>1 - SSres / SStot</c>, where SStot is the sum of squared
/// deviations of <paramref name="fact"/> from its mean and SSres is the sum of squared
/// differences between <paramref name="fact"/> and <paramref name="prog"/>.
/// </summary>
/// <remarks>
/// Inputs are single precision, but every intermediate value (mean, differences, squares,
/// sums) is computed in double precision. A <c>float</c> running sum drops low-order bits once
/// it grows past 2^24, which skews the mean and therefore SStot.
/// </remarks>
/// <param name="fact">Values whose mean defines the total sum of squares.</param>
/// <param name="prog">Values compared element-wise against <paramref name="fact"/>; same length required.</param>
/// <returns>The R² value; NaN or an infinity when the total sum of squares is zero (including empty input).</returns>
/// <exception cref="ArgumentNullException">Either array is <c>null</c>.</exception>
/// <exception cref="ArgumentException">The arrays differ in length.</exception>
static double r2_simple_float(float[] fact, float[] prog)
{
    ArgumentNullException.ThrowIfNull(fact);
    ArgumentNullException.ThrowIfNull(prog);
    if (fact.Length != prog.Length)
    {
        throw new ArgumentException("Arrays must have the same length");
    }

    // Accumulate the mean in double: each float widens exactly, so no input bits are lost.
    var sum = 0.0;
    foreach (var t in fact)
    {
        sum += t;
    }
    var avg = sum / fact.Length;

    var sstot = 0.0;
    var ssres = 0.0;
    for (var i = 0; i < fact.Length; i++)
    {
        // Widen before subtracting so the differences and their squares are not rounded to float.
        double observed = fact[i];
        var tot = observed - avg;
        sstot += tot * tot;
        var res = observed - prog[i];
        ssres += res * res;
    }

    return 1 - ssres / sstot;
}
/// <summary>
/// Vectorized R² (<c>1 - SSres / SStot</c>) for single-precision data. The arrays are viewed as
/// runs of <see cref="Vector256{T}"/> blocks (8 floats each); elements that do not fill a whole
/// block are handled by scalar tail loops.
/// </summary>
/// <remarks>
/// Every intermediate value (mean, lane sums, tail sums and the final quotient) is a
/// <c>float</c>; only the finished result is widened to <c>double</c> on return.
/// </remarks>
/// <param name="fact">Values whose mean defines the total sum of squares.</param>
/// <param name="prog">Values compared element-wise against <paramref name="fact"/>; same length required.</param>
/// <returns>The R² value, computed in single precision.</returns>
/// <exception cref="ArgumentNullException">Either array is <c>null</c>.</exception>
/// <exception cref="ArgumentException">The arrays differ in length.</exception>
static double r2_SIMD_float(float[] fact, float[] prog)
{
    ArgumentNullException.ThrowIfNull(fact);
    ArgumentNullException.ThrowIfNull(prog);
    if (fact.Length != prog.Length)
    {
        throw new ArgumentException("Arrays must have the same length");
    }

    // Horizontal reduction: adds the eight lanes strictly left to right.
    static float AddLanes(Vector256<float> v) =>
        v.GetElement(0) + v.GetElement(1) + v.GetElement(2) + v.GetElement(3)
        + v.GetElement(4) + v.GetElement(5) + v.GetElement(6) + v.GetElement(7);

    // Reinterpret the arrays in place as whole 256-bit blocks; any remainder is not part of these spans.
    var factBlocks = MemoryMarshal.Cast<float, Vector256<float>>(fact);
    var progBlocks = MemoryMarshal.Cast<float, Vector256<float>>(prog);
    int blockCount = factBlocks.Length;
    int tailStart = blockCount * Vector256<float>.Count; // first index not covered by a block

    // Mean: per-lane sums over the blocks, then lanes, then the scalar tail.
    var laneSums = Vector256<float>.Zero;
    for (int b = 0; b < blockCount; b++)
    {
        laneSums += factBlocks[b];
    }

    float mean = AddLanes(laneSums);
    for (int k = tailStart; k < fact.Length; k++)
    {
        mean += fact[k];
    }
    mean /= fact.Length;

    // Sums of squares over the blocks, with the mean broadcast to every lane.
    var meanBroadcast = Vector256.Create(mean);
    var laneTotal = Vector256<float>.Zero;
    var laneResidual = Vector256<float>.Zero;
    for (int b = 0; b < blockCount; b++)
    {
        var observed = factBlocks[b];
        var fromMean = observed - meanBroadcast;
        var fromPrediction = observed - progBlocks[b];
        laneTotal += fromMean * fromMean;
        laneResidual += fromPrediction * fromPrediction;
    }

    // Collapse the lanes and finish with the scalar tail.
    float totalSquares = AddLanes(laneTotal);
    float residualSquares = AddLanes(laneResidual);
    for (int k = tailStart; k < fact.Length; k++)
    {
        float fromMean = fact[k] - mean;
        float fromPrediction = fact[k] - prog[k];
        totalSquares += fromMean * fromMean;
        residualSquares += fromPrediction * fromPrediction;
    }

    return 1 - residualSquares / totalSquares;
}
#endregion
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment