Created
December 8, 2024 12:46
-
-
Save DBalashov/4ad2d64228bfc26c7539c1d2187a7d91 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System.Runtime.InteropServices; | |
using System.Runtime.Intrinsics; | |
using System.Runtime.Intrinsics.X86; | |
using BenchmarkDotNet.Attributes; | |
using BenchmarkDotNet.Configs; | |
using BenchmarkDotNet.Jobs; | |
using BenchmarkDotNet.Running; | |
// | Method | Categories | N | Mean | Error | StdDev | Ratio | | |
// |------------------- |----------- |------- |--------------:|-----------:|-----------:|------:| | |
// | r2_double_simple | double | 256 | 328.16 ns | 0.666 ns | 0.623 ns | 1.00 | | |
// | r2_double_SIMD | double | 256 | 83.81 ns | 0.194 ns | 0.172 ns | 0.26 | | |
// | r2_double_SIMD_512 | double | 256 | 57.43 ns | 0.121 ns | 0.113 ns | 0.18 | | |
// | | | | | | | | | |
// | r2_double_simple | double | 1024 | 1,306.46 ns | 2.553 ns | 2.388 ns | 1.00 | | |
// | r2_double_SIMD | double | 1024 | 332.19 ns | 0.553 ns | 0.490 ns | 0.25 | | |
// | r2_double_SIMD_512 | double | 1024 | 211.49 ns | 0.684 ns | 0.640 ns | 0.16 | | |
// | | | | | | | | | |
// | r2_double_simple | double | 8192 | 10,447.47 ns | 18.444 ns | 16.350 ns | 1.00 | | |
// | r2_double_SIMD | double | 8192 | 2,640.74 ns | 9.755 ns | 9.125 ns | 0.25 | | |
// | r2_double_SIMD_512 | double | 8192 | 1,744.60 ns | 15.390 ns | 14.396 ns | 0.17 | | |
// | | | | | | | | | |
// | r2_double_simple | double | 32768 | 42,124.57 ns | 129.262 ns | 120.912 ns | 1.00 | | |
// | r2_double_SIMD | double | 32768 | 10,619.25 ns | 25.535 ns | 22.636 ns | 0.25 | | |
// | r2_double_SIMD_512 | double | 32768 | 7,215.71 ns | 22.042 ns | 20.618 ns | 0.17 | | |
// | | | | | | | | | |
// | r2_double_simple | double | 262144 | 336,632.37 ns | 433.968 ns | 338.814 ns | 1.00 | | |
// | r2_double_SIMD | double | 262144 | 86,340.07 ns | 358.094 ns | 334.962 ns | 0.26 | | |
// | r2_double_SIMD_512 | double | 262144 | 60,434.60 ns | 286.446 ns | 267.941 ns | 0.18 | | |
// | | | | | | | | | |
// | r2_simple_float | float | 256 | 330.74 ns | 0.494 ns | 0.438 ns | 1.00 | | |
// | r2_SIMD_float | float | 256 | 48.32 ns | 0.101 ns | 0.094 ns | 0.15 | | |
// | r2_SIMD_float_512 | float | 256 | 35.79 ns | 0.077 ns | 0.064 ns | 0.11 | | |
// | | | | | | | | | |
// | r2_simple_float | float | 1024 | 1,315.37 ns | 2.455 ns | 2.177 ns | 1.00 | | |
// | r2_SIMD_float | float | 1024 | 173.25 ns | 0.367 ns | 0.343 ns | 0.13 | | |
// | r2_SIMD_float_512 | float | 1024 | 108.02 ns | 0.433 ns | 0.405 ns | 0.08 | | |
// | | | | | | | | | |
// | r2_simple_float | float | 8192 | 10,460.41 ns | 24.251 ns | 22.685 ns | 1.00 | | |
// | r2_SIMD_float | float | 8192 | 1,329.44 ns | 2.635 ns | 2.201 ns | 0.13 | | |
// | r2_SIMD_float_512 | float | 8192 | 865.05 ns | 3.402 ns | 3.182 ns | 0.08 | | |
// | | | | | | | | | |
// | r2_simple_float | float | 32768 | 42,468.47 ns | 413.560 ns | 386.844 ns | 1.00 | | |
// | r2_SIMD_float | float | 32768 | 5,271.61 ns | 21.083 ns | 18.690 ns | 0.12 | | |
// | r2_SIMD_float_512 | float | 32768 | 3,465.70 ns | 6.359 ns | 5.637 ns | 0.08 | | |
// | | | | | | | | | |
// | r2_simple_float | float | 262144 | 335,978.50 ns | 537.433 ns | 476.421 ns | 1.00 | | |
// | r2_SIMD_float | float | 262144 | 43,055.83 ns | 155.537 ns | 145.490 ns | 0.13 | | |
// | r2_SIMD_float_512 | float | 262144 | 29,530.19 ns | 80.202 ns | 75.021 ns | 0.09 | | |
#pragma warning disable CS8618 | |
// Program entry point: run every benchmark declared on MainTest.
// The returned summary object is not consumed, so it is discarded.
_ = BenchmarkRunner.Run<MainTest>();
[SimpleJob(RuntimeMoniker.Net90), GroupBenchmarksBy(BenchmarkLogicalGroupRule.ByCategory), CategoriesColumn] | |
// [WarmupCount(2)] | |
// [IterationCount(2)] | |
// [MemoryDiagnoser] | |
public class MainTest | |
{ | |
/// <summary>
/// Length of every input array built by GlobalSetup; supplied by BenchmarkDotNet
/// from the listed values (256, 1 Ki, 8 Ki, 32 Ki and 256 Ki elements).
/// </summary>
[Params(256, 1024, 8192, 32768, 262144)]
public int N { get; set; }
/// <summary>
/// Allocates the four input arrays with N elements each and fills them with
/// samples from Random.Shared.NextDouble() (narrowed to float for the float arrays).
/// </summary>
[GlobalSetup]
public void GlobalSetup()
{
    // `count` samples taken straight from the shared generator.
    static double[] RandomDoubles(int count)
    {
        var indices = Enumerable.Range(0, count);
        return indices.Select(static _ => Random.Shared.NextDouble()).ToArray();
    }

    // Same source of randomness, each sample narrowed to single precision.
    static float[] RandomFloats(int count)
    {
        var indices = Enumerable.Range(0, count);
        return indices.Select(static _ => (float) Random.Shared.NextDouble()).ToArray();
    }

    arr1 = RandomDoubles(N);
    arr2 = RandomDoubles(N);
    arr1f = RandomFloats(N);
    arr2f = RandomFloats(N);
}
// Benchmark inputs; assigned in GlobalSetup().
private double[] arr1, arr2;   // passed to the "double" benchmarks
private float[] arr1f, arr2f;  // passed to the "float" benchmarks
#region double | |
/// <summary>Baseline of the "double" category: scalar R^2 over arr1 / arr2.</summary>
[Benchmark(Baseline = true)]
[BenchmarkCategory("double")]
public double r2_double_simple()
{
    return r2_simple_double(arr1, arr2);
}
/// <summary>"double" category: R^2 over arr1 / arr2 using 256-bit vectors.</summary>
[Benchmark]
[BenchmarkCategory("double")]
public double r2_double_SIMD()
{
    return r2_SIMD_double(arr1, arr2);
}
/// <summary>"double" category: R^2 over arr1 / arr2 reading the data in 512-bit blocks.</summary>
[Benchmark]
[BenchmarkCategory("double")]
public double r2_double_SIMD_512()
{
    return r2_SIMD_double_512(arr1, arr2);
}
/// <summary>
/// Scalar reference implementation of R^2 = 1 - SSres / SStot, where SStot is the sum of
/// squared deviations of <paramref name="fact"/> from its mean and SSres is the sum of
/// squared differences between <paramref name="fact"/> and <paramref name="prog"/>.
/// </summary>
/// <param name="fact">Values whose mean and spread define SStot.</param>
/// <param name="prog">Values compared element-wise against <paramref name="fact"/>.</param>
/// <returns>1 - SSres / SStot. Empty input or zero SStot is not guarded against.</returns>
/// <exception cref="ArgumentNullException">Either array is null.</exception>
/// <exception cref="ArgumentException">The arrays differ in length.</exception>
static double r2_simple_double(double[] fact, double[] prog)
{
    ArgumentNullException.ThrowIfNull(fact);
    ArgumentNullException.ThrowIfNull(prog);

    var count = fact.Length;
    if (count != prog.Length)
    {
        throw new ArgumentException("Arrays must have the same length");
    }

    // Pass 1: arithmetic mean of `fact`.
    double mean = 0.0;
    for (var k = 0; k < count; k++)
    {
        mean += fact[k];
    }
    mean /= count;

    // Pass 2: accumulate both sums of squares in one sweep.
    double totalSquares = 0.0;
    double residualSquares = 0.0;
    for (var k = 0; k < count; k++)
    {
        var observed = fact[k];
        var deviation = observed - mean;
        var residual = observed - prog[k];
        totalSquares += deviation * deviation;
        residualSquares += residual * residual;
    }

    return 1 - residualSquares / totalSquares;
}
/// <summary>
/// R^2 = 1 - SSres / SStot computed with 256-bit vectors (4 doubles per step);
/// elements that do not fill a whole vector are handled by scalar tail loops.
/// </summary>
/// <param name="fact">Values whose mean and spread define SStot.</param>
/// <param name="prog">Values compared element-wise against <paramref name="fact"/>.</param>
/// <returns>1 - SSres / SStot. Empty input or zero SStot is not guarded against.</returns>
/// <exception cref="ArgumentNullException">Either array is null.</exception>
/// <exception cref="ArgumentException">The arrays differ in length.</exception>
static double r2_SIMD_double(double[] fact, double[] prog)
{
    ArgumentNullException.ThrowIfNull(fact);
    ArgumentNullException.ThrowIfNull(prog);
    if (fact.Length != prog.Length)
    {
        throw new ArgumentException("Arrays must have the same length");
    }

    // Horizontal add, lanes taken strictly left to right.
    static double SumLanes(Vector256<double> v) =>
        v.GetElement(0) + v.GetElement(1) + v.GetElement(2) + v.GetElement(3);

    // View both arrays as whole 4-lane blocks; the remainder starts at tailStart.
    var factBlocks = MemoryMarshal.Cast<double, Vector256<double>>(fact.AsSpan());
    var progBlocks = MemoryMarshal.Cast<double, Vector256<double>>(prog.AsSpan());
    var tailStart = factBlocks.Length * Vector256<double>.Count;

    // Mean of `fact`: per-lane sums, then lanes, then the scalar tail.
    var laneSums = Vector256<double>.Zero;
    for (var b = 0; b < factBlocks.Length; b++)
    {
        laneSums += factBlocks[b];
    }

    double mean = SumLanes(laneSums);
    for (var k = tailStart; k < fact.Length; k++)
    {
        mean += fact[k];
    }
    mean /= fact.Length;

    // Per-lane sums of squares over the vectorised part.
    var meanLanes = Vector256.Create(mean);
    var totalLanes = Vector256<double>.Zero;
    var residualLanes = Vector256<double>.Zero;
    for (var b = 0; b < factBlocks.Length; b++)
    {
        var observed = factBlocks[b];
        var deviation = observed - meanLanes;
        var residual = observed - progBlocks[b];
        totalLanes += deviation * deviation;
        residualLanes += residual * residual;
    }

    double totalSquares = SumLanes(totalLanes);
    double residualSquares = SumLanes(residualLanes);

    // Scalar tail for the elements past the last whole block.
    for (var k = tailStart; k < fact.Length; k++)
    {
        var deviation = fact[k] - mean;
        var residual = fact[k] - prog[k];
        totalSquares += deviation * deviation;
        residualSquares += residual * residual;
    }

    return 1 - residualSquares / totalSquares;
}
/// <summary>
/// R^2 = 1 - SSres / SStot where the data is read in 512-bit blocks (8 doubles) and the
/// arithmetic is carried out on each block's two 256-bit halves; elements that do not
/// fill a whole block are handled by scalar tail loops.
/// </summary>
/// <param name="fact">Values whose mean and spread define SStot.</param>
/// <param name="prog">Values compared element-wise against <paramref name="fact"/>.</param>
/// <returns>1 - SSres / SStot. Empty input or zero SStot is not guarded against.</returns>
/// <exception cref="ArgumentNullException">Either array is null.</exception>
/// <exception cref="ArgumentException">The arrays differ in length.</exception>
static double r2_SIMD_double_512(double[] fact, double[] prog)
{
    ArgumentNullException.ThrowIfNull(fact);
    ArgumentNullException.ThrowIfNull(prog);
    if (fact.Length != prog.Length)
    {
        throw new ArgumentException("Arrays must have the same length");
    }

    // Horizontal add, lanes taken strictly left to right.
    static double SumLanes(Vector256<double> v) =>
        v.GetElement(0) + v.GetElement(1) + v.GetElement(2) + v.GetElement(3);

    // View both arrays as whole 8-lane blocks; the remainder starts at tailStart.
    var factBlocks = MemoryMarshal.Cast<double, Vector512<double>>(fact.AsSpan());
    var progBlocks = MemoryMarshal.Cast<double, Vector512<double>>(prog.AsSpan());
    var tailStart = factBlocks.Length * Vector512<double>.Count;

    // Mean of `fact`: fold both halves of each block into four running lanes.
    var laneSums = Vector256<double>.Zero;
    for (var b = 0; b < factBlocks.Length; b++)
    {
        var block = factBlocks[b];
        laneSums += block.GetLower() + block.GetUpper();
    }

    double mean = SumLanes(laneSums);
    for (var k = tailStart; k < fact.Length; k++)
    {
        mean += fact[k];
    }
    mean /= fact.Length;

    // Per-lane sums of squares over the vectorised part.
    var meanLanes = Vector256.Create(mean);
    var totalLanes = Vector256<double>.Zero;
    var residualLanes = Vector256<double>.Zero;
    for (var b = 0; b < factBlocks.Length; b++)
    {
        var observed = factBlocks[b];
        var predicted = progBlocks[b];
        var observedLow = observed.GetLower();
        var observedHigh = observed.GetUpper();

        var deviationLow = observedLow - meanLanes;
        var deviationHigh = observedHigh - meanLanes;
        totalLanes += deviationHigh * deviationHigh + deviationLow * deviationLow;

        var residualLow = observedLow - predicted.GetLower();
        var residualHigh = observedHigh - predicted.GetUpper();
        residualLanes += residualLow * residualLow + residualHigh * residualHigh;
    }

    double totalSquares = SumLanes(totalLanes);
    double residualSquares = SumLanes(residualLanes);

    // Scalar tail for the elements past the last whole block.
    for (var k = tailStart; k < fact.Length; k++)
    {
        var deviation = fact[k] - mean;
        var residual = fact[k] - prog[k];
        totalSquares += deviation * deviation;
        residualSquares += residual * residual;
    }

    return 1 - residualSquares / totalSquares;
}
#endregion | |
#region float | |
/// <summary>Baseline of the "float" category: scalar R^2 over arr1f / arr2f.</summary>
[Benchmark(Baseline = true)]
[BenchmarkCategory("float")]
public double r2_simple_float()
{
    return r2_simple_float(arr1f, arr2f);
}
/// <summary>"float" category: R^2 over arr1f / arr2f using 256-bit vectors.</summary>
[Benchmark]
[BenchmarkCategory("float")]
public double r2_SIMD_float()
{
    return r2_SIMD_float(arr1f, arr2f);
}
/// <summary>"float" category: R^2 over arr1f / arr2f reading the data in 512-bit blocks.</summary>
[Benchmark]
[BenchmarkCategory("float")]
public double r2_SIMD_float_512()
{
    return r2_SIMD_float_512(arr1f, arr2f);
}
/// <summary>
/// Scalar reference implementation of R^2 = 1 - SSres / SStot for single-precision input.
/// The mean is accumulated in float; the two sums of squares are accumulated in double.
/// </summary>
/// <param name="fact">Values whose mean and spread define SStot.</param>
/// <param name="prog">Values compared element-wise against <paramref name="fact"/>.</param>
/// <returns>1 - SSres / SStot. Empty input or zero SStot is not guarded against.</returns>
/// <exception cref="ArgumentNullException">Either array is null.</exception>
/// <exception cref="ArgumentException">The arrays differ in length.</exception>
static double r2_simple_float(float[] fact, float[] prog)
{
    ArgumentNullException.ThrowIfNull(fact);
    ArgumentNullException.ThrowIfNull(prog);

    var count = fact.Length;
    if (count != prog.Length)
    {
        throw new ArgumentException("Arrays must have the same length");
    }

    // Pass 1: arithmetic mean of `fact`, kept in single precision.
    float mean = 0.0f;
    for (var k = 0; k < count; k++)
    {
        mean += fact[k];
    }
    mean /= count;

    // Pass 2: each square is formed in float, then widened into the double accumulators.
    double totalSquares = 0.0;
    double residualSquares = 0.0;
    for (var k = 0; k < count; k++)
    {
        float observed = fact[k];
        float deviation = observed - mean;
        float residual = observed - prog[k];
        totalSquares += deviation * deviation;
        residualSquares += residual * residual;
    }

    return 1 - residualSquares / totalSquares;
}
/// <summary>
/// R^2 = 1 - SSres / SStot computed with 256-bit vectors (8 floats per step);
/// elements that do not fill a whole vector are handled by scalar tail loops.
/// All accumulation is done in single precision; only the result is widened to double.
/// </summary>
/// <param name="fact">Values whose mean and spread define SStot.</param>
/// <param name="prog">Values compared element-wise against <paramref name="fact"/>.</param>
/// <returns>1 - SSres / SStot. Empty input or zero SStot is not guarded against.</returns>
/// <exception cref="ArgumentNullException">Either array is null.</exception>
/// <exception cref="ArgumentException">The arrays differ in length.</exception>
static double r2_SIMD_float(float[] fact, float[] prog)
{
    ArgumentNullException.ThrowIfNull(fact);
    ArgumentNullException.ThrowIfNull(prog);
    if (fact.Length != prog.Length)
    {
        throw new ArgumentException("Arrays must have the same length");
    }

    // Horizontal add, lanes taken strictly left to right.
    static float SumLanes(Vector256<float> v) =>
        v.GetElement(0) + v.GetElement(1) + v.GetElement(2) + v.GetElement(3)
        + v.GetElement(4) + v.GetElement(5) + v.GetElement(6) + v.GetElement(7);

    // View both arrays as whole 8-lane blocks; the remainder starts at tailStart.
    var factBlocks = MemoryMarshal.Cast<float, Vector256<float>>(fact.AsSpan());
    var progBlocks = MemoryMarshal.Cast<float, Vector256<float>>(prog.AsSpan());
    var tailStart = factBlocks.Length * Vector256<float>.Count;

    // Mean of `fact`: per-lane sums, then lanes, then the scalar tail.
    var laneSums = Vector256<float>.Zero;
    for (var b = 0; b < factBlocks.Length; b++)
    {
        laneSums += factBlocks[b];
    }

    float mean = SumLanes(laneSums);
    for (var k = tailStart; k < fact.Length; k++)
    {
        mean += fact[k];
    }
    mean /= fact.Length;

    // Per-lane sums of squares over the vectorised part.
    var meanLanes = Vector256.Create(mean);
    var totalLanes = Vector256<float>.Zero;
    var residualLanes = Vector256<float>.Zero;
    for (var b = 0; b < factBlocks.Length; b++)
    {
        var observed = factBlocks[b];
        var predicted = progBlocks[b];
        var deviation = observed - meanLanes;
        var residual = observed - predicted;
        totalLanes += deviation * deviation;
        residualLanes += residual * residual;
    }

    float totalSquares = SumLanes(totalLanes);
    float residualSquares = SumLanes(residualLanes);

    // Scalar tail for the elements past the last whole block.
    for (var k = tailStart; k < fact.Length; k++)
    {
        float deviation = fact[k] - mean;
        float residual = fact[k] - prog[k];
        totalSquares += deviation * deviation;
        residualSquares += residual * residual;
    }

    return 1 - residualSquares / totalSquares;
}
/// <summary>
/// R^2 = 1 - SSres / SStot where the data is read in 512-bit blocks (16 floats) and the
/// arithmetic is carried out on each block's two 256-bit halves; elements that do not
/// fill a whole block are handled by scalar tail loops.
/// All accumulation is done in single precision; only the result is widened to double.
/// </summary>
/// <param name="fact">Values whose mean and spread define SStot.</param>
/// <param name="prog">Values compared element-wise against <paramref name="fact"/>.</param>
/// <returns>1 - SSres / SStot. Empty input or zero SStot is not guarded against.</returns>
/// <exception cref="ArgumentNullException">Either array is null.</exception>
/// <exception cref="ArgumentException">The arrays differ in length.</exception>
static double r2_SIMD_float_512(float[] fact, float[] prog)
{
    ArgumentNullException.ThrowIfNull(fact);
    ArgumentNullException.ThrowIfNull(prog);
    if (fact.Length != prog.Length)
    {
        throw new ArgumentException("Arrays must have the same length");
    }

    // Horizontal add, lanes taken strictly left to right.
    static float SumLanes(Vector256<float> v) =>
        v.GetElement(0) + v.GetElement(1) + v.GetElement(2) + v.GetElement(3)
        + v.GetElement(4) + v.GetElement(5) + v.GetElement(6) + v.GetElement(7);

    // View both arrays as whole 16-lane blocks; the remainder starts at tailStart.
    var factBlocks = MemoryMarshal.Cast<float, Vector512<float>>(fact.AsSpan());
    var progBlocks = MemoryMarshal.Cast<float, Vector512<float>>(prog.AsSpan());
    var tailStart = factBlocks.Length * Vector512<float>.Count;

    // Mean of `fact`: fold both halves of each block into eight running lanes.
    var laneSums = Vector256<float>.Zero;
    for (var b = 0; b < factBlocks.Length; b++)
    {
        var block = factBlocks[b];
        laneSums += block.GetLower() + block.GetUpper();
    }

    float mean = SumLanes(laneSums);
    for (var k = tailStart; k < fact.Length; k++)
    {
        mean += fact[k];
    }
    mean /= fact.Length;

    // Per-lane sums of squares over the vectorised part.
    var meanLanes = Vector256.Create(mean);
    var totalLanes = Vector256<float>.Zero;
    var residualLanes = Vector256<float>.Zero;
    for (var b = 0; b < factBlocks.Length; b++)
    {
        var observed = factBlocks[b];
        var predicted = progBlocks[b];
        var observedLow = observed.GetLower();
        var observedHigh = observed.GetUpper();

        var deviationLow = observedLow - meanLanes;
        var deviationHigh = observedHigh - meanLanes;
        totalLanes += deviationHigh * deviationHigh + deviationLow * deviationLow;

        var residualLow = observedLow - predicted.GetLower();
        var residualHigh = observedHigh - predicted.GetUpper();
        residualLanes += residualLow * residualLow + residualHigh * residualHigh;
    }

    float totalSquares = SumLanes(totalLanes);
    float residualSquares = SumLanes(residualLanes);

    // Scalar tail for the elements past the last whole block.
    for (var k = tailStart; k < fact.Length; k++)
    {
        float deviation = fact[k] - mean;
        float residual = fact[k] - prog[k];
        totalSquares += deviation * deviation;
        residualSquares += residual * residual;
    }

    return 1 - residualSquares / totalSquares;
}
#endregion | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment