Created
December 7, 2024 11:47
-
-
Save DBalashov/f7be9ba4468cd6b1714db48020083da4 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System.Runtime.InteropServices; | |
using System.Runtime.Intrinsics; | |
using System.Runtime.Intrinsics.X86; | |
using BenchmarkDotNet.Attributes; | |
using BenchmarkDotNet.Configs; | |
using BenchmarkDotNet.Jobs; | |
using BenchmarkDotNet.Running; | |
// | Method | Categories | N | Mean | Error | StdDev | Ratio | | |
// |----------------- |----------- |------- |--------------:|-------------:|-----------:|------:| | |
// | r2_double_simple | double | 256 | 328.51 ns | 0.878 ns | 0.733 ns | 1.00 | | |
// | r2_double_SIMD | double | 256 | 84.99 ns | 0.234 ns | 0.195 ns | 0.26 | | |
// | | | | | | | | | |
// | r2_double_simple | double | 1024 | 1,303.87 ns | 1.334 ns | 1.042 ns | 1.00 | | |
// | r2_double_SIMD | double | 1024 | 334.40 ns | 0.949 ns | 0.888 ns | 0.26 | | |
// | | | | | | | | | |
// | r2_double_simple | double | 262144 | 336,448.25 ns | 606.907 ns | 567.701 ns | 1.00 | | |
// | r2_double_SIMD | double | 262144 | 86,994.17 ns | 306.840 ns | 287.018 ns | 0.26 | | |
// | | | | | | | | | |
// | r2_simple_float | float | 256 | 331.08 ns | 0.350 ns | 0.327 ns | 1.00 | | |
// | r2_SIMD_float | float | 256 | 48.68 ns | 0.132 ns | 0.110 ns | 0.15 | | |
// | | | | | | | | | |
// | r2_simple_float | float | 1024 | 1,322.64 ns | 11.351 ns | 10.618 ns | 1.00 | | |
// | r2_SIMD_float | float | 1024 | 174.04 ns | 0.270 ns | 0.253 ns | 0.13 | | |
// | | | | | | | | | |
// | r2_simple_float | float | 262144 | 336,838.97 ns | 1,039.619 ns | 972.460 ns | 1.00 | | |
// | r2_SIMD_float | float | 262144 | 43,231.15 ns | 150.809 ns | 141.067 ns | 0.13 | | |
#pragma warning disable CS8618 | |
// Entry point: run every benchmark declared on MainTest.
// The value returned by Run is not used anywhere in this file, so it is discarded explicitly.
_ = BenchmarkRunner.Run<MainTest>();
/// <summary>
/// Benchmark suite that times a scalar loop against a <see cref="Vector256{T}"/> implementation of
/// R² = 1 - SSres / SStot, where SSres is the sum of squared differences between the two arrays and
/// SStot is the sum of squared deviations of the first array from its mean.
/// There is one benchmark category per element type (<c>double</c> and <c>float</c>).
/// </summary>
[SimpleJob(RuntimeMoniker.Net90), GroupBenchmarksBy(BenchmarkLogicalGroupRule.ByCategory), CategoriesColumn]
// [WarmupCount(2)]
// [IterationCount(2)]
// [MemoryDiagnoser]
public class MainTest
{
    // Fixed seed so that every benchmark case (and every re-run) is measured on identical input data.
    // With an unseeded generator the baseline and the SIMD variant would each get different arrays.
    private const int Seed = 42;

    private double[] arr1;
    private double[] arr2;
    private float[] arr1f;
    private float[] arr2f;

    /// <summary>Number of elements in each input array.</summary>
    [Params(256, 1024, 256 * 1024)]
    public int N { get; set; }

    /// <summary>Fills the four input arrays with <see cref="N"/> pseudo-random values in [0, 1).</summary>
    [GlobalSetup]
    public void GlobalSetup()
    {
        var rng = new Random(Seed);
        arr1 = NextDoubles(rng, N);
        arr2 = NextDoubles(rng, N);
        arr1f = NextSingles(rng, N);
        arr2f = NextSingles(rng, N);
    }

    /// <summary>Creates an array of <paramref name="count"/> values drawn from <paramref name="rng"/>.</summary>
    private static double[] NextDoubles(Random rng, int count)
    {
        var values = new double[count];
        for (var i = 0; i < values.Length; i++)
        {
            values[i] = rng.NextDouble();
        }

        return values;
    }

    /// <summary>
    /// Creates an array of <paramref name="count"/> values drawn from <paramref name="rng"/>,
    /// each narrowed from <c>double</c> to <c>float</c>.
    /// </summary>
    private static float[] NextSingles(Random rng, int count)
    {
        var values = new float[count];
        for (var i = 0; i < values.Length; i++)
        {
            values[i] = (float) rng.NextDouble();
        }

        return values;
    }

    /// <summary>Adds the lanes of <paramref name="v"/> from lane 0 upwards.</summary>
    private static double SumLanes(Vector256<double> v)
    {
        var total = 0.0;
        for (var lane = 0; lane < Vector256<double>.Count; lane++)
        {
            total += v[lane];
        }

        return total;
    }

    /// <summary>
    /// Adds the lanes of <paramref name="v"/> from lane 0 upwards, widening each lane to <c>double</c>
    /// before it is added so the reduction itself does not round to single precision.
    /// </summary>
    private static double SumLanes(Vector256<float> v)
    {
        var total = 0.0;
        for (var lane = 0; lane < Vector256<float>.Count; lane++)
        {
            total += v[lane];
        }

        return total;
    }

    #region double

    [Benchmark(Baseline = true), BenchmarkCategory("double")]
    public double r2_double_simple() => r2_simple_double(arr1, arr2);

    [Benchmark, BenchmarkCategory("double")]
    public double r2_double_SIMD() => r2_SIMD_double(arr1, arr2);

    /// <summary>Scalar R² of <paramref name="prog"/> against <paramref name="fact"/>.</summary>
    /// <param name="fact">Reference values; their mean defines SStot.</param>
    /// <param name="prog">Values compared element-wise with <paramref name="fact"/>.</param>
    /// <returns>
    /// <c>1 - SSres / SStot</c>. No guard is applied to the division: empty input yields NaN and a
    /// zero SStot yields a non-finite result.
    /// </returns>
    /// <exception cref="ArgumentNullException">Either array is null.</exception>
    /// <exception cref="ArgumentException">The arrays differ in length.</exception>
    private static double r2_simple_double(double[] fact, double[] prog)
    {
        ArgumentNullException.ThrowIfNull(fact);
        ArgumentNullException.ThrowIfNull(prog);
        if (fact.Length != prog.Length)
        {
            throw new ArgumentException("Arrays must have the same length");
        }

        var avg = 0.0;
        foreach (var value in fact)
        {
            avg += value;
        }

        avg /= fact.Length;

        var sstot = 0.0;
        var ssres = 0.0;
        for (var i = 0; i < fact.Length; i++)
        {
            var tot = fact[i] - avg;
            sstot += tot * tot;
            var res = fact[i] - prog[i];
            ssres += res * res;
        }

        return 1 - ssres / sstot;
    }

    /// <summary>
    /// <see cref="Vector256{T}"/> version of <see cref="r2_simple_double"/>: whole vectors are processed
    /// in the vector loops and the elements that do not fill a vector are processed by scalar tail loops.
    /// </summary>
    /// <param name="fact">Reference values; their mean defines SStot.</param>
    /// <param name="prog">Values compared element-wise with <paramref name="fact"/>.</param>
    /// <returns>
    /// <c>1 - SSres / SStot</c>. No guard is applied to the division: empty input yields NaN and a
    /// zero SStot yields a non-finite result.
    /// </returns>
    /// <exception cref="ArgumentNullException">Either array is null.</exception>
    /// <exception cref="ArgumentException">The arrays differ in length.</exception>
    private static double r2_SIMD_double(double[] fact, double[] prog)
    {
        ArgumentNullException.ThrowIfNull(fact);
        ArgumentNullException.ThrowIfNull(prog);
        if (fact.Length != prog.Length)
        {
            throw new ArgumentException("Arrays must have the same length");
        }

        // Reinterpret the arrays as whole vectors; Cast drops the elements that do not fill a vector.
        var vfact = MemoryMarshal.Cast<double, Vector256<double>>(fact);
        var vprog = MemoryMarshal.Cast<double, Vector256<double>>(prog);

        // Index of the first element that is not covered by a whole vector.
        var tailStart = vfact.Length * Vector256<double>.Count;

        var vsum = Vector256<double>.Zero;
        foreach (var v in vfact)
        {
            vsum += v;
        }

        var avg = SumLanes(vsum);
        for (var i = tailStart; i < fact.Length; i++)
        {
            avg += fact[i];
        }

        avg /= fact.Length;

        var vavg = Vector256.Create(avg);
        var vtot = Vector256<double>.Zero;
        var vres = Vector256<double>.Zero;
        for (var i = 0; i < vfact.Length; i++)
        {
            var vf = vfact[i];
            var factDiff = vf - vavg;
            vtot += factDiff * factDiff;
            var resDiff = vf - vprog[i];
            vres += resDiff * resDiff;
        }

        var sstot = SumLanes(vtot);
        var ssres = SumLanes(vres);
        for (var i = tailStart; i < fact.Length; i++)
        {
            var factDiff = fact[i] - avg;
            sstot += factDiff * factDiff;
            var resDiff = fact[i] - prog[i];
            ssres += resDiff * resDiff;
        }

        return 1 - ssres / sstot;
    }

    #endregion

    #region float

    [Benchmark(Baseline = true), BenchmarkCategory("float")]
    public double r2_simple_float() => r2_simple_float(arr1f, arr2f);

    [Benchmark, BenchmarkCategory("float")]
    public double r2_SIMD_float() => r2_SIMD_float(arr1f, arr2f);

    /// <summary>
    /// Scalar R² of <paramref name="prog"/> against <paramref name="fact"/> for <c>float</c> input.
    /// The mean and the per-element differences are single precision; the sums of squares are
    /// accumulated in <c>double</c>.
    /// </summary>
    /// <param name="fact">Reference values; their mean defines SStot.</param>
    /// <param name="prog">Values compared element-wise with <paramref name="fact"/>.</param>
    /// <returns>
    /// <c>1 - SSres / SStot</c>. No guard is applied to the division: empty input yields NaN and a
    /// zero SStot yields a non-finite result.
    /// </returns>
    /// <exception cref="ArgumentNullException">Either array is null.</exception>
    /// <exception cref="ArgumentException">The arrays differ in length.</exception>
    private static double r2_simple_float(float[] fact, float[] prog)
    {
        ArgumentNullException.ThrowIfNull(fact);
        ArgumentNullException.ThrowIfNull(prog);
        if (fact.Length != prog.Length)
        {
            throw new ArgumentException("Arrays must have the same length");
        }

        var avg = 0.0f;
        foreach (var value in fact)
        {
            avg += value;
        }

        avg /= fact.Length;

        var sstot = 0.0;
        var ssres = 0.0;
        for (var i = 0; i < fact.Length; i++)
        {
            var tot = fact[i] - avg;
            sstot += tot * tot;
            var res = fact[i] - prog[i];
            ssres += res * res;
        }

        return 1 - ssres / sstot;
    }

    /// <summary>
    /// <see cref="Vector256{T}"/> version of the scalar <c>float</c> R² computation. The vector loops
    /// accumulate in single-precision lanes; the lane reduction, the scalar tail and the final division
    /// are carried out in <c>double</c>, as in the scalar baseline.
    /// </summary>
    /// <param name="fact">Reference values; their mean defines SStot.</param>
    /// <param name="prog">Values compared element-wise with <paramref name="fact"/>.</param>
    /// <returns>
    /// <c>1 - SSres / SStot</c>. No guard is applied to the division: empty input yields NaN and a
    /// zero SStot yields a non-finite result.
    /// </returns>
    /// <exception cref="ArgumentNullException">Either array is null.</exception>
    /// <exception cref="ArgumentException">The arrays differ in length.</exception>
    private static double r2_SIMD_float(float[] fact, float[] prog)
    {
        ArgumentNullException.ThrowIfNull(fact);
        ArgumentNullException.ThrowIfNull(prog);
        if (fact.Length != prog.Length)
        {
            throw new ArgumentException("Arrays must have the same length");
        }

        // Reinterpret the arrays as whole vectors; Cast drops the elements that do not fill a vector.
        var vfact = MemoryMarshal.Cast<float, Vector256<float>>(fact);
        var vprog = MemoryMarshal.Cast<float, Vector256<float>>(prog);

        // Index of the first element that is not covered by a whole vector.
        var tailStart = vfact.Length * Vector256<float>.Count;

        var vsum = Vector256<float>.Zero;
        foreach (var v in vfact)
        {
            vsum += v;
        }

        var sum = SumLanes(vsum);
        for (var i = tailStart; i < fact.Length; i++)
        {
            sum += fact[i];
        }

        // The mean is narrowed back to float because it is broadcast into a float vector below.
        var avg = (float) (sum / fact.Length);

        var vavg = Vector256.Create(avg);
        var vtot = Vector256<float>.Zero;
        var vres = Vector256<float>.Zero;
        for (var i = 0; i < vfact.Length; i++)
        {
            var vf = vfact[i];
            var factDiff = vf - vavg;
            vtot += factDiff * factDiff;
            var resDiff = vf - vprog[i];
            vres += resDiff * resDiff;
        }

        var sstot = SumLanes(vtot);
        var ssres = SumLanes(vres);
        for (var i = tailStart; i < fact.Length; i++)
        {
            var factDiff = fact[i] - avg;
            sstot += factDiff * factDiff;
            var resDiff = fact[i] - prog[i];
            ssres += resDiff * resDiff;
        }

        return 1 - ssres / sstot;
    }

    #endregion
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment