Skip to content

Instantly share code, notes, and snippets.

@eerhardt
Last active June 5, 2018 22:49
Show Gist options
  • Save eerhardt/1483cf76f0d284bf085ae5ba0142d654 to your computer and use it in GitHub Desktop.
Save eerhardt/1483cf76f0d284bf085ae5ba0142d654 to your computer and use it in GitHub Desktop.
CPUMath vs Hardware Intrinsics Benchmark
<Project Sdk="Microsoft.NET.Sdk">

  <!-- Console executable targeting .NET Core 2.1. Unsafe blocks are enabled because
       the benchmark source pins arrays and works with raw float pointers. -->
  <PropertyGroup>
    <OutputType>Exe</OutputType>
    <TargetFramework>netcoreapp2.1</TargetFramework>
    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
  </PropertyGroup>

  <!-- NuGet packages: the benchmark harness, ML.NET (for its CpuMath types), and the
       experimental hardware-intrinsics API surface. -->
  <ItemGroup>
    <PackageReference Include="BenchmarkDotNet" Version="0.10.14" />
    <PackageReference Include="Microsoft.ML" Version="0.2.0" />
    <PackageReference Include="System.Runtime.Intrinsics.Experimental" Version="4.5.0-rtm" />
  </ItemGroup>

</Project>
using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Running;
using Microsoft.ML.Runtime.Internal.CpuMath;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
using System.Security;
namespace IntrinsicsTest
{
/// <summary>
/// BenchmarkDotNet host comparing three ways of adding one float buffer into another:
/// <c>SseUtils.Add</c> over <c>AlignedArray</c>, a direct P/Invoke through
/// <c>Thunk.AddU</c>, and the managed hardware-intrinsics loop in
/// <c>IntrinsicsUtils.Add</c>.
/// </summary>
/// <remarks>
/// Every benchmark allocates and fills its input buffers inside the measured method, so
/// the reported times include that setup work in addition to the add itself.
/// </remarks>
public class Program
{
    /// <summary>Entry point: runs every <c>[Benchmark]</c> method declared on this class.</summary>
    /// <param name="args">Command-line arguments (unused).</param>
    public static void Main(string[] args)
    {
        BenchmarkRunner.Run<Program>();
    }

    [Benchmark] public void CpuMath_100() => CpuMath(100);
    [Benchmark] public void CpuMath_1_000() => CpuMath(1_000);
    [Benchmark] public void CpuMath_10_000() => CpuMath(10_000);

    /// <summary>
    /// Builds two <c>AlignedArray</c> buffers of <paramref name="nums"/> elements, fills
    /// both with <c>i * 100</c>, and calls <c>SseUtils.Add</c> on them.
    /// </summary>
    /// <param name="nums">Number of float elements in each buffer.</param>
    public void CpuMath(int nums)
    {
        // NOTE(review): SseUtils.CbAlign is presumably the required byte alignment - confirm.
        AlignedArray a = new AlignedArray(nums, SseUtils.CbAlign);
        AlignedArray b = new AlignedArray(nums, SseUtils.CbAlign);
        for (int i = 0; i < a.Size; i++)
        {
            a[i] = i * 100;
            b[i] = i * 100;
        }

        SseUtils.Add(a, b);
    }

    [Benchmark] public void CpuMathNative_100() => CpuMathNative(100);
    [Benchmark] public void CpuMathNative_1_000() => CpuMathNative(1_000);
    [Benchmark] public void CpuMathNative_10_000() => CpuMathNative(10_000);

    /// <summary>
    /// Builds two managed float arrays of <paramref name="nums"/> elements, fills both with
    /// <c>i * 100</c>, and hands them to the native <c>AddU</c> export.
    /// </summary>
    /// <param name="nums">Number of float elements in each array.</param>
    public void CpuMathNative(int nums)
    {
        float[] a = new float[nums];
        float[] b = new float[nums];
        for (int i = 0; i < a.Length; i++)
        {
            a[i] = i * 100;
            b[i] = i * 100;
        }

        NativeAdd(a, b);
    }

    /// <summary>
    /// Pins both arrays and calls <c>Thunk.AddU</c> with <c>dst.Length</c> as the element count.
    /// </summary>
    /// <param name="src">Source values; must hold at least <c>dst.Length</c> elements.</param>
    /// <param name="dst">Destination buffer passed to the native routine.</param>
    /// <exception cref="System.ArgumentNullException">Either array is null.</exception>
    /// <exception cref="System.ArgumentException">
    /// <paramref name="src"/> is shorter than <paramref name="dst"/>.
    /// </exception>
    private static unsafe void NativeAdd(float[] src, float[] dst)
    {
        if (src == null)
        {
            throw new System.ArgumentNullException(nameof(src));
        }

        if (dst == null)
        {
            throw new System.ArgumentNullException(nameof(dst));
        }

        // The native side is told to process dst.Length elements, so a shorter source
        // would be read past its end.
        if (src.Length < dst.Length)
        {
            throw new System.ArgumentException("Source must contain at least as many elements as destination.", nameof(src));
        }

        // Nothing to add; also avoids handing null pointers to native code.
        if (dst.Length == 0)
        {
            return;
        }

        // Pinning the array itself (rather than &array[0]) is the idiomatic form and
        // does not index into the array.
        fixed (float* psrc = src)
        fixed (float* pdst = dst)
        {
            Thunk.AddU(psrc, pdst, dst.Length);
        }
    }

    [Benchmark] public void IntrinsicsMath_100() => IntrinsicsMath(100);
    [Benchmark] public void IntrinsicsMath_1_000() => IntrinsicsMath(1_000);
    [Benchmark] public void IntrinsicsMath_10_000() => IntrinsicsMath(10_000);

    /// <summary>
    /// Builds two managed float arrays of <paramref name="nums"/> elements, fills both with
    /// <c>i * 100</c>, and adds them with <c>IntrinsicsUtils.Add</c>.
    /// </summary>
    /// <param name="nums">Number of float elements in each array.</param>
    public void IntrinsicsMath(int nums)
    {
        float[] a = new float[nums];
        float[] b = new float[nums];
        for (int i = 0; i < a.Length; i++)
        {
            a[i] = i * 100;
            b[i] = i * 100;
        }

        IntrinsicsUtils.Add(a, b);
    }
}
/// <summary>
/// P/Invoke declarations for the native CpuMath library.
/// </summary>
internal static unsafe class Thunk
{
    // Extension-less library name: the runtime's native-library probing adds the
    // platform-specific prefix/suffix (CpuMathNative.dll, libCpuMathNative.so,
    // libCpuMathNative.dylib). A hard-coded ".dll" suffix only resolves on Windows.
    internal const string NativePath = "CpuMathNative";

    /// <summary>
    /// Native <c>AddU</c> export.
    /// </summary>
    /// <param name="ps">Pointer to the source floats (treated as const by the native signature).</param>
    /// <param name="pd">Pointer to the destination floats.</param>
    /// <param name="c">Element count passed to the native routine.</param>
    /// <remarks>
    /// NOTE(review): presumably adds <paramref name="c"/> source elements into the destination
    /// in place without alignment requirements ("U") - confirm against the native source.
    /// </remarks>
    [DllImport(NativePath), SuppressUnmanagedCodeSecurity]
    public static extern void AddU(/*const*/ float* ps, float* pd, int c);
}
/// <summary>
/// Element-wise float addition implemented with SSE hardware intrinsics.
/// </summary>
internal class IntrinsicsUtils
{
    // Number of 32-bit floats held by one 128-bit vector.
    private const int FloatsPerVector = 4;

    /// <summary>
    /// Adds each element of <paramref name="src"/> to the element of <paramref name="dst"/>
    /// at the same index, storing the sum in <paramref name="dst"/>.
    /// </summary>
    /// <param name="src">Values to add; read only.</param>
    /// <param name="dst">
    /// Values updated in place; must hold at least <c>src.Length</c> elements. Elements
    /// beyond <c>src.Length</c> are left untouched.
    /// </param>
    /// <exception cref="System.ArgumentNullException">Either array is null.</exception>
    /// <exception cref="System.ArgumentException">
    /// <paramref name="dst"/> is shorter than <paramref name="src"/>.
    /// </exception>
    internal static unsafe void Add(float[] src, float[] dst)
    {
        if (src == null)
        {
            throw new System.ArgumentNullException(nameof(src));
        }

        if (dst == null)
        {
            throw new System.ArgumentNullException(nameof(dst));
        }

        // Writing src.Length results into a shorter destination would overrun it.
        if (dst.Length < src.Length)
        {
            throw new System.ArgumentException("Destination must contain at least as many elements as source.", nameof(dst));
        }

        int count = src.Length;

        // Pinning the arrays directly (instead of &array[0]) yields a null pointer for
        // an empty array rather than throwing; both loops below are then skipped.
        fixed (float* psrc = src)
        fixed (float* pdst = dst)
        {
            float* pSrcCurrent = psrc;
            float* pDstCurrent = pdst;
            float* pEnd = psrc + count;

            if (Sse.IsSupported)
            {
                // Only whole groups of four go through the vector path, so no load or
                // store ever touches memory beyond the arrays.
                float* pVectorEnd = psrc + (count - (count % FloatsPerVector));
                while (pSrcCurrent < pVectorEnd)
                {
                    Vector128<float> srcVector = Sse.LoadVector128(pSrcCurrent);
                    Vector128<float> dstVector = Sse.LoadVector128(pDstCurrent);
                    Vector128<float> result = Sse.Add(srcVector, dstVector);
                    Sse.Store(pDstCurrent, result);

                    pSrcCurrent += FloatsPerVector;
                    pDstCurrent += FloatsPerVector;
                }
            }

            // Scalar tail for the remaining 0-3 elements; this is also the complete
            // fallback path when SSE is not available.
            while (pSrcCurrent < pEnd)
            {
                *pDstCurrent += *pSrcCurrent;
                pSrcCurrent++;
                pDstCurrent++;
            }
        }
    }
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment