Last active
November 18, 2019 19:09
-
-
Save EgorBo/164c1523774ed8be47029dd42fa530bf to your computer and use it in GitHub Desktop.
SSE4.1 and AVX2 in .NET Core 2.1.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.Linq; | |
using System.Runtime.Intrinsics; | |
using System.Runtime.Intrinsics.X86; | |
using BenchmarkDotNet.Attributes; | |
using BenchmarkDotNet.Running; | |
namespace ConsoleApp29 | |
{ | |
/// <summary>
/// BenchmarkDotNet harness that compares three ways of checking whether an
/// <see cref="int"/> array is in non-decreasing order: a scalar loop, an
/// SSE4.1 vectorized loop and an AVX2 vectorized loop.
/// </summary>
/// <remarks>
/// The vectorized methods use pointers and therefore require the project to be
/// compiled with unsafe code enabled.
/// </remarks>
public class Program
{
    /*
    BenchmarkDotNet=v0.10.14, OS=Windows 10.0.16299.371 (1709/FallCreatorsUpdate/Redstone3)
    Intel Core i7-8700K CPU 3.70GHz (Coffee Lake), 1 CPU, 12 logical and 6 physical cores
    Frequency=3609373 Hz, Resolution=277.0564 ns, Timer=TSC
    .NET Core SDK=2.1.300-preview2-008530
      [Host]     : .NET Core 2.1.0-preview2-26406-04 (CoreCLR 4.6.26406.07, CoreFX 4.6.26406.04), 64bit RyuJIT
      DefaultJob : .NET Core 2.1.0-preview2-26406-04 (CoreCLR 4.6.26406.07, CoreFX 4.6.26406.04), 64bit RyuJIT

                        Method |       Mean |     Error |    StdDev |
    -------------------------- |-----------:|----------:|----------:|
            IsSorted_Benchmark | 2,595.4 ns | 0.5622 ns | 0.4984 ns |
      IsSorted_Sse41_Benchmark |   768.8 ns | 1.1433 ns | 1.0135 ns |
                 IsSorted_Avx2 |   416.6 ns | 0.0970 ns | 0.0908 ns |
    */

    /// <summary>Input shared by all benchmarks; populated by <see cref="GlobalSetup"/>.</summary>
    private int[] _testArray;

    /// <summary>Entry point: runs every <c>[Benchmark]</c> method declared on this class.</summary>
    private static void Main()
    {
        BenchmarkRunner.Run<Program>();
    }

    /// <summary>
    /// Builds the benchmark input: the integers 1..4096 in ascending order, so every
    /// implementation has to scan the whole array before it can return <c>true</c>.
    /// </summary>
    [GlobalSetup]
    public void GlobalSetup()
    {
        _testArray = Enumerable.Range(1, 4096).ToArray();
    }

    /// <summary>Benchmarks the scalar implementation.</summary>
    /// <returns>The result of the sortedness check on the shared test array.</returns>
    [Benchmark]
    public bool IsSorted_Benchmark()
    {
        return IsSorted_Regular(_testArray);
    }

    /// <summary>Benchmarks the SSE4.1 implementation.</summary>
    /// <returns>The result of the sortedness check on the shared test array.</returns>
    [Benchmark]
    public bool IsSorted_Sse41_Benchmark()
    {
        return IsSorted_Sse41(_testArray);
    }

    /// <summary>Benchmarks the AVX2 implementation.</summary>
    /// <returns>The result of the sortedness check on the shared test array.</returns>
    [Benchmark]
    public bool IsSorted_Avx2()
    {
        return IsSorted_Avx2(_testArray);
    }

    /// <summary>
    /// Scalar reference implementation: compares every element with its successor.
    /// </summary>
    /// <param name="array">Array to inspect.</param>
    /// <returns>
    /// <c>true</c> if no element is greater than the element that follows it
    /// (arrays with fewer than two elements count as sorted); otherwise <c>false</c>.
    /// </returns>
    private static bool IsSorted_Regular(int[] array)
    {
        if (array.Length < 2)
        {
            return true;
        }

        for (int i = 0; i < array.Length - 1; i++)
        {
            if (array[i] > array[i + 1])
            {
                return false;
            }
        }

        return true;
    }

    /// <summary>
    /// SSE4.1 implementation: checks four adjacent pairs per iteration, then finishes
    /// the remaining pairs with a scalar loop. Falls back to
    /// <see cref="IsSorted_Regular"/> when SSE4.1 is not supported.
    /// </summary>
    /// <param name="array">Array to inspect.</param>
    /// <returns>
    /// <c>true</c> if no element is greater than the element that follows it
    /// (arrays with fewer than two elements count as sorted); otherwise <c>false</c>.
    /// </returns>
    private static unsafe bool IsSorted_Sse41(int[] array)
    {
        if (array.Length < 2)
        {
            return true;
        }

        if (!Sse41.IsSupported)
        {
            // No hardware support: use the scalar implementation.
            return IsSorted_Regular(array);
        }

        int i = 0;

        // Length >= 2 is guaranteed above, so taking the address of element 0 is safe.
        fixed (int* ptr = &array[0])
        {
            // One iteration compares elements [i .. i+3] against [i+1 .. i+4],
            // so index i + 4 must exist; hence the "Length - 4" bound.
            for (; i < array.Length - 4; i += 4)
            {
                Vector128<int> curr = Sse2.LoadVector128(ptr + i);
                Vector128<int> next = Sse2.LoadVector128(ptr + i + 1);

                // A lane is all ones where curr > next, i.e. where the order is violated.
                Vector128<int> mask = Sse2.CompareGreaterThan(curr, next);

                // TestZ is true when (mask AND mask) is all zeros, i.e. no violation.
                // It replaces TestAllZeros, which is not part of the stable intrinsics API.
                if (!Sse41.TestZ(mask, mask))
                {
                    return false;
                }
            }
        }

        // Scalar tail: the pairs the vector loop could not cover.
        for (; i < array.Length - 1; i++)
        {
            if (array[i] > array[i + 1])
            {
                return false;
            }
        }

        return true;
    }

    /// <summary>
    /// AVX2 implementation: checks eight adjacent pairs per iteration, then finishes
    /// the remaining pairs with a scalar loop. Falls back to
    /// <see cref="IsSorted_Regular"/> when AVX2 is not supported.
    /// </summary>
    /// <param name="array">Array to inspect.</param>
    /// <returns>
    /// <c>true</c> if no element is greater than the element that follows it
    /// (arrays with fewer than two elements count as sorted); otherwise <c>false</c>.
    /// </returns>
    private static unsafe bool IsSorted_Avx2(int[] array)
    {
        if (array.Length < 2)
        {
            return true;
        }

        if (!Avx2.IsSupported)
        {
            // No hardware support: use the scalar implementation.
            return IsSorted_Regular(array);
        }

        int i = 0;

        // Length >= 2 is guaranteed above, so taking the address of element 0 is safe.
        fixed (int* ptr = &array[0])
        {
            // One iteration compares elements [i .. i+7] against [i+1 .. i+8],
            // so index i + 8 must exist; hence the "Length - 8" bound.
            for (; i < array.Length - 8; i += 8)
            {
                Vector256<int> curr = Avx.LoadVector256(ptr + i);
                Vector256<int> next = Avx.LoadVector256(ptr + i + 1);

                // A lane is all ones where curr > next, i.e. where the order is violated.
                Vector256<int> mask = Avx2.CompareGreaterThan(curr, next);

                // TestZ is true when (mask AND mask) is all zeros, i.e. no violation.
                if (!Avx.TestZ(mask, mask))
                {
                    return false;
                }
            }
        }

        // Scalar tail: the pairs the vector loop could not cover.
        for (; i < array.Length - 1; i++)
        {
            if (array[i] > array[i + 1])
            {
                return false;
            }
        }

        return true;
    }
}
// SSE and AVX implementations are based on WojciechMula's C++ examples: https://github.com/WojciechMula/toys/blob/master/is_sorted/avx2.simd-generic.cpp
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment