Created
November 2, 2015 06:41
-
-
Save theraot/1bfd0deb4a1aab0a27d8 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.Diagnostics; | |
using System.Reflection.Emit; | |
using System.Runtime.InteropServices; | |
using System.Security; | |
namespace TestPerf | |
{ | |
public class Benchmark | |
{ | |
/// <summary>
/// Signature shared by the generated block-copy routine: receives a destination pointer,
/// a source pointer and a byte count.
/// </summary>
public unsafe delegate void CopyBlockDelegate(void* des, void* src, uint bytes);

// Created once, when the type is initialized, by GenerateCpBlk(); CpBlk invokes it.
private static readonly unsafe CopyBlockDelegate _cpBlk = GenerateCpBlk();
/// <summary>
/// P/Invoke declaration bound to the <c>memcpy</c> entry point of msvcrt.dll, called with
/// the cdecl convention and without last-error capture or unmanaged-code security checks.
/// </summary>
// NOTE(review): the native parameter is a size_t, which is pointer-sized; ulong is always
// 8 bytes, so in a 32-bit process the marshalled argument is wider than the native one.
// Confirm this is intended before reusing the declaration outside this benchmark.
[SuppressUnmanagedCodeSecurity]
[DllImport("msvcrt.dll", EntryPoint = "memcpy", CallingConvention = CallingConvention.Cdecl, SetLastError = false)]
public static unsafe extern void* memcpy(void* dest, void* src, ulong count);
/// <summary>
/// Forwards a copy request to the delegate stored in <c>_cpBlk</c>.
/// </summary>
/// <param name="dest">Destination pointer handed to the delegate.</param>
/// <param name="src">Source pointer handed to the delegate.</param>
/// <param name="count">Number of bytes handed to the delegate.</param>
static unsafe void CpBlk(void* dest, void* src, uint count)
{
    // Take a local copy of the static field, then invoke it.
    CopyBlockDelegate copier = _cpBlk;
    copier.Invoke(dest, src, count);
}
/// <summary>
/// Hand-written copy: moves <paramref name="count"/> bytes from <paramref name="src"/> to
/// <paramref name="dest"/>, eight bytes at a time, then finishes the remainder byte by byte.
/// </summary>
/// <param name="dest">Start of the destination buffer.</param>
/// <param name="src">Start of the source buffer.</param>
/// <param name="count">Number of bytes to copy. Zero or negative values copy nothing.</param>
static unsafe void Custom(void* dest, void* src, int count)
{
    // A negative count that is not a multiple of 8 used to skip the 8-byte loop but still
    // produce a positive remainder (e.g. -1 -> 7), copying bytes nobody asked for.
    if (count <= 0)
    {
        return;
    }

    var pDest = (long*)dest;
    var pSrc = (long*)src;

    // Bulk phase: one 64-bit load/store per 8 bytes.
    var words = count >> 3;
    for (var i = 0; i < words; i++)
    {
        *pDest++ = *pSrc++;
    }

    // Tail phase: the 0..7 bytes that did not fill a whole 64-bit word.
    var pDestB = (byte*)pDest;
    var pSrcB = (byte*)pSrc;
    var tail = count & 7;
    for (var i = 0; i < tail; i++)
    {
        *pDestB++ = *pSrcB++;
    }
}
/// <summary>
/// Builds a <see cref="DynamicMethod"/> named "CopyBlockIL" whose body loads its three
/// arguments, executes the <c>cpblk</c> IL instruction and returns, then exposes it as a
/// <c>CopyBlockDelegate</c>.
/// </summary>
/// <returns>A delegate bound to the generated method.</returns>
static unsafe CopyBlockDelegate GenerateCpBlk()
{
    // typeof(void*) names a pointer type, which C# only permits inside an unsafe context;
    // hence the 'unsafe' modifier on this method.
    var parameterTypes = new[] { typeof(void*), typeof(void*), typeof(uint) };
    var method = new DynamicMethod("CopyBlockIL", typeof(void), parameterTypes, typeof(Benchmark));
    var emitter = method.GetILGenerator();

    // emit IL: push the three arguments in declaration order, copy the block, return
    emitter.Emit(OpCodes.Ldarg_0);
    emitter.Emit(OpCodes.Ldarg_1);
    emitter.Emit(OpCodes.Ldarg_2);
    emitter.Emit(OpCodes.Cpblk);
    emitter.Emit(OpCodes.Ret);

    // compile to delegate
    return (CopyBlockDelegate)method.CreateDelegate(typeof(CopyBlockDelegate));
}
/// <summary>
/// Benchmark driver. For 20 block sizes, starting at 4 bytes and doubling each round, it
/// times six ways of copying a byte block back and forth between two pinned arrays
/// (Buffer.BlockCopy, CpBlk, memcpy, Array.Copy, Custom, Marshal.Copy) and prints one
/// tab-separated line of throughput figures per block size.
/// </summary>
static unsafe void Main()
{
    Console.WriteLine(Environment.Is64BitProcess ? "64 bits" : "32 bits");
    Console.WriteLine("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}", "Size", nameof(Buffer), nameof(CpBlk), nameof(memcpy), nameof(Array), nameof(Custom), nameof(Marshal));

    for (int round = 0, blockSize = 4; round < 20; round++, blockSize *= 2)
    {
        var dest = new byte[blockSize];
        var src = new byte[blockSize];
        for (var k = 0; k < src.Length; k++)
        {
            src[k] = (byte)k;
        }

        fixed (void* pDest = &dest[0])
        fixed (void* pSrc = &src[0])
        {
            var pDestPtr = (IntPtr)pDest;
            var pSrcPtr = (IntPtr)pSrc;

            // Every timed loop runs 'iterations' passes of ten copies each, so the number
            // of bytes moved per measurement is the same for every block size.
            var iterations = (1 << 26) / blockSize;
            var size = (uint)blockSize;
            var nativeSize = (ulong)blockSize;

            // --- Buffer.BlockCopy ---
            var timer = Stopwatch.StartNew();
            for (var n = 0; n < iterations; n++)
            {
                Buffer.BlockCopy(src, 0, dest, 0, blockSize);
                Buffer.BlockCopy(dest, 0, src, 0, blockSize);
                Buffer.BlockCopy(src, 0, dest, 0, blockSize);
                Buffer.BlockCopy(dest, 0, src, 0, blockSize);
                Buffer.BlockCopy(src, 0, dest, 0, blockSize);
                Buffer.BlockCopy(dest, 0, src, 0, blockSize);
                Buffer.BlockCopy(src, 0, dest, 0, blockSize);
                Buffer.BlockCopy(dest, 0, src, 0, blockSize);
                Buffer.BlockCopy(src, 0, dest, 0, blockSize);
                Buffer.BlockCopy(dest, 0, src, 0, blockSize);
            }
            timer.Stop();
            var bufferMs = timer.ElapsedMilliseconds;

            // --- cpblk through the generated delegate ---
            timer.Restart();
            for (var n = 0; n < iterations; n++)
            {
                CpBlk(pDest, pSrc, size);
                CpBlk(pSrc, pDest, size);
                CpBlk(pDest, pSrc, size);
                CpBlk(pSrc, pDest, size);
                CpBlk(pDest, pSrc, size);
                CpBlk(pSrc, pDest, size);
                CpBlk(pDest, pSrc, size);
                CpBlk(pSrc, pDest, size);
                CpBlk(pDest, pSrc, size);
                CpBlk(pSrc, pDest, size);
            }
            timer.Stop();
            var cpBlkMs = timer.ElapsedMilliseconds;

            // --- memcpy via P/Invoke ---
            timer.Restart();
            for (var n = 0; n < iterations; n++)
            {
                memcpy(pDest, pSrc, nativeSize);
                memcpy(pSrc, pDest, nativeSize);
                memcpy(pDest, pSrc, nativeSize);
                memcpy(pSrc, pDest, nativeSize);
                memcpy(pDest, pSrc, nativeSize);
                memcpy(pSrc, pDest, nativeSize);
                memcpy(pDest, pSrc, nativeSize);
                memcpy(pSrc, pDest, nativeSize);
                memcpy(pDest, pSrc, nativeSize);
                memcpy(pSrc, pDest, nativeSize);
            }
            timer.Stop();
            var memcpyMs = timer.ElapsedMilliseconds;

            // --- Array.Copy ---
            timer.Restart();
            for (var n = 0; n < iterations; n++)
            {
                Array.Copy(src, dest, blockSize);
                Array.Copy(dest, src, blockSize);
                Array.Copy(src, dest, blockSize);
                Array.Copy(dest, src, blockSize);
                Array.Copy(src, dest, blockSize);
                Array.Copy(dest, src, blockSize);
                Array.Copy(src, dest, blockSize);
                Array.Copy(dest, src, blockSize);
                Array.Copy(src, dest, blockSize);
                Array.Copy(dest, src, blockSize);
            }
            timer.Stop();
            var arrayMs = timer.ElapsedMilliseconds;

            // --- hand-written pointer loop ---
            timer.Restart();
            for (var n = 0; n < iterations; n++)
            {
                Custom(pDest, pSrc, blockSize);
                Custom(pSrc, pDest, blockSize);
                Custom(pDest, pSrc, blockSize);
                Custom(pSrc, pDest, blockSize);
                Custom(pDest, pSrc, blockSize);
                Custom(pSrc, pDest, blockSize);
                Custom(pDest, pSrc, blockSize);
                Custom(pSrc, pDest, blockSize);
                Custom(pDest, pSrc, blockSize);
                Custom(pSrc, pDest, blockSize);
            }
            timer.Stop();
            var customMs = timer.ElapsedMilliseconds;

            // --- Marshal.Copy (managed array -> pinned pointer) ---
            timer.Restart();
            for (var n = 0; n < iterations; n++)
            {
                Marshal.Copy(src, 0, pDestPtr, blockSize);
                Marshal.Copy(dest, 0, pSrcPtr, blockSize);
                Marshal.Copy(src, 0, pDestPtr, blockSize);
                Marshal.Copy(dest, 0, pSrcPtr, blockSize);
                Marshal.Copy(src, 0, pDestPtr, blockSize);
                Marshal.Copy(dest, 0, pSrcPtr, blockSize);
                Marshal.Copy(src, 0, pDestPtr, blockSize);
                Marshal.Copy(dest, 0, pSrcPtr, blockSize);
                Marshal.Copy(src, 0, pDestPtr, blockSize);
                Marshal.Copy(dest, 0, pSrcPtr, blockSize);
            }
            timer.Stop();
            var marshalMs = timer.ElapsedMilliseconds;

            // bytes copied (iterations * 10 * blockSize), divided by 0.001 and by 1024*1024;
            // dividing by a time in milliseconds then gives MiB per second.
            var memFactor = iterations * 10.0 * blockSize / 0.001 / (1024 * 1024);

            Console.WriteLine(
                "{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}",
                blockSize,
                (long)(memFactor / bufferMs),
                (long)(memFactor / cpBlkMs),
                (long)(memFactor / memcpyMs),
                (long)(memFactor / arrayMs),
                (long)(memFactor / customMs),
                (long)(memFactor / marshalMs));
        }
    }
}
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
The code above is based on the article "High performance memcpy gotchas in C#" by Alexandre Mutel, published at xoofx.com.
Some experimental results are shared via Google Drive. They were measured on a computer running 64-bit Windows 10 with an Intel(R) Core(TM) i3-3240 CPU @ 3.40GHz and 8 GB of RAM. The code was compiled in the Release configuration against .NET 4.0 with Visual Studio 2015, with the "Optimize code" flag set, for both the x86 and x64 target platforms.
Notes: