Last active
October 30, 2020 15:21
-
-
Save gfoidl/346ea7472f06e2024b8bdc9c598e547e to your computer and use it in GitHub Desktop.
.NET Array cache line alignment / length -- false sharing caution
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System;
using System.Linq;

namespace ConsoleApp3
{
    /// <summary>
    /// Prints where an <c>int[]</c>'s length field and selected elements fall
    /// relative to 64-byte cache-line boundaries.
    /// </summary>
    static unsafe class Program
    {
        static void Main()
        {
            // 122 ints holding the values 2..123, so the value printed for
            // element i is i + 2 and the length field prints as 122.
            int[] arr = Enumerable.Range(2, 122).ToArray();

            fixed (int* pArr = arr)
            {
                // The caption below says the length lives one native word
                // ("(IntPtr)ptr - 1") in front of element 0. Deriving the
                // pointer from the native word size keeps that true in both
                // 32-bit and 64-bit processes; the previous hard-coded
                // "pArr - 2" (8 bytes) only matched it on 64-bit.
                int* pLength = (int*)((IntPtr*)pArr - 1);

                PrintCacheInfo(pLength  , "(IntPtr)ptr - 1 = Length");
                PrintCacheInfo(pArr     , "arr[0]");
                PrintCacheInfo(pArr + 64, "arr[64]");
            }
        }

        /// <summary>
        /// Writes the address of <paramref name="ptr"/>, the start address of
        /// the 64-byte block containing it, its offset inside that block and
        /// the <c>int</c> value it points to.
        /// </summary>
        /// <param name="ptr">Address to inspect; it is dereferenced, so it must be readable.</param>
        /// <param name="caption">Heading printed before the details.</param>
        private static void PrintCacheInfo(int* ptr, string caption)
        {
            const int cacheBlockBits = 6;
            const int cacheBlockSize = 1 << cacheBlockBits;    // 64
            const int cacheBlockMask = cacheBlockSize - 1;     // 63

            long addr = (long)ptr;

            // Low 6 bits = position inside the block (same as addr % 64).
            long cacheBlockOffset = addr & cacheBlockMask;

            // Clearing the low 6 bits yields the block's start address.
            // ~cacheBlockMask is the int -64, which sign-extends to long and
            // therefore keeps all upper address bits.
            long cacheBlockAlignedAddr = addr & ~cacheBlockMask;

            Console.WriteLine(caption);
            Console.WriteLine($"Address: 0x{addr:X}");
            Console.WriteLine($"Cache line addr: 0x{cacheBlockAlignedAddr:X}");
            Console.WriteLine($"Offset in cl: {cacheBlockOffset} | 0x{cacheBlockOffset:X}");
            Console.WriteLine($"Value: {*ptr}");
            Console.WriteLine();
        }
    }
}
False sharing in Intel VTune Profiler
Microarchitecture Exploration
w/o padding
with padding
Memory Access
w/o padding
with padding
Note: With padding, the workload is now L1-bound because of the read-after-write (RAW) hazard caused by the increment.
BenchmarkDotNet
BenchmarkDotNet's hardware counters also show this effect clearly:
Method | Mean | Error | StdDev | Ratio | RatioSD | CacheMisses/Op | LLCReference/Op | LLCMisses/Op |
---|---|---|---|---|---|---|---|---|
NoPadding | 2.325 s | 0.0409 s | 0.0363 s | 1.00 | 0.00 | 1,047,666 | 146,520,838 | 1,048,121 |
Padding | 1.658 s | 0.0239 s | 0.0223 s | 0.71 | 0.02 | 622,924 | 2,056,746 | 622,592 |
demo program
#define PAD
using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Threading.Tasks;
// Report the struct size so it is visible whether the padded or the
// unpadded layout was compiled in.
Console.WriteLine($"Size of struct: {Unsafe.SizeOf<Data>()}");

var worker = new Worker();

// Each delegate runs one counter loop to completion and prints its result.
Action incrementX = () => Console.WriteLine($"x: {worker.IncX()}");
Action incrementY = () => Console.WriteLine($"y: {worker.IncY()}");

// Run both loops against the same Worker instance, possibly in parallel.
Parallel.Invoke(incrementX, incrementY);
/// <summary>
/// Holds one <see cref="Data"/> instance and exposes two loops that each
/// increment a different field of it a fixed number of times.
/// </summary>
public class Worker
{
    // Number of increments per call; 3,000,000,000 fits in a uint.
    private const uint IterationCount = 3_000_000_000;

    private Data _data;

    /// <summary>Increments <c>X</c> once per iteration.</summary>
    /// <returns>The value of <c>X</c> after the loop has finished.</returns>
    public uint IncX()
    {
        uint remaining = IterationCount;
        while (remaining != 0)
        {
            _data.X += 1;
            --remaining;
        }

        return _data.X;
    }

    /// <summary>Increments <c>Y</c> once per iteration.</summary>
    /// <returns>The value of <c>Y</c> after the loop has finished.</returns>
    public uint IncY()
    {
        uint remaining = IterationCount;
        while (remaining != 0)
        {
            _data.Y += 1;
            --remaining;
        }

        return _data.Y;
    }
}
/// <summary>
/// Two counters with explicitly chosen byte offsets. When <c>PAD</c> is
/// defined, <see cref="Y"/> starts 64 bytes after <see cref="X"/>; otherwise
/// it follows <see cref="X"/> directly.
/// </summary>
[StructLayout(LayoutKind.Explicit)]
public struct Data
{
#if PAD
    // Padded layout: Y starts 64 bytes into the struct.
    private const int OffsetOfY = 64;
#else
    // Packed layout: Y directly follows the 4-byte X.
    private const int OffsetOfY = sizeof(uint);
#endif

    /// <summary>First counter, at the start of the struct.</summary>
    [FieldOffset(0)]
    public uint X;

    /// <summary>Second counter, at the offset selected by <c>PAD</c>.</summary>
    [FieldOffset(OffsetOfY)]
    public uint Y;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Demo for false sharing
Result
C# code
GnuPlot