Skip to content

Instantly share code, notes, and snippets.

@gfoidl
Last active October 30, 2020 15:21
Show Gist options
  • Save gfoidl/346ea7472f06e2024b8bdc9c598e547e to your computer and use it in GitHub Desktop.
Save gfoidl/346ea7472f06e2024b8bdc9c598e547e to your computer and use it in GitHub Desktop.
.NET Array cache line alignment / length -- false sharing caution
using System;
using System.Linq;
namespace ConsoleApp3
{
static unsafe class Program
{
static void Main()
{
int[] arr = Enumerable.Range(2, 122).ToArray();
fixed (int* pArr = arr)
{
PrintCacheInfo(pArr - 2 , "(IntPtr)ptr - 1 = Length");
PrintCacheInfo(pArr , "arr[0]");
PrintCacheInfo(pArr + 64, "arr[64]");
}
}
private static void PrintCacheInfo(int* ptr, string caption)
{
const int cacheBlockBits = 6;
const int cacheBlockSize = 1 << cacheBlockBits; // 64
const int cacheBlockMask = cacheBlockSize - 1; // 63
long addr = (long)ptr;
long cacheBlockOffset = addr & cacheBlockMask; // % 64
long cacheBlockAlignedAddr = addr & ~cacheBlockMask;
Console.WriteLine(caption);
Console.WriteLine($"Address: 0x{addr:X}");
Console.WriteLine($"Cache line addr: 0x{cacheBlockAlignedAddr:X}");
Console.WriteLine($"Offset in cl: {cacheBlockOffset} | 0x{cacheBlockOffset:X}");
Console.WriteLine($"Value: {*ptr}");
Console.WriteLine();
}
}
}
@gfoidl
Copy link
Author

gfoidl commented Nov 30, 2018

Demo for false sharing

Result

false-sharing

C# code
using System;
using System.Diagnostics;
using System.IO;
using System.Runtime.CompilerServices;
using System.Threading;
using System.Threading.Tasks;

namespace ConsoleApplication2
{
    class Program
    {
#if DEBUG
        private const int ITERS = 2;
#else
        private const int ITERS = 100_000_000;
#endif
        //---------------------------------------------------------------------
        private static int[]                s_counter = new int[1 << 20];
        private static CountdownEvent       s_cde     = new CountdownEvent(4);
        private static ManualResetEventSlim s_mre     = new ManualResetEventSlim(false);
        private static Stopwatch            s_watch   = new Stopwatch();
        //---------------------------------------------------------------------
        [MethodImpl(MethodImplOptions.AggressiveOptimization)]
        private static void UpdateCounter(int position)
        {
            s_cde.Signal();
            s_mre.Wait();

            for (int j = 0; j < ITERS; j++)
            {
                s_counter[position] += 3;   // any number, just to do a read and a write
            }
        }
        //---------------------------------------------------------------------
        static void Main()
        {
            // Parallel.Invoke will run one task in the main-thread (inlineing of tasks),
            // so be aware of potential deadlocks. Thus "free" the main-thread.

            // JIT
            s_watch.Start();
            Task.Run(() =>
                Parallel.Invoke(
                    () => UpdateCounter(64 * 0 / sizeof(int)),
                    () => UpdateCounter(64 * 1 / sizeof(int)),
                    () => UpdateCounter(64 * 2 / sizeof(int)),
                    () => UpdateCounter(64 * 3 / sizeof(int))
                )
            );
            s_cde  .Wait();
            s_mre  .Set();
            s_watch.Stop();

            Run(padFirstElement: false, "results-no-padfirst.csv");
            Run(padFirstElement: true , "results-padfirst.csv");
        }
        //---------------------------------------------------------------------
        private static void Run(bool padFirstElement, string resultsFileName)
        {
            int pad = padFirstElement ? 1 : 0;

            using (StreamWriter sw = File.CreateText(resultsFileName))
            {
                sw.WriteLine("#spacing [bytes];time [ms]");     // # for GnuPlot

                for (int i = 0; i < 15; ++i)
                {
                    int spacing        = 1 << i;
                    int spacingInBytes = spacing * sizeof(int);

                    Console.Write($"spacing: {spacingInBytes,5} bytes");

                    s_cde.Reset();
                    s_mre.Reset();

                    var task = Task.Run(() =>
                        Parallel.Invoke(
                            () => UpdateCounter(spacingInBytes * (pad + 0) / sizeof(int)),
                            () => UpdateCounter(spacingInBytes * (pad + 1) / sizeof(int)),
                            () => UpdateCounter(spacingInBytes * (pad + 2) / sizeof(int)),
                            () => UpdateCounter(spacingInBytes * (pad + 3) / sizeof(int))
                        )
                    );

                    s_cde  .Wait();
                    s_watch.Restart();
                    s_mre  .Set();
                    task   .Wait();
                    s_watch.Stop();

                    Console.WriteLine($", time: {s_watch.ElapsedMilliseconds}");
                    sw.WriteLine($"{spacingInBytes};{s_watch.ElapsedMilliseconds}");
                }
            }
        }
    }
}
GnuPlot
reset

set datafile separator ';'

set term pngcairo size 1200,600 enhanced
set output 'false-sharing.png'

set grid

set title  'False sharing on updating counter-array'
set title font ",12"

set xlabel 'spacing [bytes]'
set ylabel 'time [ms]'

set logscale x;
set xtics (4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768, 65536)

set arrow from 64, graph 0 to 64, graph 1 nohead lc rgb 'red' lw 1
set label at 60,1000 'Cache line size' rotate by 90 front textcolor rgb 'red'

plot'results-no-padfirst.csv' with linespoints lw 2 title 'no padding for first', \
    'results-padfirst.csv'    with linespoints lw 2 title 'padding for first'

@gfoidl
Copy link
Author

gfoidl commented Oct 30, 2020

False sharing in Intel VTune Profiler

Microarchitecture Exploration

w/o padding

grafik

with padding

grafik

Memory Access

w/o padding

grafik

with padding

grafik

Note: It's L1-bound because of the RAW-hazard from the increment now.

BenchmarkDotNet

Shows this effect with the use of hardware counters also clearly:

Method Mean Error StdDev Ratio RatioSD CacheMisses/Op LLCReference/Op LLCMisses/Op
NoPadding 2.325 s 0.0409 s 0.0363 s 1.00 0.00 1,047,666 146,520,838 1,048,121
Padding 1.658 s 0.0239 s 0.0223 s 0.71 0.02 622,924 2,056,746 622,592

demo program
#define PAD

using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Threading.Tasks;

Console.WriteLine($"Size of struct: {Unsafe.SizeOf<Data>()}");

Worker worker = new();
Parallel.Invoke(
    () => Console.WriteLine($"x: {worker.IncX()}"),
    () => Console.WriteLine($"y: {worker.IncY()}")
);

public class Worker
{
    private const uint N = 3_000_000_000;
    private Data _data;

    public uint IncX()
    {
        for (uint i = 0; i < N; ++i)
        {
            _data.X++;
        }

        return _data.X;
    }

    public uint IncY()
    {
        for (uint i = 0; i < N; ++i)
        {
            _data.Y++;
        }

        return _data.Y;
    }
}

[StructLayout(LayoutKind.Explicit)]
public struct Data
{
    [FieldOffset(0)]
    public uint X;

#if PAD
    [FieldOffset(64)]
#else
    [FieldOffset(4)]
#endif
    public uint Y;
}

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment