Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save mattwarren/7227890bf6fc76cf050231511089d6da to your computer and use it in GitHub Desktop.
Save mattwarren/7227890bf6fc76cf050231511089d6da to your computer and use it in GitHub Desktop.
ContinueWith Micro Benchmark
/// <summary>
/// Micro benchmark that starts <see cref="NumberOfTasks"/> short delay tasks, registers each one in a
/// shared dictionary, and attaches a continuation that removes the finished task again.
/// The two benchmark methods differ only in how the continuation reaches the dictionary:
/// one lambda reads the <c>tasks</c> field directly, the other receives it through the
/// <c>state</c> argument of <c>ContinueWith</c>.
/// </summary>
[Config(typeof(Config))]
public class ContinueWithAllocations
{
    /// <summary>
    /// Benchmark configuration that adds a <c>MemoryDiagnoser</c> to the run.
    /// </summary>
    private class Config : ManualConfig
    {
        public Config()
        {
            Add(new MemoryDiagnoser());
        }
    }

    /// <summary>
    /// Number of delay tasks started by a single benchmark invocation.
    /// </summary>
    [Params(100, 200, 500)]
    public int NumberOfTasks { get; set; }

    // Registry of started tasks, keyed by the task itself; continuations remove their antecedent from it.
    private ConcurrentDictionary<Task, Task> tasks;

    /// <summary>
    /// Creates the empty task registry used by the benchmark methods.
    /// </summary>
    [Setup]
    public void SetUp()
    {
        tasks = new ConcurrentDictionary<Task, Task>();
    }

    /// <summary>
    /// Variant whose continuation lambda reads the <c>tasks</c> field of this instance directly.
    /// </summary>
    /// <returns>
    /// A task that completes once every task still present in the registry after the loop has completed.
    /// </returns>
    [Benchmark]
    public Task ContinueWithClojureCapture()
    {
        for (var remaining = NumberOfTasks; remaining > 0; remaining--)
        {
            var delay = Task.Delay(1);
            tasks.TryAdd(delay, delay);
#pragma warning disable CS4014 // The continuation task is intentionally not awaited
            delay.ContinueWith(
                finished =>
                {
                    Task removed;
                    tasks.TryRemove(finished, out removed);
                },
                TaskContinuationOptions.ExecuteSynchronously);
#pragma warning restore CS4014
        }

        var pending = tasks.Values;
        return Task.WhenAll(pending);
    }

    /// <summary>
    /// Variant whose continuation lambda receives the registry through the <c>state</c> argument,
    /// so the lambda body itself references nothing from the enclosing method or instance.
    /// </summary>
    /// <returns>
    /// A task that completes once every task still present in the registry after the loop has completed.
    /// </returns>
    [Benchmark]
    public Task ContinueWithWithoutClojureCapture()
    {
        for (var remaining = NumberOfTasks; remaining > 0; remaining--)
        {
            var delay = Task.Delay(1);
            tasks.TryAdd(delay, delay);
#pragma warning disable CS4014 // The continuation task is intentionally not awaited
            delay.ContinueWith(
                (finished, state) =>
                {
                    // The registry arrives as the untyped state object and is cast back here.
                    var registry = (ConcurrentDictionary<Task, Task>) state;
                    Task removed;
                    registry.TryRemove(finished, out removed);
                },
                tasks,
                TaskContinuationOptions.ExecuteSynchronously);
#pragma warning restore CS4014
        }

        var pending = tasks.Values;
        return Task.WhenAll(pending);
    }
}
BenchmarkDotNet=v0.9.7.0
OS=Microsoft Windows NT 6.2.9200.0
Processor=Intel(R) Core(TM) i7-3615QM CPU 2.30GHz, ProcessorCount=8
Frequency=2241002 ticks, Resolution=446.2290 ns, Timer=TSC
HostCLR=MS.NET 4.0.30319.42000, Arch=64-bit RELEASE [RyuJIT]
JitModules=clrjit-v4.6.1080.0

Type=ContinueWithAllocations  Mode=Throughput  
| Method | Platform | Jit | NumberOfTasks | Median | StdDev | Gen 0 | Gen 1 | Gen 2 | Bytes Allocated/Op |
|---------------------------------- |--------- |---------- |-------------- |-------------- |------------ |------ |------ |------ |------------------- |
| ContinueWithClojureCapture | X64 | LegacyJit | 100 | 584.2819 us | 31.0444 us | 0,00 | 0,00 | 0,00 | 72 771,83 |
| ContinueWithWithoutClojureCapture | X64 | LegacyJit | 100 | 562.1988 us | 22.2841 us | 0,00 | 0,00 | 0,00 | 72 515,91 |
| ContinueWithClojureCapture | X64 | RyuJit | 100 | 576.2550 us | 28.2475 us | 0,00 | 0,00 | 0,00 | 72 575,64 |
| ContinueWithWithoutClojureCapture | X64 | RyuJit | 100 | 565.3582 us | 25.2964 us | 0,00 | 0,00 | 0,00 | 69 739,98 |
| ContinueWithClojureCapture | X86 | LegacyJit | 100 | NA | NA | - | - | - | NaN |
| ContinueWithWithoutClojureCapture | X86 | LegacyJit | 100 | NA | NA | - | - | - | NaN |
| ContinueWithClojureCapture | X64 | LegacyJit | 200 | 931.4733 us | 52.2089 us | 0,00 | 0,00 | 0,00 | 108 105,39 |
| ContinueWithWithoutClojureCapture | X64 | LegacyJit | 200 | 893.3913 us | 45.0929 us | 0,00 | 0,00 | 0,00 | 92 125,85 |
| ContinueWithClojureCapture | X64 | RyuJit | 200 | 907.1329 us | 44.0440 us | 0,00 | 0,00 | 0,00 | 108 621,39 |
| ContinueWithWithoutClojureCapture | X64 | RyuJit | 200 | 927.7449 us | 59.2315 us | 0,00 | 0,00 | 0,00 | 107 089,14 |
| ContinueWithClojureCapture | X86 | LegacyJit | 200 | NA | NA | - | - | - | NaN |
| ContinueWithWithoutClojureCapture | X86 | LegacyJit | 200 | NA | NA | - | - | - | NaN |
| ContinueWithClojureCapture | X64 | LegacyJit | 500 | 1,848.7510 us | 159.8408 us | 0,00 | 0,00 | 0,00 | 226 353,87 |
| ContinueWithWithoutClojureCapture | X64 | LegacyJit | 500 | 1,838.6481 us | 135.8207 us | 0,00 | 0,00 | 0,00 | 183 727,11 |
| ContinueWithClojureCapture | X64 | RyuJit | 500 | 1,892.3359 us | 145.9936 us | 0,00 | 0,00 | 0,00 | 132 275,79 |
| ContinueWithWithoutClojureCapture | X64 | RyuJit | 500 | 1,834.6669 us | 131.8214 us | 0,00 | 0,00 | 0,00 | 187 833,11 |
| ContinueWithClojureCapture | X86 | LegacyJit | 500 | NA | NA | - | - | - | NaN |
| ContinueWithWithoutClojureCapture | X86 | LegacyJit | 500 | NA | NA | - | - | - | NaN |

Benchmarks with issues:
  - ContinueWithAllocations_ContinueWithClojureCapture_LegacyX86_NumberOfTasks-100
  - ContinueWithAllocations_ContinueWithWithoutClojureCapture_LegacyX86_NumberOfTasks-100
  - ContinueWithAllocations_ContinueWithClojureCapture_LegacyX86_NumberOfTasks-200
  - ContinueWithAllocations_ContinueWithWithoutClojureCapture_LegacyX86_NumberOfTasks-200
  - ContinueWithAllocations_ContinueWithClojureCapture_LegacyX86_NumberOfTasks-500
  - ContinueWithAllocations_ContinueWithWithoutClojureCapture_LegacyX86_NumberOfTasks-500

@mattwarren
Copy link
Author

@danielmarbach, yep!

I suspect that the large number of tasks being created is skewing the memory usage stats. For instance, with NumberOfTasks = 1, 5 and 10, I get the following results:

BenchmarkDotNet=v0.9.7.0
OS=Microsoft Windows NT 6.1.7601 Service Pack 1
Processor=Intel(R) Core(TM) i7-4800MQ CPU 2.70GHz, ProcessorCount=8
Frequency=2630683 ticks, Resolution=380.1294 ns, Timer=TSC
HostCLR=MS.NET 4.0.30319.42000, Arch=32-bit RELEASE
JitModules=clrjit-v4.6.1076.0

Type=ContinueWithAllocations  Mode=Throughput  
| Method | NumberOfTasks | Median | StdDev | Gen 0 | Gen 1 | Gen 2 | Bytes Allocated/Op |
|---------------------------------- |-------------- |------------ |---------- |------ |------ |------ |------------------- |
| ContinueWithClojureCapture | 1 | 44.7095 us | 2.8989 us | 23.32 | 17.69 | - | 2,714.27 |
| ContinueWithWithoutClojureCapture | 1 | 46.3050 us | 2.5083 us | 24.68 | 14.88 | - | 2,581.80 |
| ContinueWithClojureCapture | 5 | 100.6687 us | 3.3937 us | 52.89 | 36.32 | - | 6,442.19 |
| ContinueWithWithoutClojureCapture | 5 | 98.7882 us | 5.3014 us | 51.51 | 44.11 | - | 6,812.53 |
| ContinueWithClojureCapture | 10 | 131.7470 us | 6.3743 us | 78.00 | 59.00 | - | 10,302.84 |
| ContinueWithWithoutClojureCapture | 10 | 132.9372 us | 9.7847 us | 82.84 | 61.36 | 0.51 | 10,675.89 |

@mattwarren
Copy link
Author

For a more general look at lambda allocations (i.e. without the task-based stuff) see https://gist.github.com/mattwarren/480848b4eafb5843602eedd510d9e1ae, that shows the difference more clearly

@danielmarbach
Copy link

But even in the updated results, ContinueWithWithoutClojureCapture allocates fewer bytes only with 1 task. In all other scenarios ContinueWithClojureCapture seems to allocate less. I have to come up with a better test.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment