Created
March 31, 2022 11:14
-
-
Save madelson/ec76bc142b6c015a38ed077e80eda3d0 to your computer and use it in GitHub Desktop.
Chunk algorithm alternatives for https://github.com/dotnet/runtime/issues/67132
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using BenchmarkDotNet.Attributes; | |
using System; | |
using System.Collections.Generic; | |
using System.Diagnostics.CodeAnalysis; | |
using System.Linq; | |
using System.Text; | |
using System.Threading.Tasks; | |
namespace Benchmarks | |
{ | |
[MemoryDiagnoser] | |
public class ChunkBenchmark | |
{ | |
[Params("1000,100", "1000,999", "10000,999", "1000,1000000")] | |
public string Input = default!; | |
private int Size; | |
private int ChunkSize; | |
private IEnumerable<string> _source = default!; | |
[GlobalSetup] | |
public void SetUp() | |
{ | |
var split = Input.Split(','); | |
Size = int.Parse(split[0]); | |
ChunkSize = int.Parse(split[1]); | |
_source = Enumerable.Range(0, Size).Select(i => i.ToString()).ToList(); | |
// validate that all methods do the same thing | |
var r1 = _source.Chunk(ChunkSize).ToArray(); | |
var r2 = ChunkList(_source, ChunkSize).ToArray(); | |
var r3 = ChunkFirst(_source, ChunkSize).ToArray(); | |
if (r1.Length != r2.Length || r1.Length != r3.Length) { throw new Exception("Bad length"); } | |
for (var i = 0; i < r1.Length; ++i) | |
{ | |
if (!r1[i].SequenceEqual(r2[i]) || !r1[i].SequenceEqual(r3[i])) { throw new Exception("Bad chunk"); } | |
} | |
} | |
[Benchmark(Baseline = true)] | |
public int ChunkNative() | |
{ | |
var count = 0; | |
foreach (var chunk in _source.Chunk(ChunkSize)) | |
{ | |
++count; | |
} | |
return count; | |
} | |
[Benchmark] | |
public int ChunkList() | |
{ | |
var count = 0; | |
foreach (var chunk in ChunkList(_source, ChunkSize)) | |
{ | |
++count; | |
} | |
return count; | |
} | |
[Benchmark] | |
public int ChunkFirst() | |
{ | |
var count = 0; | |
foreach (var chunk in ChunkFirst(_source, ChunkSize)) | |
{ | |
++count; | |
} | |
return count; | |
} | |
public static IEnumerable<TSource[]> ChunkList<TSource>(IEnumerable<TSource> source, int size) | |
{ | |
if (source == null) | |
{ | |
ArgumentNullException.ThrowIfNull(source); | |
} | |
if (size < 1) | |
{ | |
ArgumentNullException.ThrowIfNull(source); | |
} | |
return ChunkListIterator(source, size); | |
} | |
private static IEnumerable<TSource[]> ChunkListIterator<TSource>(IEnumerable<TSource> source, int size) | |
{ | |
using IEnumerator<TSource> e = source.GetEnumerator(); | |
if (e.MoveNext()) | |
{ | |
List<TSource> chunkBuilder = new(); | |
do | |
{ | |
chunkBuilder.Add(e.Current); | |
while (chunkBuilder.Count < size && e.MoveNext()) | |
{ | |
chunkBuilder.Add(e.Current); | |
} | |
TSource[] chunk = new TSource[chunkBuilder.Count]; | |
chunkBuilder.CopyTo(chunk, 0); | |
yield return chunk; | |
if (chunk.Length < size) | |
{ | |
yield break; | |
} | |
chunkBuilder.Clear(); | |
} | |
while (e.MoveNext()); | |
} | |
} | |
public static IEnumerable<TSource[]> ChunkFirst<TSource>(IEnumerable<TSource> source, int size) | |
{ | |
if (source == null) | |
{ | |
ArgumentNullException.ThrowIfNull(source); | |
} | |
if (size < 1) | |
{ | |
ArgumentNullException.ThrowIfNull(source); | |
} | |
return ChunkFirstIterator(source, size); | |
} | |
private static IEnumerable<TSource[]> ChunkFirstIterator<TSource>(IEnumerable<TSource> source, int size) | |
{ | |
using IEnumerator<TSource> e = source.GetEnumerator(); | |
//if (size > 1000) | |
{ | |
if (TryCreateInitialChunk(e, size, out TSource[]? chunk)) | |
{ | |
yield return chunk; | |
if (chunk.Length < size) | |
{ | |
yield break; | |
} | |
} | |
else | |
{ | |
yield break; | |
} | |
} | |
while (e.MoveNext()) | |
{ | |
TSource[] chunk = new TSource[size]; | |
chunk[0] = e.Current; | |
int i = 1; | |
for (; i < chunk.Length && e.MoveNext(); i++) | |
{ | |
chunk[i] = e.Current; | |
} | |
if (i == chunk.Length) | |
{ | |
yield return chunk; | |
} | |
else | |
{ | |
Array.Resize(ref chunk, i); | |
yield return chunk; | |
yield break; | |
} | |
} | |
} | |
private static bool TryCreateInitialChunk<TSource>(IEnumerator<TSource> enumerator, int size, [NotNullWhen(returnValue: true)] out TSource[]? chunk) | |
{ | |
List<TSource> builder = new(); | |
for (var i = 0; i < size && enumerator.MoveNext(); i++) | |
{ | |
builder.Add(enumerator.Current); | |
} | |
if (builder.Count == 0) | |
{ | |
chunk = null; | |
return false; | |
} | |
chunk = builder.ToArray(); | |
return true; | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment