Skip to content

Instantly share code, notes, and snippets.

@aannenko
Last active January 14, 2025 10:00
Show Gist options
  • Save aannenko/babd86e8c88ddce960e5dcdf67d1817a to your computer and use it in GitHub Desktop.
Save aannenko/babd86e8c88ddce960e5dcdf67d1817a to your computer and use it in GitHub Desktop.
Find magnet link on a web page
BenchmarkDotNet v0.14.0, Windows 11 (10.0.26100.2605)
13th Gen Intel Core i9-13900K, 1 CPU, 32 logical and 24 physical cores
.NET SDK 9.0.101
[Host] : .NET 9.0.0 (9.0.24.52809), X64 RyuJIT AVX2
DefaultJob : .NET 9.0.0 (9.0.24.52809), X64 RyuJIT AVX2
| Method | Mean | Error | StdDev | Ratio | Gen0 | Gen1 | Allocated | Alloc Ratio |
|------------------ |----------:|----------:|----------:|------:|-------:|-------:|----------:|------------:|
| FindMagnetInLines | 24.945 μs | 0.2800 μs | 0.2619 μs | 1.00 | 4.1504 | 0.0610 | 78168 B | 1.000 |
| FindMagnetInBytes | 2.026 μs | 0.0095 μs | 0.0089 μs | 0.08 | 0.0153 | - | 312 B | 0.004 |
using System.Buffers;
using System.Text.RegularExpressions;
using System.Text;
using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Running;
// Program entry point (top-level statement): hands MagnetRetrievalBenchmark to the
// BenchmarkDotNet runner and forwards the process command-line arguments to it.
BenchmarkRunner.Run<MagnetRetrievalBenchmark>(args: args);
/// <summary>
/// Benchmarks two ways of finding the first magnet link in a downloaded web page:
/// decoding the page line by line, and scanning the raw bytes in fixed-size chunks.
/// </summary>
[MemoryDiagnoser]
public class MagnetRetrievalBenchmark
{
    // Size requested from the byte pool for the chunked search.
    private const int _bufferSize = 2048;

    // Number of trailing bytes carried over to the front of the buffer between reads,
    // so a link that straddles two reads is still seen in one piece.
    private const int _keepFromLastBuffer = _bufferSize / 16;

    private static readonly Regex _regex = new(@"magnet:\?xt=urn:btih:[^""]+", RegexOptions.Compiled | RegexOptions.ExplicitCapture, TimeSpan.FromMilliseconds(100));

    // In-memory copy of the page, shared by both benchmarks so the network is hit only during setup.
    private static readonly Stream _stream = new MemoryStream();

    /// <summary>
    /// Downloads the page once and stores it in the shared in-memory stream.
    /// </summary>
    public MagnetRetrievalBenchmark()
    {
        // Constructors cannot be async; GetAwaiter().GetResult() surfaces the original exception
        // instead of an AggregateException. Declaration order matters: the response stream is
        // disposed before the client that produced it.
        using var httpClient = new HttpClient();
        using var webStream = httpClient
            .GetStreamAsync("https://nnmclub.to/forum/viewtopic.php?t=1716490")
            .GetAwaiter()
            .GetResult();

        // The stream is static: replace its content instead of appending to whatever
        // an earlier instance left behind.
        _stream.SetLength(0);
        webStream.CopyTo(_stream);
    }

    /// <summary>
    /// Baseline: reads the page as text, one line at a time, and applies the regex to each line.
    /// </summary>
    /// <returns>The first magnet link found, or <c>null</c> when the page contains none.</returns>
    [Benchmark(Baseline = true)]
    public async Task<Uri?> FindMagnetInLines() // Benchmark methods must be public.
    {
        _stream.Position = 0;
        using var reader = new StreamReader(_stream, leaveOpen: true);

        // Lines are read strictly one after another: starting the next read before the current
        // line is examined would leave an unobserved read in flight when we return early and
        // the reader is disposed.
        string? line;
        while ((line = await reader.ReadLineAsync().ConfigureAwait(false)) is not null)
        {
            if (_regex.TryGetFirstMatch(line, out var magnetRange))
            {
                return new(line[magnetRange]);
            }
        }

        return null;
    }

    /// <summary>
    /// Scans the raw bytes for the <c>magnet:?</c> marker and decodes to text only the chunk
    /// that contains it, using pooled buffers throughout.
    /// </summary>
    /// <returns>The first magnet link found, or <c>null</c> when the page contains none.</returns>
    [Benchmark]
    public async Task<Uri?> FindMagnetInBytes()
    {
        _stream.Position = 0;
        var byteBuffer = ArrayPool<byte>.Shared.Rent(_bufferSize);
        try
        {
            // Pooled arrays are handed out uncleared. The carry-over region at the front is searched
            // on the very first pass before anything was copied into it, so wipe it to keep stale
            // bytes from a previous rental from producing a false match.
            byteBuffer.AsSpan(0, _keepFromLastBuffer).Clear();

            int read;
            while ((read = await FillAsync(byteBuffer.AsMemory(_keepFromLastBuffer))
                .ConfigureAwait(false)) > 0)
            {
                var bytesToSearchIn = byteBuffer.AsSpan(0, _keepFromLastBuffer + read);
                var indexOfMagnet = bytesToSearchIn.IndexOfStartOf("magnet:?"u8);
                if (indexOfMagnet is -1)
                {
                    // Nothing here: keep only the tail so a marker split across reads is not lost.
                    bytesToSearchIn[^_keepFromLastBuffer..].CopyTo(byteBuffer);
                    continue;
                }

                if (indexOfMagnet >= _keepFromLastBuffer * 4)
                {
                    // The marker sits beyond the first quarter of the carry-over multiple: move it
                    // (with _keepFromLastBuffer bytes of leading context) to the front and refill
                    // the rest of the buffer so more of the data following the marker is available.
                    var bytesToShiftToStart = bytesToSearchIn[(indexOfMagnet - _keepFromLastBuffer)..];
                    var shiftedBytesLength = bytesToShiftToStart.Length;
                    bytesToShiftToStart.CopyTo(byteBuffer);
                    read = await FillAsync(byteBuffer.AsMemory(shiftedBytesLength))
                        .ConfigureAwait(false);
                    bytesToSearchIn = byteBuffer.AsSpan(0, shiftedBytesLength + read);
                }

                // UTF-8 never decodes to more chars than it has bytes, so this size always suffices.
                var charBuffer = ArrayPool<char>.Shared.Rent(bytesToSearchIn.Length);
                try
                {
                    var chars = charBuffer.AsSpan(0, bytesToSearchIn.Length);
                    if (Encoding.UTF8.TryGetChars(bytesToSearchIn, chars, out var charsWritten) &&
                        _regex.TryGetFirstMatch(chars[..charsWritten], out var magnetRange))
                    {
                        return new(chars[magnetRange].ToString());
                    }
                }
                finally
                {
                    ArrayPool<char>.Shared.Return(charBuffer);
                }

                // The marker did not turn out to be a full link; carry the tail over and keep reading.
                bytesToSearchIn[^_keepFromLastBuffer..].CopyTo(byteBuffer);
            }

            return null;
        }
        finally
        {
            ArrayPool<byte>.Shared.Return(byteBuffer);
        }
    }

    // Reads until the destination is full or the stream ends. A single ReadAsync may legally return
    // fewer bytes than requested, which could cut a link short at the end of the buffer.
    private static ValueTask<int> FillAsync(Memory<byte> destination) =>
        _stream.ReadAtLeastAsync(destination, destination.Length, throwOnEndOfStream: false);
}
/// <summary>
/// Span-based convenience helpers for <see cref="Regex"/>.
/// </summary>
static class RegexExtensions
{
    /// <summary>
    /// Finds the first match of <paramref name="regex"/> in <paramref name="span"/> without allocating.
    /// </summary>
    /// <param name="regex">The pattern to apply.</param>
    /// <param name="span">The text to search.</param>
    /// <param name="matchRange">
    /// The range of the first match within <paramref name="span"/>, or the default range when there is none.
    /// </param>
    /// <returns><c>true</c> when a match was found; otherwise <c>false</c>.</returns>
    public static bool TryGetFirstMatch(this Regex regex, ReadOnlySpan<char> span, out Range matchRange)
    {
        var matches = regex.EnumerateMatches(span);
        if (!matches.MoveNext())
        {
            matchRange = default;
            return false;
        }

        // Only the first match is of interest; the enumerator is not advanced any further.
        var first = matches.Current;
        matchRange = first.Index..(first.Index + first.Length);
        return true;
    }
}
/// <summary>
/// Byte-span search helpers that also recognise a value cut off by the end of the span.
/// </summary>
static class ReadOnlySpanExtensions
{
    /// <summary>
    /// Writable-span overload; forwards to the read-only implementation.
    /// </summary>
    public static int IndexOfStartOf(this Span<byte> span, ReadOnlySpan<byte> value)
    {
        ReadOnlySpan<byte> readOnly = span;
        return readOnly.IndexOfStartOf(value);
    }

    /// <summary>
    /// Returns the index at which <paramref name="value"/> starts in <paramref name="span"/>.
    /// A complete occurrence wins; failing that, the earliest position where the remainder of
    /// <paramref name="span"/> is a proper prefix of <paramref name="value"/> is returned.
    /// </summary>
    /// <param name="span">The bytes to search.</param>
    /// <param name="value">The byte sequence to look for.</param>
    /// <returns>The start index of the full or partial occurrence, or <c>-1</c> when there is none.</returns>
    public static int IndexOfStartOf(this ReadOnlySpan<byte> span, ReadOnlySpan<byte> value)
    {
        var fullMatch = span.IndexOf(value);
        if (fullMatch >= 0)
        {
            return fullMatch;
        }

        // A partial occurrence can be at most value.Length - 1 bytes long, so only that much
        // of the tail needs to be inspected.
        var tailLength = Math.Min(span.Length, value.Length - 1);
        for (var start = span.Length - tailLength; start < span.Length; start++)
        {
            // Jump straight to the next byte that could begin the value.
            var offset = span[start..].IndexOf(value[0]);
            if (offset < 0)
            {
                return -1;
            }

            start += offset;
            var candidate = span[start..];
            if (candidate.SequenceEqual(value[..candidate.Length]))
            {
                return start;
            }
        }

        return -1;
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment