Last active
January 14, 2025 10:00
-
-
Save aannenko/babd86e8c88ddce960e5dcdf67d1817a to your computer and use it in GitHub Desktop.
Find magnet link on a web page
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
BenchmarkDotNet v0.14.0, Windows 11 (10.0.26100.2605)
13th Gen Intel Core i9-13900K, 1 CPU, 32 logical and 24 physical cores
.NET SDK 9.0.101
[Host] : .NET 9.0.0 (9.0.24.52809), X64 RyuJIT AVX2
DefaultJob : .NET 9.0.0 (9.0.24.52809), X64 RyuJIT AVX2
| Method | Mean | Error | StdDev | Ratio | Gen0 | Gen1 | Allocated | Alloc Ratio |
|------------------ |----------:|----------:|----------:|------:|-------:|-------:|----------:|------------:|
| FindMagnetInLines | 24.945 μs | 0.2800 μs | 0.2619 μs | 1.00 | 4.1504 | 0.0610 | 78168 B | 1.000 |
| FindMagnetInBytes | 2.026 μs | 0.0095 μs | 0.0089 μs | 0.08 | 0.0153 | - | 312 B | 0.004 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System.Buffers; | |
using System.Text.RegularExpressions; | |
using System.Text; | |
using BenchmarkDotNet.Attributes; | |
using BenchmarkDotNet.Running; | |
// Program entry point: run every benchmark declared on MagnetRetrievalBenchmark,
// forwarding the command-line arguments to BenchmarkDotNet. The returned summary is not used.
_ = BenchmarkRunner.Run<MagnetRetrievalBenchmark>(config: null, args: args);
/// <summary>
/// Benchmarks two ways of finding the first magnet link in a downloaded web page:
/// scanning the text line by line, and scanning the raw bytes in fixed-size chunks.
/// </summary>
[MemoryDiagnoser]
public class MagnetRetrievalBenchmark
{
    // Minimum length requested from the array pool for the byte-scanning buffer.
    private const int _bufferSize = 2048;

    // Number of trailing bytes carried over from one chunk to the next, so that a link
    // straddling a chunk boundary is still visible in the following search.
    private const int _keepFromLastBuffer = _bufferSize / 16;

    private static readonly Regex _regex = new(@"magnet:\?xt=urn:btih:[^""]+", RegexOptions.Compiled | RegexOptions.ExplicitCapture, TimeSpan.FromMilliseconds(100));

    // In-memory copy of the page; shared by both benchmarks and rewound before each run.
    private static readonly Stream _stream = new MemoryStream();

    /// <summary>
    /// Downloads the page once and stores it in the shared in-memory stream.
    /// </summary>
    public MagnetRetrievalBenchmark()
    {
        // The stream is static: truncate it first so that creating a second instance
        // replaces the page instead of appending another copy of it.
        _stream.SetLength(0);

        // Constructors cannot await, hence the blocking .Result.
        using var httpClient = new HttpClient();
        using var webStream = httpClient.GetStreamAsync("https://nnmclub.to/forum/viewtopic.php?t=1716490").Result;
        webStream.CopyTo(_stream);
    }

    /// <summary>
    /// Baseline: reads the page line by line and runs the regex over each line.
    /// </summary>
    /// <returns>The first magnet link as a <see cref="Uri"/>, or <c>null</c> if none is found.</returns>
    [Benchmark(Baseline = true)]
    public async Task<Uri?> FindMagnetInLines() // Benchmark methods must be public.
    {
        _stream.Position = 0;
        using var reader = new StreamReader(_stream, leaveOpen: true);

        // The next line is requested before the current one is examined, so reading and
        // matching can overlap.
        // NOTE(review): on a match the already-started read is never awaited; this is presumably
        // harmless only because the backing stream is in memory - confirm before reusing elsewhere.
        var nextLineTask = reader.ReadLineAsync();
        string? line;
        while ((line = await nextLineTask.ConfigureAwait(false)) is not null)
        {
            nextLineTask = reader.ReadLineAsync();
            if (_regex.TryGetFirstMatch(line, out var magnetRange))
                return new(line[magnetRange]);
        }

        return null;
    }

    /// <summary>
    /// Reads the page in chunks into a pooled byte buffer, looks for the <c>magnet:?</c> prefix
    /// in the raw bytes, and only decodes and regex-matches a chunk once the prefix (or its
    /// beginning at the very end of the chunk) has been seen.
    /// </summary>
    /// <returns>The first magnet link as a <see cref="Uri"/>, or <c>null</c> if none is found.</returns>
    [Benchmark]
    public async Task<Uri?> FindMagnetInBytes()
    {
        _stream.Position = 0;
        var byteBuffer = ArrayPool<byte>.Shared.Rent(_bufferSize);
        try
        {
            // Rented arrays are not cleared. The first search covers the carry-over prefix
            // before anything has been copied into it, so zero it to avoid matching stale
            // bytes left behind by a previous renter.
            byteBuffer.AsSpan(0, _keepFromLastBuffer).Clear();

            int read;
            while ((read = await _stream.ReadAsync(byteBuffer.AsMemory(_keepFromLastBuffer))
                .ConfigureAwait(false)) > 0)
            {
                var bytesToSearchIn = byteBuffer.AsSpan(0, _keepFromLastBuffer + read);

                // Also reports a prefix that is only partially present at the end of the chunk.
                var indexOfMagnet = bytesToSearchIn.IndexOfStartOf("magnet:?"u8);
                if (indexOfMagnet is -1)
                {
                    // Nothing here: keep only the tail for the next round.
                    bytesToSearchIn[^_keepFromLastBuffer..].CopyTo(byteBuffer);
                    continue;
                }

                if (indexOfMagnet >= _keepFromLastBuffer * 4)
                {
                    // The candidate starts far into the buffer: move it (together with the
                    // _keepFromLastBuffer bytes preceding it) to the front and read more data
                    // behind it, so more of the link is available to the regex.
                    var bytesToShiftToStart = bytesToSearchIn[(indexOfMagnet - _keepFromLastBuffer)..];
                    var shiftedBytesLength = bytesToShiftToStart.Length;
                    bytesToShiftToStart.CopyTo(byteBuffer);
                    read = await _stream.ReadAsync(byteBuffer.AsMemory(shiftedBytesLength))
                        .ConfigureAwait(false);
                    bytesToSearchIn = byteBuffer.AsSpan(0, shiftedBytesLength + read);
                }

                var charBuffer = ArrayPool<char>.Shared.Rent(bytesToSearchIn.Length);
                try
                {
                    var chars = charBuffer.AsSpan(0, bytesToSearchIn.Length);
                    if (Encoding.UTF8.TryGetChars(bytesToSearchIn, chars, out var charsWritten) &&
                        _regex.TryGetFirstMatch(chars[..charsWritten], out var magnetRange))
                    {
                        return new(chars[magnetRange].ToString());
                    }
                }
                finally
                {
                    ArrayPool<char>.Shared.Return(charBuffer);
                }

                // The candidate did not turn into a full match: carry the tail over and go on.
                bytesToSearchIn[^_keepFromLastBuffer..].CopyTo(byteBuffer);
            }

            return null;
        }
        finally
        {
            ArrayPool<byte>.Shared.Return(byteBuffer);
        }
    }
}
/// <summary>
/// Span-based convenience helpers for <see cref="Regex"/>.
/// </summary>
static class RegexExtensions
{
    /// <summary>
    /// Looks for the first match of <paramref name="regex"/> in <paramref name="span"/>.
    /// </summary>
    /// <param name="regex">The regular expression to run.</param>
    /// <param name="span">The text to search.</param>
    /// <param name="matchRange">
    /// The range of the first match within <paramref name="span"/>, or <c>default</c> when there is no match.
    /// </param>
    /// <returns><c>true</c> if a match was found; otherwise <c>false</c>.</returns>
    public static bool TryGetFirstMatch(this Regex regex, ReadOnlySpan<char> span, out Range matchRange)
    {
        var matches = regex.EnumerateMatches(span);
        if (!matches.MoveNext())
        {
            matchRange = default;
            return false;
        }

        // Only the first match is of interest; the rest of the enumeration is never advanced.
        var first = matches.Current;
        matchRange = first.Index..(first.Index + first.Length);
        return true;
    }
}
/// <summary>
/// Byte-span search helpers that also recognise a value cut off by the end of the span.
/// </summary>
static class ReadOnlySpanExtensions
{
    /// <summary>
    /// <see cref="Span{T}"/> overload; forwards to the <see cref="ReadOnlySpan{T}"/> implementation.
    /// </summary>
    public static int IndexOfStartOf(this Span<byte> span, ReadOnlySpan<byte> value)
    {
        ReadOnlySpan<byte> readOnly = span;
        return readOnly.IndexOfStartOf(value);
    }

    /// <summary>
    /// Returns the index of the first complete occurrence of <paramref name="value"/> in
    /// <paramref name="span"/>. If there is none, returns the index at which a proper prefix of
    /// <paramref name="value"/> runs up to the very end of <paramref name="span"/>.
    /// </summary>
    /// <param name="span">The bytes to search.</param>
    /// <param name="value">The byte sequence to look for.</param>
    /// <returns>The index described above, or -1 if neither a full nor a trailing partial occurrence exists.</returns>
    public static int IndexOfStartOf(this ReadOnlySpan<byte> span, ReadOnlySpan<byte> value)
    {
        var fullMatch = span.IndexOf(value);
        if (fullMatch >= 0)
            return fullMatch;

        // A partial occurrence can be at most value.Length - 1 bytes long, so only that
        // many trailing bytes need to be inspected.
        var tailLength = Math.Min(span.Length, value.Length - 1);
        var tail = span[^tailLength..];
        while (!tail.IsEmpty)
        {
            var candidate = tail.IndexOf(value[0]);
            if (candidate < 0)
                return -1;

            tail = tail[candidate..];
            if (value.StartsWith(tail))
                return span.Length - tail.Length;

            // Not a prefix of value: skip this byte and look for the next candidate.
            tail = tail[1..];
        }

        return -1;
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment