Skip to content

Instantly share code, notes, and snippets.

@Ilchert
Created January 4, 2024 21:16
Show Gist options
  • Save Ilchert/302c864befc93ee11d9ab50384f9278d to your computer and use it in GitHub Desktop.
Save Ilchert/302c864befc93ee11d9ab50384f9278d to your computer and use it in GitHub Desktop.
// See https://aka.ms/new-console-template for more information
using System.Buffers.Binary;
using System.Buffers.Text;
using System.Diagnostics;
using System.Diagnostics.CodeAnalysis;
using System.Text;
// Aggregates "<station>;<temperature>" lines from a measurements file into per-station
// min/average/max and prints them ordered by the raw bytes of the station name.
// The input path is taken from the first command-line argument when one is supplied;
// otherwise the original hard-coded location is used.
var filePath = args.Length > 0 ? args[0] : @"C:\projects\local\1brc-main\measurements.txt";
var start = Stopwatch.GetTimestamp();
using var handle = File.OpenHandle(filePath, FileMode.Open, FileAccess.Read, options: FileOptions.Asynchronous | FileOptions.SequentialScan);
const int CacheSize = 1 * 1024 * 1024;
var cache = new byte[CacheSize].AsMemory();
// Lookups use slices of the read buffer; stored keys are private copies (see ToArray below),
// because the buffer content is overwritten by every subsequent read.
var dict = new Dictionary<Memory<byte>, Summary>(new MemoryEqualityComparer());
// The not-yet-parsed bytes; always a slice of `cache`.
var readCache = Memory<byte>.Empty;
var currentPosition = 0L;
while (true)
{
    // A full buffer without a single line break cannot make progress: the next read would
    // be handed an empty span and return 0, which would be mistaken for end of file.
    if (readCache.Length == cache.Length)
    {
        throw new InvalidDataException($"A single line exceeds the {CacheSize}-byte read buffer.");
    }

    // Move the incomplete tail of the previous chunk to the front, then append fresh bytes.
    readCache.CopyTo(cache);
    var readBytes = RandomAccess.Read(handle, cache[readCache.Length..].Span, currentPosition);
    // With a non-empty destination, a zero-byte read signals end of file.
    var endOfFile = readBytes == 0;
    currentPosition += readBytes;
    readCache = cache[..(readBytes + readCache.Length)];

    while (true)
    {
        var pending = readCache.Span;
        var lineEndIndex = pending.IndexOf((byte)'\n');
        Memory<byte> line;
        if (lineEndIndex >= 0)
        {
            line = readCache[..lineEndIndex];
            readCache = readCache[(lineEndIndex + 1)..];
        }
        else if (endOfFile && !pending.IsEmpty)
        {
            // Last record of a file that does not end with a line break.
            line = readCache;
            readCache = Memory<byte>.Empty;
        }
        else
        {
            // Incomplete line: wait for more data (or nothing left at end of file).
            break;
        }

        // Accept both "\r\n" and "\n" terminated lines.
        if (!line.IsEmpty && line.Span[^1] == (byte)'\r')
        {
            line = line[..^1];
        }

        if (line.IsEmpty)
        {
            continue;
        }

        var separatorIndex = line.Span.IndexOf((byte)';');
        if (separatorIndex == -1)
        {
            throw new FormatException();
        }

        var station = line[..separatorIndex];
        var temperature = line[(separatorIndex + 1)..];
        if (!Utf8Parser.TryParse(temperature.Span, out double temperatureValue, out _))
        {
            throw new FormatException();
        }

        if (dict.TryGetValue(station, out var summary))
        {
            summary.Apply(temperatureValue);
        }
        else
        {
            // Seed Min/Max from the first reading; a fresh Summary followed by Apply would
            // compare against its default field values instead of real data.
            summary = new Summary();
            summary.Init(temperatureValue);
            dict[station.ToArray()] = summary;
        }
    }

    if (endOfFile)
    {
        break;
    }
}

var sb = new StringBuilder();
foreach (var (station, summary) in dict.OrderBy(p => p.Key, new MemoryComparer()))
{
    sb.AppendLine($"{Encoding.UTF8.GetString(station.Span)} {summary.Min:N1}/{summary.Average:N1}/{summary.Max:N1}");
}
Console.WriteLine(sb.ToString());
var time = Stopwatch.GetElapsedTime(start);
Console.WriteLine($"Completed in {time}");
/// <summary>
/// Orders byte buffers by their content, element by element, using
/// <c>SequenceCompareTo</c> on the underlying spans.
/// </summary>
class MemoryComparer : IComparer<Memory<byte>>
{
    /// <summary>
    /// Returns a negative value, zero, or a positive value when <paramref name="x"/> sorts
    /// before, equal to, or after <paramref name="y"/>.
    /// </summary>
    public int Compare(Memory<byte> x, Memory<byte> y)
    {
        Span<byte> left = x.Span;
        Span<byte> right = y.Span;
        int ordering = left.SequenceCompareTo(right);
        return ordering;
    }
}
/// <summary>
/// Content-based equality for byte buffers so that they can be used as dictionary keys.
/// </summary>
class MemoryEqualityComparer : IEqualityComparer<Memory<byte>>
{
    /// <summary>Returns true when both buffers have the same length and identical bytes.</summary>
    public bool Equals(Memory<byte> x, Memory<byte> y) =>
        x.Span.SequenceEqual(y.Span);

    /// <summary>
    /// Derives a hash code from the leading bytes of <paramref name="obj"/> only: the widest
    /// little-endian integer (16, 8, 4, 2 or 1 bytes) that fits into the buffer is read from
    /// its start. Buffers that are equal always produce the same hash; buffers that share the
    /// hashed prefix collide and are told apart by <see cref="Equals(Memory{byte}, Memory{byte})"/>.
    /// </summary>
    public int GetHashCode([DisallowNull] Memory<byte> obj)
    {
        var span = obj.Span;
        // Thresholds are inclusive so that every length uses the widest read it can hold;
        // e.g. a 4-byte key hashes all four bytes and a 2-byte key hashes both bytes.
        return span.Length switch
        {
            >= 16 => BinaryPrimitives.ReadUInt128LittleEndian(span).GetHashCode(),
            >= 8 => BinaryPrimitives.ReadInt64LittleEndian(span).GetHashCode(),
            >= 4 => BinaryPrimitives.ReadInt32LittleEndian(span),
            >= 2 => BinaryPrimitives.ReadInt16LittleEndian(span),
            1 => span[0],
            _ => 0,
        };
    }
}
/// <summary>
/// Running minimum, maximum, sum and count of a series of values.
/// </summary>
public class Summary
{
    // Min/Max start at the infinities so that the first applied value always replaces them;
    // with zero defaults an all-positive series would report Min = 0 and an all-negative
    // series would report Max = 0.
    public double Min = double.PositiveInfinity;
    public double Max = double.NegativeInfinity;
    public double Sum;
    public long Count;

    /// <summary>Arithmetic mean of the applied values (NaN while <see cref="Count"/> is 0).</summary>
    public double Average => Sum / Count;

    /// <summary>
    /// Sets both <see cref="Min"/> and <see cref="Max"/> to <paramref name="value"/> and adds
    /// the value to <see cref="Sum"/> and <see cref="Count"/>.
    /// </summary>
    public void Init(double value)
    {
        Min = value;
        Max = value;
        Sum += value;
        Count++;
    }

    /// <summary>Folds a single value into the summary.</summary>
    public void Apply(double value)
    {
        // Two independent checks: the first value of a fresh summary is both the new
        // minimum and the new maximum.
        if (value < Min)
        {
            Min = value;
        }

        if (value > Max)
        {
            Max = value;
        }

        Sum += value;
        Count++;
    }

    /// <summary>Merges another summary into this one.</summary>
    public void Apply(Summary other)
    {
        if (other.Min < Min)
        {
            Min = other.Min;
        }

        if (other.Max > Max)
        {
            Max = other.Max;
        }

        Sum += other.Sum;
        Count += other.Count;
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment