Created
August 29, 2024 01:46
-
-
Save neon-sunset/544b7a766d2a04b85ba72e03e3b1cd71 to your computer and use it in GitHub Desktop.
Fasta Rust #7 -> C#
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// The Computer Language Benchmarks Game | |
// https://salsa.debian.org/benchmarksgame-team/benchmarksgame/ | |
// | |
// contributed by the Rust Project Developers | |
// contributed by TeXitoi | |
// contributed by Alisdair Owens | |
// contributed by Ryohei Machida | |
// adapted to C# by Arseniy Zlobintsev | |
using System.Runtime.CompilerServices; | |
using System.Runtime.InteropServices; | |
using System.Runtime.Intrinsics; | |
using static Constants; | |
[module: SkipLocalsInit] | |
var n = args is [var arg] ? uint.Parse(arg) : 1000; | |
var tasks = (ushort)Math.Min(Environment.ProcessorCount, 2); | |
var alu = | |
"GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTT"u8 + | |
"GGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTC"u8 + | |
"GAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACT"u8 + | |
"AAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTG"u8 + | |
"TAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCT"u8 + | |
"TGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCG"u8 + | |
"CCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTC"u8 + | |
"TCAAAAA"u8; | |
Console.WriteLine(">ONE Homo sapiens alu"); | |
FastaRepeat(alu, n * 2); | |
var rng = new MyRandom(n * 3, tasks); | |
var iub = new WeightedRandom<byte>([ | |
((byte)'a', 0.27f), | |
((byte)'c', 0.12f), | |
((byte)'g', 0.12f), | |
((byte)'t', 0.27f), | |
((byte)'B', 0.02f), | |
((byte)'D', 0.02f), | |
((byte)'H', 0.02f), | |
((byte)'K', 0.02f), | |
((byte)'M', 0.02f), | |
((byte)'N', 0.02f), | |
((byte)'R', 0.02f), | |
((byte)'S', 0.02f), | |
((byte)'V', 0.02f), | |
((byte)'W', 0.02f), | |
((byte)'Y', 0.02f), | |
]); | |
Console.WriteLine(">TWO IUB ambiguity codes"); | |
FastaRandomPar(rng, iub, tasks); | |
lock (rng) { | |
rng.Reset(n * 5); | |
} | |
var homosapiens = new WeightedRandom<byte>([ | |
((byte)'a', 0.3029549426680f), | |
((byte)'c', 0.1979883004921f), | |
((byte)'g', 0.1975473066391f), | |
((byte)'t', 0.3015094502008f), | |
]); | |
Console.WriteLine(">THREE Homo sapiens frequency"); | |
FastaRandomPar(rng, homosapiens, tasks); | |
static int GCD(int a, int b) => | |
b is 0 ? a : GCD(b, a % b); | |
static void FastaRepeat(ReadOnlySpan<byte> seq, uint n) { | |
var numLinesPerBuf = seq.Length / GCD(seq.Length, LineLength); | |
var bufSize = numLinesPerBuf * (LineLength + 1); | |
var buf = (stackalloc byte[bufSize]); | |
var n2 = (int)(n + n / LineLength); | |
// fill buf | |
var k = 0; | |
for (var i = 0; i < numLinesPerBuf; i++) { | |
for (var j = 0; j < LineLength; j++) { | |
buf[i * (LineLength + 1) + j] = seq[k++ % seq.Length]; | |
} | |
buf[i * (LineLength + 1) + LineLength] = (byte)'\n'; | |
} | |
// write to stdout | |
var stdout = Console.OpenStandardOutput(); | |
while (n2 >= bufSize) { | |
stdout.Write(buf); | |
n2 -= bufSize; | |
} | |
// trailing line feed | |
if (n2 % LineLength != 0) { | |
buf[n2] = (byte)'\n'; | |
n2++; | |
} | |
stdout.Write(buf[..n2]); | |
} | |
static void FastaRandom( | |
ushort task, | |
MyRandom rng, | |
MyStdout writer, | |
WeightedRandom<byte> wr | |
) { | |
var rngBuf = (stackalloc uint[BlockLength]); | |
var outBuf = (stackalloc byte[BlockLength + Lines]); | |
while (true) { | |
var count = 0; | |
while (true) { | |
lock (rng) { | |
if (rng.Next(rngBuf, task) is int n) { | |
count = n; break; | |
} | |
} | |
} | |
if (count is 0) break; | |
rngBuf = rngBuf[..count]; | |
var lineCount = 0; | |
for (var from = 0; from < rngBuf.Length; from += LineLength) { | |
var to = Math.Min(from + LineLength, rngBuf.Length); | |
for (var j = from; j < to; j++) { | |
outBuf[j + lineCount] = wr.Next(rngBuf[j]); | |
} | |
outBuf[to + lineCount] = (byte)'\n'; | |
lineCount++; | |
} | |
while (true) { | |
lock (writer) { | |
if (writer.Write( | |
outBuf[..(rngBuf.Length + lineCount)], task)) break; | |
} | |
} | |
} | |
} | |
static void FastaRandomPar( | |
MyRandom rng, | |
WeightedRandom<byte> wr, | |
ushort tasks | |
) { | |
var stdout = new MyStdout(tasks); | |
var workers = new Task[tasks]; | |
for (var i = 0; i < workers.Length; i++) { | |
var task = (ushort)i; | |
workers[i] = Task.Run(() => | |
FastaRandom(task, rng, stdout, wr)); | |
} | |
Task.WaitAll(workers); | |
} | |
static class Constants { | |
public const int Lines = 1024; | |
public const int LineLength = 60; | |
public const int BlockLength = LineLength * Lines; | |
public const int IM = 139968; | |
} | |
[StructLayout(LayoutKind.Sequential, Pack = 32)] | |
struct Aligned<T> { public T Value; } | |
[InlineArray(16)] | |
struct Buf16<T> { T _; } | |
class WeightedRandom<T> { | |
readonly Aligned<Buf16<uint>> cumprob; | |
readonly Buf16<T> elements; | |
public WeightedRandom(ReadOnlySpan<(T, float)> mapping) { | |
var cumprob = (Span<uint>)this.cumprob.Value; | |
cumprob.Fill(int.MaxValue); | |
var acc = .0; | |
for (var i = 0; i < mapping.Length; i++) { | |
var (elem, weight) = mapping[i]; | |
elements[i] = elem; | |
acc += weight; | |
cumprob[i] = (uint)Math.Floor(acc * IM); | |
} | |
} | |
public T Next(uint prob) { | |
var cumprob = (ReadOnlySpan<uint>)this.cumprob.Value; | |
var needle = Vector512.Create(prob); | |
var vcp = Vector512.Create(cumprob); | |
var count = -Vector512.LessThan(vcp, needle); | |
var idx = Vector512.Sum(count); | |
return elements[(int)idx]; | |
} | |
} | |
class MyRandom(uint count, ushort tasks) { | |
uint count = count; | |
uint seed = 42; | |
ushort nextTask; | |
public int? Next(Span<uint> bufffer, ushort currTask) { | |
if (currTask != nextTask) return null; | |
nextTask = (ushort)((currTask + 1) % tasks); | |
var toGen = Math.Min((uint)bufffer.Length, count); | |
for (var i = 0; i < (int)toGen; i++) { | |
seed = (seed * 3877 + 29573) % IM; | |
bufffer[i] = seed; | |
} | |
count -= toGen; | |
return (int)toGen; | |
} | |
public void Reset(uint newCount) { | |
nextTask = 0; | |
count = newCount; | |
} | |
} | |
class MyStdout(ushort tasks) { | |
readonly Stream stdout = Console.OpenStandardOutput(); | |
ushort nextTask; | |
public bool Write(ReadOnlySpan<byte> buffer, ushort currTask) { | |
if (nextTask != currTask) return false; | |
nextTask = (ushort)((currTask + 1) % tasks); | |
stdout.Write(buffer); | |
return true; | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment