Skip to content

Instantly share code, notes, and snippets.

@neon-sunset
Created August 29, 2024 01:46
Show Gist options
  • Save neon-sunset/544b7a766d2a04b85ba72e03e3b1cd71 to your computer and use it in GitHub Desktop.
Save neon-sunset/544b7a766d2a04b85ba72e03e3b1cd71 to your computer and use it in GitHub Desktop.
Fasta Rust #7 -> C#
// The Computer Language Benchmarks Game
// https://salsa.debian.org/benchmarksgame-team/benchmarksgame/
//
// contributed by the Rust Project Developers
// contributed by TeXitoi
// contributed by Alisdair Owens
// contributed by Ryohei Machida
// adapted to C# by Arseniy Zlobintsev
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;
using static Constants;
[module: SkipLocalsInit]
var n = args is [var arg] ? uint.Parse(arg) : 1000;
var tasks = (ushort)Math.Min(Environment.ProcessorCount, 2);
var alu =
"GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTT"u8 +
"GGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTC"u8 +
"GAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACT"u8 +
"AAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTG"u8 +
"TAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCT"u8 +
"TGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCG"u8 +
"CCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTC"u8 +
"TCAAAAA"u8;
Console.WriteLine(">ONE Homo sapiens alu");
FastaRepeat(alu, n * 2);
var rng = new MyRandom(n * 3, tasks);
var iub = new WeightedRandom<byte>([
((byte)'a', 0.27f),
((byte)'c', 0.12f),
((byte)'g', 0.12f),
((byte)'t', 0.27f),
((byte)'B', 0.02f),
((byte)'D', 0.02f),
((byte)'H', 0.02f),
((byte)'K', 0.02f),
((byte)'M', 0.02f),
((byte)'N', 0.02f),
((byte)'R', 0.02f),
((byte)'S', 0.02f),
((byte)'V', 0.02f),
((byte)'W', 0.02f),
((byte)'Y', 0.02f),
]);
Console.WriteLine(">TWO IUB ambiguity codes");
FastaRandomPar(rng, iub, tasks);
lock (rng) {
rng.Reset(n * 5);
}
var homosapiens = new WeightedRandom<byte>([
((byte)'a', 0.3029549426680f),
((byte)'c', 0.1979883004921f),
((byte)'g', 0.1975473066391f),
((byte)'t', 0.3015094502008f),
]);
Console.WriteLine(">THREE Homo sapiens frequency");
FastaRandomPar(rng, homosapiens, tasks);
static int GCD(int a, int b) =>
b is 0 ? a : GCD(b, a % b);
static void FastaRepeat(ReadOnlySpan<byte> seq, uint n) {
var numLinesPerBuf = seq.Length / GCD(seq.Length, LineLength);
var bufSize = numLinesPerBuf * (LineLength + 1);
var buf = (stackalloc byte[bufSize]);
var n2 = (int)(n + n / LineLength);
// fill buf
var k = 0;
for (var i = 0; i < numLinesPerBuf; i++) {
for (var j = 0; j < LineLength; j++) {
buf[i * (LineLength + 1) + j] = seq[k++ % seq.Length];
}
buf[i * (LineLength + 1) + LineLength] = (byte)'\n';
}
// write to stdout
var stdout = Console.OpenStandardOutput();
while (n2 >= bufSize) {
stdout.Write(buf);
n2 -= bufSize;
}
// trailing line feed
if (n2 % LineLength != 0) {
buf[n2] = (byte)'\n';
n2++;
}
stdout.Write(buf[..n2]);
}
static void FastaRandom(
ushort task,
MyRandom rng,
MyStdout writer,
WeightedRandom<byte> wr
) {
var rngBuf = (stackalloc uint[BlockLength]);
var outBuf = (stackalloc byte[BlockLength + Lines]);
while (true) {
var count = 0;
while (true) {
lock (rng) {
if (rng.Next(rngBuf, task) is int n) {
count = n; break;
}
}
}
if (count is 0) break;
rngBuf = rngBuf[..count];
var lineCount = 0;
for (var from = 0; from < rngBuf.Length; from += LineLength) {
var to = Math.Min(from + LineLength, rngBuf.Length);
for (var j = from; j < to; j++) {
outBuf[j + lineCount] = wr.Next(rngBuf[j]);
}
outBuf[to + lineCount] = (byte)'\n';
lineCount++;
}
while (true) {
lock (writer) {
if (writer.Write(
outBuf[..(rngBuf.Length + lineCount)], task)) break;
}
}
}
}
static void FastaRandomPar(
MyRandom rng,
WeightedRandom<byte> wr,
ushort tasks
) {
var stdout = new MyStdout(tasks);
var workers = new Task[tasks];
for (var i = 0; i < workers.Length; i++) {
var task = (ushort)i;
workers[i] = Task.Run(() =>
FastaRandom(task, rng, stdout, wr));
}
Task.WaitAll(workers);
}
static class Constants {
public const int Lines = 1024;
public const int LineLength = 60;
public const int BlockLength = LineLength * Lines;
public const int IM = 139968;
}
[StructLayout(LayoutKind.Sequential, Pack = 32)]
struct Aligned<T> { public T Value; }
[InlineArray(16)]
struct Buf16<T> { T _; }
class WeightedRandom<T> {
readonly Aligned<Buf16<uint>> cumprob;
readonly Buf16<T> elements;
public WeightedRandom(ReadOnlySpan<(T, float)> mapping) {
var cumprob = (Span<uint>)this.cumprob.Value;
cumprob.Fill(int.MaxValue);
var acc = .0;
for (var i = 0; i < mapping.Length; i++) {
var (elem, weight) = mapping[i];
elements[i] = elem;
acc += weight;
cumprob[i] = (uint)Math.Floor(acc * IM);
}
}
public T Next(uint prob) {
var cumprob = (ReadOnlySpan<uint>)this.cumprob.Value;
var needle = Vector512.Create(prob);
var vcp = Vector512.Create(cumprob);
var count = -Vector512.LessThan(vcp, needle);
var idx = Vector512.Sum(count);
return elements[(int)idx];
}
}
class MyRandom(uint count, ushort tasks) {
uint count = count;
uint seed = 42;
ushort nextTask;
public int? Next(Span<uint> bufffer, ushort currTask) {
if (currTask != nextTask) return null;
nextTask = (ushort)((currTask + 1) % tasks);
var toGen = Math.Min((uint)bufffer.Length, count);
for (var i = 0; i < (int)toGen; i++) {
seed = (seed * 3877 + 29573) % IM;
bufffer[i] = seed;
}
count -= toGen;
return (int)toGen;
}
public void Reset(uint newCount) {
nextTask = 0;
count = newCount;
}
}
class MyStdout(ushort tasks) {
readonly Stream stdout = Console.OpenStandardOutput();
ushort nextTask;
public bool Write(ReadOnlySpan<byte> buffer, ushort currTask) {
if (nextTask != currTask) return false;
nextTask = (ushort)((currTask + 1) % tasks);
stdout.Write(buffer);
return true;
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment