Skip to content

Instantly share code, notes, and snippets.

View rob-p's full-sized avatar
🎯
Focusing

Rob Patro rob-p

🎯
Focusing
View GitHub Profile
@rob-p
rob-p / test_decomp.sh
Created October 26, 2024 14:44
parallel decompression speed test
#!/bin/bash
#
# This is a rather minimal example of Argbash's potential
# Example taken from http://argbash.readthedocs.io/en/stable/example.html
#
# ARG_OPTIONAL_BOOLEAN([delete-tmp],[],[delete output directory])
# ARG_POSITIONAL_SINGLE([num-threads],[number of threads to run with],[])
# ARG_POSITIONAL_SINGLE([input],[input file list],[])
# ARG_HELP([Test how long it takes to decompress many files in parallel])
# ARGBASH_GO()
use anyhow;
use ndarray::Array2;
use std::fs;
use std::cmp::Ordering;
fn naive_table(text: &str) -> Vec<u32> {
let text = text.as_bytes();
assert!(text.len() <= u32::MAX as usize);
let mut table = vec![0u32; text.len()];
for (i, element) in table.iter_mut().enumerate() {
@rob-p
rob-p / decompression_speeds.md
Created June 5, 2024 15:42
Decompression speeds with some different libraries / interfaces
$ uname -a
Linux fern 6.2.0-33-generic #33~22.04.1-Ubuntu SMP PREEMPT_DYNAMIC Thu Sep  7 10:33:52 UTC 2 x86_64 x86_64 x86_64 GNU/Linux

zcat

$ zcat data/20201028_CCDG_14151_B01_GRM_WGS_2020-08-05_chr21.recalibrated_variants.vcf.gz|head -c $(( 10 * 1024 * 1024 * 1024 )) | pv >/dev/null
10.0GiB 0:00:47 [ 216MiB/s] [                                                                                                                                                           <=>       ]
@rob-p
rob-p / simple_builder.rs
Created February 4, 2024 16:16
A simple builder in rust (using the `derive_builder` crate).
#[macro_use]
extern crate derive_builder;
#[derive(Default, Builder, Debug)]
#[builder(setter(into))]
struct House {
floors: u32,
rooms: u32,
#[builder(default = "false")]
has_garage: bool,
@rob-p
rob-p / iter_question.md
Last active July 10, 2023 19:56
Rust Iterator Question

Note: This is cross-posted from reddit.

I've been trying to determine (a) whether it's possible to achieve something in Rust and (b) if so, how. I will try to abstract the problem as much as possible, since the details of this in my code are rather boring and not essential to describing the problem.

I have a program that extracts information from the combination of a Polars data frame and a paired file. This is for a genomics application; for those interested, the data frame contains the locations of sequence features (exons, transcripts, etc.) and the file contains the genome sequence (chromosome by chromosome). The genome is large, so we prefer not to load the whole thing into memory and instead iterate over it chromosome by chromosome and then feature by feature, yielding each sequence feature one at a time.

It turns out it's relatively simple to write a "single sequence" iterator (i.e. an iterato

use antisequence::*;
fn main() {
// 1{b[9-10]f[CAGAGC]u[8]b[10]}2{r:}
let patterns = r#"
name: anchor
patterns:
- pattern: "CAGAGC"
"#;
@rob-p
rob-p / avx2_lcp2.cpp
Created April 25, 2023 03:13
AVX2 LCP take 2
#include <immintrin.h>
const char* longest_common_prefix(const char* str1, const char* str2, int len) {
int i = 0;
for (; i <= len - 32; i += 32) {
__m256i v1 = _mm256_loadu_si256((__m256i*)(str1 + i));
__m256i v2 = _mm256_loadu_si256((__m256i*)(str2 + i));
__m256i cmp = _mm256_cmpeq_epi8(v1, v2);
int mask = _mm256_movemask_epi8(cmp);
if (mask != 0xFFFFFFFF) {
@rob-p
rob-p / avx2_lcp.cpp
Last active April 25, 2023 03:05
ChatGPT AVX2 LCP
#include <immintrin.h> // for AVX2 intrinsics
#include <cstring> // for strlen
size_t longestCommonPrefixAVX2(const char* str1, const char* str2) {
const size_t len1 = strlen(str1);
const size_t len2 = strlen(str2);
const size_t len = std::min(len1, len2);
const __m256i* p1 = reinterpret_cast<const __m256i*>(str1);
const __m256i* p2 = reinterpret_cast<const __m256i*>(str2);
@rob-p
rob-p / compare_hits.py
Created September 30, 2021 04:02
compare containment hits
from pafpy import PafFile
import argparse
import sys
def parse_tab6(tf):
res = []
first = True
with open(tf, 'r') as ifile:
for l in ifile:
if first:
@rob-p
rob-p / filter_blast.py
Created September 30, 2021 03:59
Filter blast output by containment criteria
from pafpy import PafFile
import argparse
import sys
def main(args):
pi = args.input
of = args.out
ld = {}
with open(args.lens, 'r') as ifile: