# Download the portable samtools binary v1.14 (not the latest version) and
# extract only the samtools executable from the archive.
curl -L 'https://zenodo.org/records/5731013/files/htstools-1.14_x64-linux.tar.bz2?download=1' | tar -jxf - htstools-1.14_x64-linux/samtools
htstools-1.14_x64-linux/samtools  # test run
# The following command lines assume "samtools" is on $PATH, so put the
# extracted binary's directory there.
export PATH="$PWD/htstools-1.14_x64-linux:$PATH"
# Download the minimap2 binary; it is also easy to compile from the source code.
curl -L https://github.com/lh3/minimap2/releases/download/v2.28/minimap2-2.28_x64-linux.tar.bz2 | tar -jxf - minimap2-2.28_x64-linux/minimap2
# Download the T2T-CHM13v2 analysis set and decompress it to chm13v2.fa.
curl -L https://s3-us-west-2.amazonaws.com/human-pangenomics/T2T/CHM13/assemblies/analysis_set/chm13v2.0_maskedY_rCRS.fa.gz | zcat > chm13v2.fa
# Build the FASTA index (chm13v2.fa.fai).
samtools faidx chm13v2.fa
// MIT License
//
// Copyright (c) 2018 degski
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
# Create a base image from conda (Miniconda 3).
FROM continuumio/miniconda3
# Run subsequent shell-form instructions with bash instead of the default shell.
SHELL ["/bin/bash", "-c"]
# Copy the conda environment specification into the image.
COPY environment.yml .
# Environment creation is currently disabled.
#RUN conda env create -f environment.yml
# Set up bash for conda.
RUN conda init bash
// To compile:
//   gcc -g -O2 example.c libminimap2.a -lz
#include <stdlib.h>
#include <assert.h>
#include <stdio.h>
#include <zlib.h>
#include "minimap.h"
#include "kseq.h"
// Expand the kseq.h macro with gzFile as the stream type and gzread as the
// read function.
KSEQ_INIT(gzFile, gzread)
/*
 * Fast approximate square root.
 *
 * A combination of the fast inverse square root (see wiki) and a bit-level
 * inversion: https://bits.stephan-brumme.com/inverse.html
 *
 * The result is only an approximation: for x = 1, 2 and 4 it is within about
 * 7% of sqrtf(x).
 * NOTE(review): presumably intended for positive, finite, normal x only;
 * confirm against callers before passing 0, negatives, inf or NaN.
 */
static inline float mg_sqrtf(float x)
{
	/* Reinterpret the float's bits as an integer through a union. */
	union { float f; uint32_t i; } z = { x };
	/* Initial guess for 1/sqrt(x) from the integer representation. */
	z.i = 0x5f3759df - (z.i >> 1);
	/* One Newton-Raphson step refining 1/sqrt(x). */
	z.f *= (1.5f - (x * 0.5f * z.f * z.f));
	/* Approximate reciprocal via integer subtraction: 1/(1/sqrt(x)) ~ sqrt(x). */
	z.i = 0x7EEEEEEE - z.i;
	return z.f;
}
# "clean" does not produce a file of that name; mark it phony so a stray
# file called "clean" cannot stop the rule from running.
.PHONY: clean

# Build the gg executable directly from its two C sources; ksw2.h is listed
# as a prerequisite so that header changes trigger a rebuild.
gg: ksw2_ggd.c cli.c ksw2.h
	$(CC) -Wall -g -O2 -o $@ ksw2_ggd.c cli.c

# Remove object files, macOS debug-symbol bundles and the built binary.
clean:
	rm -fr *.o *.dSYM gg
-- Sequence store keyed by checksum.
CREATE TABLE seq (
  checksum TEXT,
  ac       TEXT,     -- INSDC sequence accession, when available
  len      INTEGER,  -- could be of type "TEXT"; no need to implement "less than"
  seq      TEXT,
  PRIMARY KEY (checksum) -- what about collisions?
);
-- Secondary indexes for lookups by length and by accession.
CREATE INDEX seq_len ON seq (len);
CREATE INDEX seq_ac ON seq (ac); -- different checksums may have the same AC
NA12878 DirectRNA reads were obtained [here][raw-data] (passed reads only) and aligned with [minimap2][minimap2] v2.5 against the `no_alt_analysis_set`
of GRCh38 plus SIRV contigs. It took <1 wall-clock hour across 16 CPU cores with command-line options: `-cx splice -k14 --cs -uf -N20 -t16`.
Alignments were converted to BED with the `misc/splice2bed.js`
script from minimap2 and then converted to BigBed. Ribosome-related genes (RPL*, RPS*, EEF* and RPSA) were excluded to reduce the file size. The final BigBed is hosted [at OSF][osf-prj].
A UCSC custom track is configured with:

```
track type=bigBed name=NA12878-DirectRNA.minimap2-2.5 useScore=1 visibility=4 itemRgb="On" bigDataUrl=https://files.osf.io/v1/resources/b5nm2/providers/osfstorage/5a2347599ad5a10272ed5739?action=download&version=1&direct
```

You can access this track with the [following link][direct-link]. A GMAP alignment track is temporarily available [here][gmap]. This track contains 1/4 of reads. GMAP is still running. It will take 4–5 wall-clock
#include <stdio.h>

/* SIMD instruction-set flags. Each value is a distinct single bit, so
 * several flags can be OR-ed together into one integer mask. */
#define SIMD_SSE     0x1
#define SIMD_SSE2    0x2
#define SIMD_SSE3    0x4
#define SIMD_SSSE3   0x8
#define SIMD_SSE4_1  0x10
#define SIMD_SSE4_2  0x20
#define SIMD_AVX     0x40
#define SIMD_AVX2    0x80