| # download portable samtools binary v1.14 (not the latest version) | |
| curl -L 'https://zenodo.org/records/5731013/files/htstools-1.14_x64-linux.tar.bz2?download=1' | tar -jxf - htstools-1.14_x64-linux/samtools | |
| htstools-1.14_x64-linux/samtools # test run; the following command lines assume "samtools" is on $PATH | |
| # download minimap2 binary; also easy to compile from the source code | |
| curl -L https://github.com/lh3/minimap2/releases/download/v2.28/minimap2-2.28_x64-linux.tar.bz2 | tar -jxf - minimap2-2.28_x64-linux/minimap2 | |
| # download T2T-CHM13v2 analysis set | |
| curl -L https://s3-us-west-2.amazonaws.com/human-pangenomics/T2T/CHM13/assemblies/analysis_set/chm13v2.0_maskedY_rCRS.fa.gz | zcat > chm13v2.fa | |
| samtools faidx chm13v2.fa |
| // MIT License | |
| // | |
| // Copyright (c) 2018 degski | |
| // | |
| // Permission is hereby granted, free of charge, to any person obtaining a copy | |
| // of this software and associated documentation files (the "Software"), to deal | |
| // in the Software without restriction, including without limitation the rights | |
| // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |
| // copies of the Software, and to permit persons to whom the Software is |
| #creates a base image from condo | |
| FROM continuumio/miniconda3 | |
| SHELL ["/bin/bash", "-c"] | |
| COPY environment.yml . | |
| #run environment | |
| #RUN conda env create -f environment.yml | |
| RUN conda init bash |
| // To compile: | |
| // gcc -g -O2 example.c libminimap2.a -lz | |
| #include <stdlib.h> | |
| #include <assert.h> | |
| #include <stdio.h> | |
| #include <zlib.h> | |
| #include "minimap.h" | |
| #include "kseq.h" | |
| KSEQ_INIT(gzFile, gzread) |
| // a combination of inverse square root (see wiki) and inversion: https://bits.stephan-brumme.com/inverse.html | |
| static inline float mg_sqrtf(float x) | |
| { | |
| union { float f; uint32_t i; } z = { x }; | |
| z.i = 0x5f3759df - (z.i >> 1); | |
| z.f *= (1.5f - (x * 0.5f * z.f * z.f)); | |
| z.i = 0x7EEEEEEE - z.i; | |
| return z.f; | |
| } |
| gg:ksw2_ggd.c cli.c ksw2.h | |
| $(CC) -Wall -g -O2 -o $@ ksw2_ggd.c cli.c | |
| clean: | |
| rm -fr *.o *.dSYM gg |
| CREATE TABLE seq ( | |
| checksum TEXT, | |
| ac TEXT, -- INSDC sequence accession, when available | |
| len INTEGER, -- could be of type "TEXT"; no need to implement "less than" | |
| seq TEXT, | |
| PRIMARY KEY (checksum) -- what about collisions? | |
| ); | |
| CREATE INDEX seq_len ON seq (len) | |
| CREATE INDEX seq_ac ON seq (ac) -- different checksums may have the same AC |
NA12878 DirectRNA reads were obtained [here][raw-data] (passed reads only) and aligned with [minimap2][minimap2] v2.5 against the no_alt_analysis_set of GRCh38 plus SIRV contigs. It took <1 wall-clock hour across 16 CPU cores with command-line options: -cx splice -k14 --cs -uf -N20 -t16. Alignments were converted to BED with the misc/splice2bed.js script from minimap2 and then converted to BigBed. Ribosome-related genes (RPL*, RPS*, EEF* and RPSA) were excluded to reduce the file size. The final BigBed is hosted [at OSF][osf-prj].
A UCSC custom track is configured with
track type=bigBed name=NA12878-DirectRNA.minimap2-2.5 useScore=1 visibility=4 itemRgb="On" bigDataUrl=https://files.osf.io/v1/resources/b5nm2/providers/osfstorage/5a2347599ad5a10272ed5739?action=download&version=1&direct
You can access this track with the [following link][direct-link]. A GMAP alignment track is temporarily available [here][gmap]. This track contains 1/4 of reads. GMAP is still running. It will take 4–5 wall-clock
| #include <stdio.h> | |
| #define SIMD_SSE 0x1 | |
| #define SIMD_SSE2 0x2 | |
| #define SIMD_SSE3 0x4 | |
| #define SIMD_SSSE3 0x8 | |
| #define SIMD_SSE4_1 0x10 | |
| #define SIMD_SSE4_2 0x20 | |
| #define SIMD_AVX 0x40 | |
| #define SIMD_AVX2 0x80 |