# Create a fresh environment named "pybwa" containing the listed packages
# (-y answers all prompts automatically).
mamba create -y -n pybwa setuptools cython python=3.11 poetry=1.8 snakemake-minimal=7 defopt samtools bwa pysam fgpyo sra-tools
# Switch the current shell into the new environment.
conda activate pybwa
# Run the workflow in ./Snakefile with one core per processing unit reported by
# nproc, using "output" as the working directory.
snakemake --cores $(nproc) --snakefile Snakefile --directory output
๐
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
BEGIN:VCALENDAR | |
VERSION:2.0 | |
PRODID:ics.py - http://git.io/lLljaA | |
BEGIN:VEVENT | |
DESCRIPTION:Session: Plenary Session: Genetics (Len Pennacchio\, Lawrence Berkeley National Laboratory and DOE Joint Genome Institute\, Session Chair)\nTitle: “Genomics – looking back\, looking forwards. A view from EMBL”\nSub-title: Ewan Birney\nLocation: Calusa Ballroom 1-7\nDate: Sunday\, February 23\nTime: 5:40 p.m. – 6:10 p.m.\n | |
DTEND:20250223T231000Z | |
LOCATION:Calusa Ballroom 1-7 | |
DTSTART:20250223T224000Z | |
SUMMARY:Plenary Session: Genetics “Genomics – looking back\, looking forwards. A view from EMBL” Ewan Birney | |
UID:[email protected] |
This is motivated by fulcrumgenomics/fgsv#25, whereby the be
tag stores a semicolon-delimited list of break end values, and we want each value to be in its own tag.
Supports the case where the be
tag may contain multiple such delimited values, each separated by a different delimiter (in this case a comma).
python split_sam_tag.py \
--in-bam fgsv.bam \
--out-bam split.bam \
--in-tag be \
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Trim the reads in in.fastq.gz to their first 26 characters and write the
# result to out.fastq.gz. Input is processed in groups of four lines; only the
# 2nd (sequence) and 4th (quality) line of each group is truncated, the other
# two lines are passed through unchanged.
#
# NOTE: awk strings are 1-based. A start index of 0 in substr() is handled
# inconsistently across awk implementations (gawk, for example, returns one
# character fewer than requested), so the start is given explicitly as 1.
pigz -d -c in.fastq.gz \
  | awk 'NR % 4 == 2 || NR % 4 == 0 { print substr($0, 1, 26); next } { print }' \
  | pigz -c - > out.fastq.gz
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from collections import defaultdict | |
from pathlib import Path | |
from typing import Any | |
from typing import Callable | |
from typing import Dict | |
from typing import List | |
from typing import Optional | |
import snakemake |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Delimited-data files: no empty columns please | |
# Show whitespace-delimited input as an aligned table in a pager, with long
# lines chopped rather than wrapped (less -S). Reads the given file(s), or
# stdin when called without arguments. Arguments are forwarded quoted so that
# paths containing spaces or glob characters are not split or expanded.
function columnit { column -t "$@" | less -S; }
# Show tab-delimited input as an aligned table in a pager, with long lines
# chopped rather than wrapped (less -S). Reads the given file(s), or stdin when
# called without arguments. Arguments are forwarded quoted so that paths
# containing spaces or glob characters are not split or expanded.
function tabit { column -t -s $'\t' "$@" | less -S; }
# Delimited-data files: empty columns values are nasty | |
# Fill empty tab-delimited fields (two adjacent tabs) with "NA", then show the
# result as a whitespace-aligned table in a pager (less -S chops long lines).
# Reads the given file(s), or stdin when called without arguments.
#
# The substitution is repeated (sed label/branch loop) until no adjacent tabs
# remain: a single global pass cannot match overlapping tab pairs, so a run of
# several empty fields would otherwise keep every other field empty.
function columnit-empty {
    local tab=$'\t'
    sed -e ':a' -e "s/${tab}${tab}/${tab}NA${tab}/g" -e 'ta' "$@" | column -t | less -S
}
# Fill empty tab-delimited fields (two adjacent tabs) with "NA", then show the
# result as a tab-aligned table in a pager (less -S chops long lines).
# Reads the given file(s), or stdin when called without arguments.
#
# The substitution is repeated (sed label/branch loop) until no adjacent tabs
# remain: a single global pass cannot match overlapping tab pairs, so a run of
# several empty fields would otherwise keep every other field empty.
function tabit-empty {
    local tab=$'\t'
    sed -e ':a' -e "s/${tab}${tab}/${tab}NA${tab}/g" -e 'ta' "$@" | column -t -s "$tab" | less -S
}
# Show me the money grep, now | |
# Run grep under `gstdbuf -o0`, i.e. with stdout unbuffered, so matches are
# emitted as soon as they are found. All arguments are forwarded to grep;
# "$@" is quoted so patterns or paths containing whitespace stay intact.
# NOTE(review): gstdbuf is presumably GNU stdbuf installed with a "g" prefix --
# confirm it is available on the target machine.
function grep-nobuff { gstdbuf -o0 grep "$@"; }
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
@HD VN:1.6 | |
@SQ SN:chr1 LN:249250621 am:1 ga:CM000663.1 sn:1 ra:NC_000001.10 un:chr1 AN:1,CM000663.1,NC_000001.10 sr:assembled-molecule | |
@SQ SN:chr2 LN:243199373 am:2 ga:CM000664.1 sn:2 ra:NC_000002.11 un:chr2 AN:2,CM000664.1,NC_000002.11 sr:assembled-molecule | |
@SQ SN:chr3 LN:198022430 am:3 ga:CM000665.1 sn:3 ra:NC_000003.11 un:chr3 AN:3,CM000665.1,NC_000003.11 sr:assembled-molecule | |
@SQ SN:chr4 LN:191154276 am:4 ga:CM000666.1 sn:4 ra:NC_000004.11 un:chr4 AN:4,CM000666.1,NC_000004.11 sr:assembled-molecule | |
@SQ SN:chr5 LN:180915260 am:5 ga:CM000667.1 sn:5 ra:NC_000005.9 un:chr5 AN:5,CM000667.1,NC_000005.9 sr:assembled-molecule | |
@SQ SN:chr6 LN:171115067 am:6 ga:CM000668.1 sn:6 ra:NC_000006.11 un:chr6 AN:6,CM000668.1,NC_000006.11 sr:assembled-molecule | |
@SQ SN:chr7 LN:159138663 am:7 ga:CM000669.1 sn:7 ra:NC_000007.13 un:chr7 AN:7,CM000669.1,NC_000007.13 sr:assembled-molecule | |
@SQ SN:chr8 LN:146364022 am:8 ga:CM000670.1 sn:8 ra:NC_000008.10 un:chr8 AN:8,CM000670.1,NC_000008.10 sr:assembled-molecule | |
@SQ SN:chr9 LN:14121343 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
@HD VN:1.6 GO:query SO:unsorted SS:unsorted:template-coordinate | |
@SQ SN:chr1 LN:248956422 M5:6aef897c3d6ff0c78aff06ac189178dd AS:hg38 UR:ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/001/405/GCF_000001405.39_GRCh38.p13/GCF_000001405.39_GRCh38.p13_genomic.fna.gz SP:Homo sapiens AN:NC_000001.11,CM000663.2 | |
@SQ SN:chr2 LN:242193529 M5:f98db672eb0993dcfdabafe2a882905c AS:hg38 UR:ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/001/405/GCF_000001405.39_GRCh38.p13/GCF_000001405.39_GRCh38.p13_genomic.fna.gz SP:Homo sapiens AN:NC_000002.12,CM000664.2 | |
@SQ SN:chr3 LN:198295559 M5:76635a41ea913a405ded820447d067b0 AS:hg38 UR:ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/001/405/GCF_000001405.39_GRCh38.p13/GCF_000001405.39_GRCh38.p13_genomic.fna.gz SP:Homo sapiens AN:NC_000003.12,CM000665.2 | |
@SQ SN:chr4 LN:190214555 M5:3210fecf1eb92d5489da4346b3fddc6e AS:hg38 UR:ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/001/405/GCF_000001405.39_GRCh38.p13/GCF_000001405.39_GRCh38.p13_genomic.fna.gz SP:Homo sapiens AN:NC_000004.12,CM00066 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import $ivy.`com.fulcrumgenomics::fgbio:1.1.0` | |
import com.fulcrumgenomics.FgBioDef._ | |
import java.nio.file.{Path, Paths} | |
import com.fulcrumgenomics.commons.util.{LazyLogging, Logger} | |
import com.fulcrumgenomics.util.ProgressLogger | |
import com.fulcrumgenomics.bam.api._ | |
import htsjdk.samtools.SAMReadGroupRecord | |
// So that a [[Path]] can be built from a [[String]] on the command line |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import enum | |
from typing import List | |
from pathlib import Path | |
class Alignment(enum.Enum):
    """Alignment choices, each identified by a distinct integer value."""

    BWA = 1
    Bowtie = 2
class VariantCalling(enum.Enum): |
NewerOlder