with-bcolz | time (seconds) | bcolz-time | md5sum | num_lines | filter |
---|---|---|---|---|---|
True | 2.8 | 0.62 | 5281fecb8571cc7c8e832e5682e865bb | 16778 | (gt_phred_ll_homalt).(*).(==0).(all) |
False | 155.2 | NA | 5281fecb8571cc7c8e832e5682e865bb | 16778 | (gt_phred_ll_homalt).(*).(==0).(all) |
True | 3.1 | 0.74 | b2b75c9a56e128410a0e19708f096e2f | 16750 | ((gt_phred_ll_homalt).().(==0).(all)) and (gt_types).().(==HOM_ALT).(all) |
False | 212.8 | NA | b2b75c9a56e128410a0e19708f096e2f | 16750 | ((gt_phred_ll_homalt).().(==0).(all)) and (gt_types).().(==HOM_ALT).(all) |
True | 1.4 | 0.42 | c4885f35d03bed862d1e9d0ece71bc7c | 2367 | (gt_depths.{dad} > 20 and gt_depths.{mom} > 20 and gt_depths.{kid} > 20) and (gt_types.{dad} == HOM_REF and gt_types.{mom} == HOM_REF and gt_types.{kid} != HOM_REF) |
False | 203.0 | NA | c4885f35d03bed862d1e9d0ece71bc7c | 2367 | (gt_depths.{dad} > 20 and gt_depths.{mom} > 20 and gt_depths.{kid} > 20) and (gt_types.{dad} == HO |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from math import sqrt | |
def jeffreys_interval(af, n_samples, alpha=0.05): | |
# https://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval | |
""" | |
>>> jeffreys_interval(0.2, 5) #doctest: +ELLIPSIS | |
(0.02251..., 0.6286...) | |
>>> jeffreys_interval(0.2, 10) #doctest: +ELLIPSIS | |
(0.0440..., 0.5027...) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from __future__ import print_function | |
from pyfaidx import Fasta | |
from cyvcf2 import VCF | |
import string | |
import re | |
import sys | |
try: # 2.x | |
maketrans = string.maketrans | |
except AttributeError: # 3.x |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import csv | |
import re | |
import sys | |
import os.path | |
import string | |
sex_replace = {'m': 1, 'male': 1, 'female': 2, 'f': 2} | |
phenotype_replace = {'affected': 2, 'unaffected': 1} | |
if False: |
$ gemini de_novo --columns "chrom, start, end, ref, alt" test.de_novo.db --min-kindreds 1 --gt-pl-max 1000 | cols
chrom start end ref alt gene variant_id family_id family_members family_genotypes affected_samples family_count
chr10 48003991 48003992 C T ASAH2C variant_id:2 2 2_dad(dad;unaffected),2_mom(mom;unaffected),2_kid(child;affected) C/C,C/C,C/T 2_kid 2
chr10 48004991 48004992 C T ASAH2C variant_id:3 3 3_dad(dad;unaffected),3_mom(mom;unaffected),3_kid(child;affected) C/C,C/C,C/T 3_kid 2
chr10 135336655 135336656 G A SPRN variant_id:4 2 2_dad(dad;unaffected),2_mom(mom;unaffected),2_kid(child;affected) G/G,G/G,G/A 2_kid 2
chr10 135336655 135336656 G A SPRN variant_id:4 1 1_dad(dad;unaffected),1_mom(mom;unaffected),1_kid(child;affected) G/G,G/G,G/A 1_kid 2
chr10 135
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from bx.bbi import bigwig_file | |
from bx.bbi import bigbed_file | |
print bigwig_file | |
import time | |
from hashlib import md5 | |
prefix = '/data/gemini/data/hg19_fitcons_fc-i6-0_V1-01' | |
print "before bw: 8.6 seconds" |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
This will standardize input file genotype fields to have AD=REF,ALT1,ALT2 ... (a | |
la GTK). | |
For platypus: --depth NR --alt-depth NV | |
varscan: --ref-depth RD --alt-depth AD | |
samtools: --ref-depth DP4 (untested) | |
""" | |
import argparse |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
set -eo pipefail | |
REF=human_g1k_v37_decoy.fasta | |
VCF=Mills_and_1000G_gold_standard.indels.b37.vcf.gz | |
VCF=/usr/local/src/gemini/test/test.mult-alts.fb.vep.vcf.gz | |
java -Xmx4G -jar /usr/local/src/gatk/GenomeAnalysisTK.jar -T LeftAlignAndTrimVariants --splitMultiallelics --trimAlleles \ | |
-R $REF \ | |
--variant $VCF -o gatk.norm.vcf |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Generate sorted random numbers. | |
method from: http://repository.cmu.edu/cgi/viewcontent.cgi?article=3483&context=compsci | |
Has same interface as random module except it takes N as the first argument | |
for the number of numbers to generate. | |
""" | |
from random import random as rand, seed | |
def random(N): |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"bufio" | |
"compress/gzip" | |
"container/heap" | |
"fmt" | |
"io" | |
"os" | |
"strconv" |