cat ref.gene.txt
chr1 1736 4272 DDX11L1 +
chr1 4224 19233 WASH7P -
chr1 4224 7502 LOC100288778 -
chr1 7231 7299 MIR6859-1 -
chr1 7231 7299 MIR6859-2 -
chr1 7231 7299 MIR6859-3 -
chr1 7231 7299 MIR6859-4 -
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
# function Extract for common file formats | |
function extract { | |
if [ -z "$1" ]; then | |
# display usage if no parameters given | |
echo "Usage: extract <path/file_name>.<zip|rar|bz2|gz|tar|tbz2|tgz|Z|7z|xz|ex|tar.bz2|tar.gz|tar.xz>" | |
else | |
if [ -f "$1" ] ; then | |
NAME=${1%.*} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
human | mouse | |
---|---|---|
A1BG | A1bg | |
A1CF | A1cf | |
A2LD1 | A2ld1 | |
A2M | A2m | |
A4GALT | A4galt | |
A4GNT | A4gnt | |
AAAS | Aaas | |
AACS | Aacs | |
AADAC | Aadac |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /bin/bash
### This script is used to filter the vcf file produced by lumpy/speedseq
# Strict mode: exit on an unhandled error (-e), treat unset variables as
# errors (-u), and make a pipeline fail if any of its stages fails (pipefail).
set -euo pipefail
function usage() { | |
echo " |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /bin/bash
### This script is used to filter the vcf file produced by lumpy/speedseq
# Strict mode: exit on an unhandled error (-e), treat unset variables as
# errors (-u), and make a pipeline fail if any of its stages fails (pipefail).
set -euo pipefail
function usage() { | |
echo " |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## Imagine we have a file with a one-line header, and we want to keep the
## header on top after sorting. A subshell groups "print the header" and
## "sort the rest" so that one redirection captures the output of both:
## http://bash.cyberciti.biz/guide/What_is_a_Subshell%3F
(sed -n '1p' your_file; sed '1d' your_file | sort) > sort_header.txt
## If you have two header lines and want to keep both of them:
(sed -n '1,2p' your_file; sed '1,2d' your_file | sort) > sort_header.txt
## If you have many lines starting with "#" as header, like vcf files.
## Key 1 uses the V (version sort) modifier, key 2 is sorted numerically.
(grep "^#" my_vcf; grep -v "^#" my_vcf | sort -k1,1V -k2,2n) > sorted.vcf
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#######################################
# Compare the call.keep.txt files produced from the original and the
# realigned BAMs, one chromosome (1-22, X, Y) at a time.
# For every chromosome it reports the line count of each file and the number
# of lines that both files share when only columns 1, 2, 4 and 5 are compared.
# Arguments: $1 - sample prefix of the input files (default: TCGA-06-0125)
# Inputs:    keep_original_bam/<sample>_<chr>.call.keep.txt
#            keep_realigned_bam/<sample>_<chr>.call.keep.txt
# Outputs:   one summary line per chromosome on stdout
#######################################
compare_keep_calls() {
  local sample=${1:-TCGA-06-0125}
  local chrom original_file realigned_file original realigned same

  for chrom in {1..22} X Y; do
    original_file="keep_original_bam/${sample}_${chrom}.call.keep.txt"
    realigned_file="keep_realigned_bam/${sample}_${chrom}.call.keep.txt"

    # comm -12 keeps only lines present in both sorted inputs.
    same=$(comm -12 \
      <(cut -f1,2,4,5 "$original_file" | sort) \
      <(cut -f1,2,4,5 "$realigned_file" | sort) \
      | wc -l | tr -d ' ')
    # Reading from stdin makes wc print the bare count (no file name);
    # tr strips the padding some wc implementations add.
    original=$(wc -l < "$original_file" | tr -d ' ')
    realigned=$(wc -l < "$realigned_file" | tr -d ' ')

    # Values are passed as arguments, never as part of the format string.
    printf 'chromosome %s, original_bam:%s, realigned_bam:%s, common:%s \n' \
      "$chrom" "$original" "$realigned" "$same"
  done
}

compare_keep_calls
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# output of mutect run for each chromosome
# the first line is the mutect version, the second line is the header
# keep the header and filter mutect results based on 10th and 35th columns.
#######################################
# Filter every *.call.txt file in the current directory.
# For each input, writes line 2 (the header) followed by every line whose
# tab-separated column 10 is "COVERED" and column 35 is "KEEP".
# Outputs: ./keep/<name>.call.keep.txt for each <name>.call.txt
#######################################
filter_mutect_calls() {
  local file
  for file in *.call.txt; do
    # An unmatched glob is left as the literal pattern; skip it.
    [[ -e "$file" ]] || continue
    # The redirection below fails if the output directory is missing.
    mkdir -p ./keep
    {
      sed -n '2p' "$file"
      awk -F '\t' '$10=="COVERED" && $35=="KEEP"' "$file"
    } > "./keep/${file%txt}keep.txt"
  done
}

filter_mutect_calls
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Make the sorting script executable for all users.
chmod a+x vcfsort.sh
# Run it via an explicit path: the current directory is normally not on PATH,
# so a bare "vcfsort.sh" would not resolve to the file chmod just touched.
./vcfsort.sh trio.trim.vep.vcf.gz