Skip to content

Instantly share code, notes, and snippets.

@mfcovington
Last active December 6, 2016 00:40
Show Gist options
  • Save mfcovington/801f946bbbb064a5bf640b4810d996f6 to your computer and use it in GitHub Desktop.
Save mfcovington/801f946bbbb064a5bf640b4810d996f6 to your computer and use it in GitHub Desktop.
function extract_sequences {
GENE=$1
EXON=$2
CHR=$3
BAM_START=$4
BAM_END=$5
EXON_START=$6
EXON_END=$7
for GENO in R500 IMB211; do
REF=~/git.repos/sample-files/fa/Brapa_sequence_v1.5.fa
BAM=bam/$GENO.${CHR}_$BAM_START-$BAM_END.bam
TEMP_FQ=$GENO.${CHR}_$EXON_START-$EXON_END.consensus.fq
TEMP_FA=$GENO.${CHR}_$EXON_START-$EXON_END.consensus.fa
FA=$GENE.$EXON.$GENO.fa
samtools mpileup -u \
-f $REF \
$BAM \
| bcftools view -cg - \
| vcfutils.pl vcf2fq \
> $TEMP_FQ
cat $TEMP_FQ \
| perl -E '
$i = 0;
while (<>) {
chomp;
if ( /^\@/ .. /^\+/ ) {
if (/^\@/) { $id = $_; $id =~ s/^\@/>/; say $id; $i = 0; }
elsif (/^\+/) { print "\n"; }
else { print; $i++; }
}
else { <> for ( 1 .. $i - 1 ); }
}
' \
> $TEMP_FA
samtools faidx $TEMP_FA $CHR:$EXON_START-$EXON_END > $FA
samtools faidx $REF $CHR:$EXON_START-$EXON_END > $GENE.$EXON.v1.5.fa
rm $TEMP_FA
rm $TEMP_FA.fai
rm $TEMP_FQ
done
}
# extract_sequences $GENE $EXON $CHR $BAM_START $BAM_END $EXON_START $EXON_END
extract_sequences Bra039547 1 A01 11523604 11664026 11563809 11565239
extract_sequences Bra012784 1 A03 22294228 22337237 22311911 22313383
extract_sequences Bra000453 1 A03 11157200 11235799 11210760 11210892
extract_sequences Bra000453 2 A03 11157200 11235799 11211687 11211816
extract_sequences Bra000453 3 A03 11157200 11235799 11212909 11213776
extract_sequences Bra004109 1 A07 20163223 20264613 20172546 20173380
extract_sequences Bra004109 2 A07 20163223 20264613 20173450 20173688
extract_sequences Bra004109 3 A07 20163223 20264613 20175423 20175621
extract_sequences Bra004109 4 A07 20163223 20264613 20176165 20176286
extract_sequences Bra004109 5 A07 20163223 20264613 20176753 20176820
extract_sequences Bra004109 6 A07 20163223 20264613 20176902 20177004
extract_sequences Bra004109 7 A07 20163223 20264613 20177270 20177374
mkdir exons
mv *fa exons/
mkdir cds
for GENE in Bra039547 Bra012784 Bra000453 Bra004109; do
for GENOTYPE in IMB211 R500 v1.5; do
echo ">$GENE $GENOTYPE" > cds/$GENE.$GENOTYPE.fa
grep -hve "^>" exons/$GENE.*.$GENOTYPE.fa >> cds/$GENE.$GENOTYPE.fa
done
done
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment