Created
October 31, 2012 09:15
-
-
Save sahrendt0/3986010 to your computer and use it in GitHub Desktop.
GEN 220 Week 5
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl | |
# Script: blast_parse.pl | |
# Description: GEN220, week 5, problem 4 | |
# Author: Steven Ahrendt | |
# email: [email protected] | |
##################################### | |
## QUERY HIT EVAL LENGTH IDENT QSTART QEND HSTART HEND | |
############################################################ | |
use strict; | |
use warnings; | |
#use Bio::Index::BLAST; | |
use Bio::SearchIO; | |
my $blast_file = shift; | |
my $blastIO = Bio::SearchIO->new(-format => 'blast', | |
-file => $blast_file); | |
while (my $result = $blastIO->next_result) | |
{ | |
while(my $hit = $result->next_hit) | |
{ | |
while(my $hsp = $hit->next_hsp) | |
{ | |
print $result->query_name,"\t"; | |
print $hit->name,"\t"; | |
print $hsp->evalue,"\t"; | |
print $hsp->length('total'),"\t"; | |
printf("%.2f\t", $hsp->percent_identity); | |
print $hsp->start('query'),"\t"; | |
print $hsp->end('query'),"\t"; | |
print $hsp->start('hit'),"\t"; | |
print $hsp->end('hit'),"\n"; | |
} | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl | |
# Script: fasta_parse.pl | |
# Description: GEN220, week5, question 3 | |
# Author: Steven Ahrendt | |
# email: [email protected] | |
############################### | |
## chr1, 54584..54913 | |
############################# | |
use strict; | |
use warnings; | |
use Bio::DB::Fasta; | |
use Bio::Seq; | |
my $fasta_file = shift; | |
my $db = Bio::DB::Fasta->new($fasta_file); | |
my $seq = $db->subseq('chrI',54584,54913); | |
print "DNA: $seq\n"; | |
my $prot = Bio::Seq->new(-seq => $seq)->translate->seq; | |
print "Prot: $prot\n"; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl | |
# Script: gbk_parse.pl | |
# Description: GEN220: Week 5, questions 1 and 2 | |
# Author: Steven Ahrendt | |
# email: [email protected] | |
############################# | |
use strict; | |
use warnings; | |
use Bio::Seq; | |
use Bio::SeqIO; | |
use Bio::DB::Fasta; | |
my $infile = shift; | |
my $informat = "fasta"; # default to fasta, but check and change if necessary | |
my @f = split(/\./,$infile); | |
if($f[-1] =~ m/^g/) | |
{ | |
$informat = "genbank"; | |
} | |
#my @cds_features = grep {$_->primary_tag eq "CDS"} Bio::SeqIO->new(-file => $infile)->next_seq->get_SeqFeatures; | |
my $seqio = Bio::SeqIO->new(-file => $infile); | |
my $num_seqs = 0; | |
my $total_seq_len = 0; | |
my $num_cds = 0; | |
my $total_cds_len = 0; | |
while(my $seq = $seqio->next_seq) | |
{ | |
#print $seq->accession(),"\n"; | |
$num_seqs++; | |
my @features = $seq->get_SeqFeatures; | |
foreach my $seq_feat (@features) #= $seq->get_SeqFeatures) | |
{ | |
#print "\t",$seq_feat->primary_tag,"\n"; | |
foreach my $tag ($seq_feat->get_all_tags) | |
{ | |
#print $tag,"\t",$seq_feat->get_tag_values($tag),"\n"; | |
} | |
if($seq_feat->primary_tag eq "CDS") | |
{ | |
#print "\tcds: ";#,$seq_feat->get_tag_values($seq_feat->primary_tag),"\n"; | |
my $length = ($seq_feat->end-$seq_feat->start)+1; #,"..",$seq_feat->end,"\n"; | |
#print $length," (",$seq_feat->start,",",$seq_feat->end,")"; | |
#print "\n"; | |
$num_cds++; | |
$total_cds_len += $length; | |
} | |
} | |
$total_seq_len += $seq->length; | |
} | |
print "Number of sequences: $num_seqs\n"; | |
print "Total length of all sequences: $total_seq_len\n"; | |
print "Number of CDS features: $num_cds\n"; | |
print "Total length of all CDS features: $total_cds_len\n"; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment