Skip to content

Instantly share code, notes, and snippets.

@sahrendt0
Created October 31, 2012 09:15
Show Gist options
  • Save sahrendt0/3986010 to your computer and use it in GitHub Desktop.
Save sahrendt0/3986010 to your computer and use it in GitHub Desktop.
GEN 220 Week 5
#!/usr/bin/perl
# Script: blast_parse.pl
# Description: GEN220, week 5, problem 4
# Author: Steven Ahrendt
# email: [email protected]
#####################################
## QUERY HIT EVAL LENGTH IDENT QSTART QEND HSTART HEND
############################################################
use strict;
use warnings;
#use Bio::Index::BLAST;
use Bio::SearchIO;
my $blast_file = shift;
my $blastIO = Bio::SearchIO->new(-format => 'blast',
-file => $blast_file);
while (my $result = $blastIO->next_result)
{
while(my $hit = $result->next_hit)
{
while(my $hsp = $hit->next_hsp)
{
print $result->query_name,"\t";
print $hit->name,"\t";
print $hsp->evalue,"\t";
print $hsp->length('total'),"\t";
printf("%.2f\t", $hsp->percent_identity);
print $hsp->start('query'),"\t";
print $hsp->end('query'),"\t";
print $hsp->start('hit'),"\t";
print $hsp->end('hit'),"\n";
}
}
}
#!/usr/bin/perl
# Script: fasta_parse.pl
# Description: GEN220, week5, question 3
# Author: Steven Ahrendt
# email: [email protected]
###############################
## chr1, 54584..54913
#############################
use strict;
use warnings;
use Bio::DB::Fasta;
use Bio::Seq;
my $fasta_file = shift;
my $db = Bio::DB::Fasta->new($fasta_file);
my $seq = $db->subseq('chrI',54584,54913);
print "DNA: $seq\n";
my $prot = Bio::Seq->new(-seq => $seq)->translate->seq;
print "Prot: $prot\n";
#!/usr/bin/perl
# Script: gbk_parse.pl
# Description: GEN220: Week 5, questions 1 and 2
# Author: Steven Ahrendt
# email: [email protected]
#############################
use strict;
use warnings;
use Bio::Seq;
use Bio::SeqIO;
use Bio::DB::Fasta;
my $infile = shift;
my $informat = "fasta"; # default to fasta, but check and change if necessary
my @f = split(/\./,$infile);
if($f[-1] =~ m/^g/)
{
$informat = "genbank";
}
#my @cds_features = grep {$_->primary_tag eq "CDS"} Bio::SeqIO->new(-file => $infile)->next_seq->get_SeqFeatures;
my $seqio = Bio::SeqIO->new(-file => $infile);
my $num_seqs = 0;
my $total_seq_len = 0;
my $num_cds = 0;
my $total_cds_len = 0;
while(my $seq = $seqio->next_seq)
{
#print $seq->accession(),"\n";
$num_seqs++;
my @features = $seq->get_SeqFeatures;
foreach my $seq_feat (@features) #= $seq->get_SeqFeatures)
{
#print "\t",$seq_feat->primary_tag,"\n";
foreach my $tag ($seq_feat->get_all_tags)
{
#print $tag,"\t",$seq_feat->get_tag_values($tag),"\n";
}
if($seq_feat->primary_tag eq "CDS")
{
#print "\tcds: ";#,$seq_feat->get_tag_values($seq_feat->primary_tag),"\n";
my $length = ($seq_feat->end-$seq_feat->start)+1; #,"..",$seq_feat->end,"\n";
#print $length," (",$seq_feat->start,",",$seq_feat->end,")";
#print "\n";
$num_cds++;
$total_cds_len += $length;
}
}
$total_seq_len += $seq->length;
}
print "Number of sequences: $num_seqs\n";
print "Total length of all sequences: $total_seq_len\n";
print "Number of CDS features: $num_cds\n";
print "Total length of all CDS features: $total_cds_len\n";
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment