Created
February 7, 2014 09:55
-
-
Save heikkil/8859960 to your computer and use it in GitHub Desktop.
Testing simple OOP approach to FASTA parsing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env perl | |
# FastaSeq - plain value object holding one FASTA record.
#
# Attributes (all optional, read/write accessors generated by Mo):
#   id    - identifier parsed from the header line
#   descr - free-text description following the identifier
#   seq   - the sequence string
#
# Example:
#   my $rec = FastaSeq->new(id => 'A1');
#   $rec->seq('atcg');
package FastaSeq;
#use Mo qw'build default builder coerce is required';
use Mo;

# Declare the three accessors in one pass; each is a bare Mo attribute
# with no defaults or constraints.
has $_ => () for qw(id descr seq);

1;
# ReadFasta - minimal sequential reader for FASTA files.
#
# Synopsis:
#   my $in = ReadFasta->new(infile => 'seqs.fa');
#   while (my $seq = $in->next_seq) {
#       print $seq->id, "\n";
#   }
#
# Every reader keeps its own filehandle, so several readers can be used
# side by side without disturbing each other's read position.
package ReadFasta;
#use Mo;
use Mo qw'build';
use autodie;

# Path of the FASTA file; it is opened once, when the object is built.
has infile => ();

# Private: the read handle owned by this instance. It is closed
# automatically when the object goes out of scope.
has _fh => ();

# BUILD - constructor hook run by Mo's 'build' feature after new().
# Opens 'infile' for reading and stores the handle on the instance.
# Dies if 'infile' is missing/empty, or (via autodie) if open fails.
sub BUILD {
    my $self = shift;

    my $path = $self->infile;
    die "ReadFasta: the 'infile' attribute is required\n"
        unless defined $path && length $path;

    open my $fh, "<", $path;    # autodie throws on failure
    $self->_fh($fh);
}

# next_line - return the next raw line (terminator included) from the
# file, or nothing at end of file. Reads into a lexical so the caller's
# $_ is left untouched.
sub next_line {
    my $self = shift;

    my $fh   = $self->_fh;
    my $line = <$fh>;

    return unless defined $line;
    return $line;
}

# next_seq - read the next FASTA record and return it as a FastaSeq
# object, or nothing at end of file.
#
# The returned object always has 'id' set; 'descr' and 'seq' are set
# only when the record actually contains them.
sub next_seq {
    my $self = shift;

    my $fh = $self->_fh;

    # A record ends where the next header begins: a newline followed by
    # '>'. The first record therefore still carries its leading '>',
    # later records do not, and each record except the last ends in a
    # stray '>' (removed together with whitespace below).
    local $/ = "\n>";
    my $entry = <$fh>;
    return unless defined $entry && length $entry;

    # Header line: optional '>', optional spaces, identifier, then an
    # optional description up to the end of the line.
    my ($id)    = $entry =~ /^>? *(\w*)/;
    my ($descr) = $entry =~ /^>? *\w* ?(.*)\n/;

    # Everything after the header line is sequence data.
    my ($str) = $entry =~ /^.*?\n(.*)/s;

    # Strip line breaks, inner spaces and the trailing '>' separator.
    # $str is undef when the record has a header but no newline after it.
    $str =~ s/\W//g if defined $str;

    my $seq = FastaSeq->new(id => $id);
    $seq->descr($descr) if defined $descr && length $descr;
    $seq->seq($str)     if defined $str   && length $str;
    return $seq;
}

1;
# Driver script: exercises FastaSeq directly, then reads records from
# two FASTA files through two separate ReadFasta objects.
package main;
use strict;
use warnings;
use Data::Dumper;

# show_record - print one record as a FASTA-style header line followed
# by its sequence on a single line.
sub show_record {
    my ($record) = @_;
    print ">", $record->id, " ", $record->descr, "\n";
    print $record->seq, "\n";
    return;
}

# Build a record by hand and set its sequence through the accessor.
my $seq = FastaSeq->new(id => 'A1');
$seq->seq('atcg');
#print Dumper $seq;

# NOTE(review): the sample data in the POD below is labelled t/test1.fa,
# while this opens t/test.fa - confirm which file name is intended.
my $in = ReadFasta->new(infile => 't/test.fa');
my $s = $in->next_seq();
show_record($s);

my $in2 = ReadFasta->new(infile => 't/test2.fa');
my $s2 = $in2->next_seq();
show_record($s2);

# Go back to the first reader after the second one has been opened and
# used - presumably to check that the two readers are independent.
my $s3 = $in->next_seq();
show_record($s3);
=pod | |
t/test1.fa | |
> A the first | |
acgtacgtacgt | |
> B the second | |
acgt acgt acgt | |
acgt acgt acgt | |
> C the third | |
acgt acgt acgt | |
acgt acgt acgt | |
acgt acgt acgt | |
t/test2.fa | |
> D the first | |
acgtacgtacgt | |
> E the second | |
acgt acgt acgt | |
acgt acgt acgt | |
> F the third | |
acgt acgt acgt | |
acgt acgt acgt | |
acgt acgt acgt |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment