jimhester · January 8, 2018 14:50
diff --git a/append.pl b/append.pl
 #!/usr/bin/env perl

 # Print "<header>\t<sequence length>" for every record of the FASTA input
 # named on the command line (or read from STDIN). Each sequence is built by
 # appending its lines to a string one at a time.

 use warnings;use strict;

 my $header;          # header of the record being accumulated, without '>'
 my $sequence = '';   # sequence lines of that record, concatenated

 while(my $line = <>){
   chomp $line;
   if($line =~ /^>/){
     # A new header ends the previous record: report it before its
     # sequence is discarded.
     print "$header\t" . length($sequence) . "\n" if defined $header;
     $header = substr($line,1);
     $sequence = '';
   } else {
     $sequence .= $line;
   }
 }

 # The last record has no following header line to trigger its output.
 print "$header\t" . length($sequence) . "\n" if defined $header;
diff --git a/fasta-bioperl.pl b/fasta-bioperl.pl
 #!/usr/bin/env perl

 # Report the display id and length of every sequence in a FASTA file,
 # letting BioPerl's Bio::SeqIO do the parsing.

 use warnings;use strict;
 use Bio::SeqIO;

 # The FASTA path is the first command-line argument.
 my $fasta_path = shift;
 my $reader = Bio::SeqIO->new(-file => $fasta_path, -format => 'Fasta');

 # The loop ends as soon as next_seq returns a false value.
 while(my $entry = $reader->next_seq){
   my @fields = ($entry->display_id, $entry->length);
   print join(" ", @fields), "\n";
 }
diff --git a/fasta-bioperl2.pl b/fasta-bioperl2.pl
 #!/usr/bin/env perl

 # Report the display id and length of every sequence in a FASTA file using
 # Bio::SeqIO, passing the 'dna' alphabet explicitly to the reader.

 use warnings;use strict;
 use Bio::SeqIO;

 # The FASTA path is the first command-line argument.
 my $fasta_path = shift;
 my %reader_args = (
   -file     => $fasta_path,
   -format   => 'Fasta',
   -alphabet => 'dna',
 );
 my $reader = Bio::SeqIO->new(%reader_args);

 # The loop ends as soon as next_seq returns a false value.
 while(my $entry = $reader->next_seq){
   my @fields = ($entry->display_id, $entry->length);
   print join(" ", @fields), "\n";
 }
diff --git a/fasta-biopython.py b/fasta-biopython.py
 #!/usr/bin/env python

 # Print the id and length of every record in the FASTA file given as the
 # first command-line argument, parsed with Biopython's SeqIO.

 import sys
 from Bio import SeqIO

 for record in SeqIO.parse(sys.argv[1],'fasta'):
     # print() with one pre-formatted string behaves the same on Python 2
     # and Python 3, so the script runs under either interpreter.
     print("%s %d" % (record.id, len(record)))
diff --git a/fasta-bioruby.rb b/fasta-bioruby.rb
 #!/usr/bin/env ruby

 # Print the definition line and the nalen value of each FASTA entry read
 # from ARGF through BioRuby's flat-file reader.

 require 'bio'

 entries = Bio::FlatFile.new(Bio::FastaFormat, ARGF)
 entries.each_entry { |entry|
   fields = [entry.definition, entry.nalen.to_s]
   puts fields.join(" ")
 }
diff --git a/fasta.go b/fasta.go
 package main

 import (
  "bufio"
  "io"
  "fmt"
  "os"
  "strings"
 )

 // fasta is a single FASTA record as returned by next_seq.
 type fasta struct {
   // header is the first line of the record.
  header string
   // sequence is the rest of the record with the newlines removed.
  sequence string
 }

 func NewFastxReader(f io.Reader) *FastxReader {
        return &FastxReader{
                r:         bufio.NewReader(f),
        }
 }
 type FastxReader struct {
  r   *bufio.Reader
 }
 // next_seq reads the next FASTA record from the underlying reader.
 //
 // It skips input up to and including the next '>', takes the rest of that
 // line as the header, and concatenates the following lines (line endings
 // removed) into the sequence until it reaches a line starting with '>' or
 // the end of input. The '>' of the following record is left unread, so
 // consecutive calls return consecutive records, and a final record that is
 // not followed by another '>' is still returned.
 //
 // It returns io.EOF when no further '>' is found, and any other read error
 // unchanged.
 func (r *FastxReader) next_seq() (record fasta, err error) {
   // Skip to the start of the next record; io.EOF here means none is left.
   if _, err = r.r.ReadString('>'); err != nil {
     return record, err
   }

   var line string
   line, err = r.r.ReadString('\n')
   if err != nil && err != io.EOF {
     return record, err
   }
   record.header = strings.TrimRight(line, "\r\n")

   var parts []string
   for err == nil {
     // Peek so that the '>' opening the next record stays in the buffer.
     var next []byte
     next, err = r.r.Peek(1)
     if err != nil || next[0] == '>' {
       break
     }
     line, err = r.r.ReadString('\n')
     parts = append(parts, strings.TrimRight(line, "\r\n"))
   }
   // Running out of input merely terminates the final record.
   if err == io.EOF {
     err = nil
   }
   record.sequence = strings.Join(parts, "")
   return record, err
 }

 // chomp returns s without its last byte when that byte equals the first
 // byte of delim; otherwise it returns s unchanged. An empty s or an empty
 // delim is returned as-is instead of indexing out of range.
 func chomp(s string, delim string) string {
   if len(s) == 0 || len(delim) == 0 {
     return s
   }
   if s[len(s)-1] == delim[0] {
     return s[:len(s)-1]
   }
   return s
 }

 // main prints the header and sequence length of every record in the FASTA
 // file named by the first command-line argument. It exits with status 1
 // when the argument is missing, the file cannot be opened, or reading
 // fails with anything other than io.EOF.
 func main() {
   if len(os.Args) < 2 {
     fmt.Fprintln(os.Stderr, "usage: fasta <file>")
     os.Exit(1)
   }
   file, err := os.Open(os.Args[1])
   if err != nil {
     fmt.Fprintln(os.Stderr, err)
     os.Exit(1)
   }
   defer file.Close()

   fastx := NewFastxReader(file)
   for {
     rec, err := fastx.next_seq()
     if err != nil {
       // io.EOF is the normal end of input; anything else is a failure.
       if err != io.EOF {
         fmt.Fprintln(os.Stderr, err)
         os.Exit(1)
       }
       break
     }
     fmt.Println(rec.header, "\t", len(rec.sequence))
   }
 }
diff --git a/fasta.pl b/fasta.pl
 #!/usr/bin/env perl

 # Print "<header>\t<sequence length>" for every record of the FASTA input
 # named on the command line (or read from STDIN). The input is read in
 # chunks terminated by '>' instead of line by line.

 use warnings;use strict;

 local $/ = ">";
 my $leading = <>;   # whatever precedes the first '>' is not a record
 while(my $record = <>){
   chomp $record;    # $/ is '>', so this strips the trailing separator
   # The first line is the header and the remainder is the sequence. split
   # leaves $sequence undefined when the record has no newline at all.
   my ($header, $sequence) = split /\n/, $record, 2;
   $header = '' unless defined $header;
   $sequence = '' unless defined $sequence;
   $sequence =~ tr/\n//d;
   print "$header\t" . length($sequence) . "\n";
 }
diff --git a/fasta.py b/fasta.py
 #!/usr/bin/env python

 # Print "<header> <sequence length>" for each record of the FASTA file
 # given as the first command-line argument. Each sequence is built by
 # string concatenation.

 import os,sys

 header = None   # header of the record being read, without the leading '>'
 sequence = ''
 # Default text mode plus rstrip('\r\n') copes with LF and CRLF files
 # without the 'U' open flag, which recent Python 3 releases reject.
 with open(sys.argv[1]) as f:
     for line in f:
         line = line.rstrip('\r\n')
         if line.startswith('>'):
             # Report the finished record before starting the next one.
             if header is not None:
                 print("%s %d" % (header, len(sequence)))
             header = line[1:]
             sequence = ''
         else:
             sequence += line

 # The last record is not followed by a header line, so report it here.
 if header is not None:
     print("%s %d" % (header, len(sequence)))
diff --git a/fasta.rb b/fasta.rb
 #!/usr/bin/env ruby

 # Print "<header> <sequence length>" for every record of the FASTA input
 # read from ARGF. The input is consumed in chunks terminated by '>'.

 separator = ">"
 ARGF.gets(separator)   # whatever precedes the first '>' is not a record
 while rec = ARGF.gets(separator)
   rec.chomp!(separator)
   # The first line is the header and the remainder is the sequence. Either
   # part can be missing (nil) for a truncated record, hence the to_s calls.
   header, seq = rec.split("\n", 2)
   header = header.to_s
   seq = seq.to_s.delete("\n")
   puts [header, seq.length].join(" ")
 end
diff --git a/fasta2.py b/fasta2.py
 #!/usr/bin/env python

 # Print "<header> <sequence length>" for each record of the FASTA file
 # given as the first command-line argument. Sequence lines are collected
 # in a list and joined once per record.

 import os,sys

 header = None   # header of the record being read, without the leading '>'
 sequences = []
 # Default text mode plus rstrip('\r\n') copes with LF and CRLF files
 # without the 'U' open flag, which recent Python 3 releases reject.
 with open(sys.argv[1]) as f:
     for line in f:
         line = line.rstrip('\r\n')
         if line.startswith('>'):
             # Report the finished record before starting the next one.
             if header is not None:
                 print("%s %d" % (header, len("".join(sequences))))
             header = line[1:]
             sequences = []
         else:
             sequences.append(line)

 # The last record is not followed by a header line, so report it here.
 if header is not None:
     print("%s %d" % (header, len("".join(sequences))))
diff --git a/fasta_example.fa b/fasta_example.fa
 >chr1|this is a fasta header|example
 CCTAAACCCTGAACCCTAAACCCTAAACCCTGAACCCTAAACCCTGAACCCTGAACCCTAAACCCTGAACCCTAAACCTA
 AACCCTGAACCCTGAACCCTAAACCCTGAACCCTAAACCCTAAACCCTAAACCTAAACCCTGAACCCTAAACCTAAACCC
 TGAACCCTAAACCCTAAACCCTGAACCCTAAACCTAAACCCTGAACCCTAACCCCTGAACCCTAAACCCTGAACCCTAAA
 CCCTGAAACCTGAACCCTGAACCCTAAACCCTAAACCCTGAACCCTAAACCCTGAACCCTGAACCCTAAACCCTGAACCC
 >chr2|another record
 TAAACCCTGAACCCTAAACCCTAAACCCTGAACCCTAAACCTAAACCATGAACCCTAAACCCTGAACCCTAAACCCTAAA
 CCCTGAACCCTAAACCCTGAACCCTAAACCTAAACCCTAAACCCTGAACCCTAAACCTGAACCCTAAACCCCTAAACCTA
 AACCCTGAAACCTAAACCCTAAACCCTGAACCCTAAACCCTAAACCCTGAACCCTGAAACCCTGAACCCTAAACCATGAA
 CCCTGAACCCTAAACCCTAAACCCTAAACCCTGAACCCTGAACCCTAAACCTAAACCCTGAACCCTGAACCCTAAACCCT
 GAACCCTAAACCCTAAACCCTGAACCCTAAACCCTGAACCCTAAACCCTGAACCCTGAACCCTAAACCCTGAACCCTAAA
	#!/usr/bin/env perl

	# Print "<header>\t<sequence length>" for every record of the FASTA input
	# named on the command line (or read from STDIN). Each sequence is built by
	# appending its lines to a string one at a time.

	use warnings;use strict;

	my $header;          # header of the record being accumulated, without '>'
	my $sequence = '';   # sequence lines of that record, concatenated

	while(my $line = <>){
	  chomp $line;
	  if($line =~ /^>/){
	    # A new header ends the previous record: report it before its
	    # sequence is discarded.
	    print "$header\t" . length($sequence) . "\n" if defined $header;
	    $header = substr($line,1);
	    $sequence = '';
	  } else {
	    $sequence .= $line;
	  }
	}

	# The last record has no following header line to trigger its output.
	print "$header\t" . length($sequence) . "\n" if defined $header;
	#!/usr/bin/env perl

	# Report the display id and length of every sequence in a FASTA file,
	# letting BioPerl's Bio::SeqIO do the parsing.

	use warnings;use strict;
	use Bio::SeqIO;

	# The FASTA path is the first command-line argument.
	my $fasta_path = shift;
	my $reader = Bio::SeqIO->new(-file => $fasta_path, -format => 'Fasta');

	# The loop ends as soon as next_seq returns a false value.
	while(my $entry = $reader->next_seq){
	  my @fields = ($entry->display_id, $entry->length);
	  print join(" ", @fields), "\n";
	}
	#!/usr/bin/env python

	# Print the id and length of every record in the FASTA file given as the
	# first command-line argument, parsed with Biopython's SeqIO.

	import sys
	from Bio import SeqIO

	for record in SeqIO.parse(sys.argv[1],'fasta'):
	    # print() with one pre-formatted string behaves the same on Python 2
	    # and Python 3, so the script runs under either interpreter.
	    print("%s %d" % (record.id, len(record)))
	#!/usr/bin/env ruby

	# Print the definition line and the nalen value of each FASTA entry read
	# from ARGF through BioRuby's flat-file reader.

	require 'bio'

	ff = Bio::FlatFile.new(Bio::FastaFormat,ARGF)
	# Block parameters are delimited by plain pipes; backslash-escaped pipes
	# are not valid Ruby.
	ff.each_entry do |record|
	  puts [record.definition, record.nalen.to_s ].join(" ")
	end
	package main

	import (
	"bufio"
	"io"
	"fmt"
	"os"
	"strings"
	)

	// fasta is a single FASTA record as returned by next_seq.
	type fasta struct {
	// header is the first line of the record.
	header string
	// sequence is the rest of the record with the newlines removed.
	sequence string
	}

	func NewFastxReader(f io.Reader) *FastxReader {
	return &FastxReader{
	r: bufio.NewReader(f),
	}
	}
	type FastxReader struct {
	r *bufio.Reader
	}
	// next_seq reads the next FASTA record from the underlying reader.
	//
	// It skips input up to and including the next '>', takes the rest of that
	// line as the header, and concatenates the following lines (line endings
	// removed) into the sequence until it reaches a line starting with '>' or
	// the end of input. The '>' of the following record is left unread, so
	// consecutive calls return consecutive records, and a final record that is
	// not followed by another '>' is still returned.
	//
	// It returns io.EOF when no further '>' is found, and any other read error
	// unchanged.
	func (r *FastxReader) next_seq() (record fasta, err error) {
	  // Skip to the start of the next record; io.EOF here means none is left.
	  if _, err = r.r.ReadString('>'); err != nil {
	    return record, err
	  }

	  var line string
	  line, err = r.r.ReadString('\n')
	  if err != nil && err != io.EOF {
	    return record, err
	  }
	  record.header = strings.TrimRight(line, "\r\n")

	  var parts []string
	  for err == nil {
	    // Peek so that the '>' opening the next record stays in the buffer.
	    var next []byte
	    next, err = r.r.Peek(1)
	    if err != nil || next[0] == '>' {
	      break
	    }
	    line, err = r.r.ReadString('\n')
	    parts = append(parts, strings.TrimRight(line, "\r\n"))
	  }
	  // Running out of input merely terminates the final record.
	  if err == io.EOF {
	    err = nil
	  }
	  record.sequence = strings.Join(parts, "")
	  return record, err
	}

	// chomp returns s without its last byte when that byte equals the first
	// byte of delim; otherwise it returns s unchanged. An empty s or an empty
	// delim is returned as-is instead of indexing out of range.
	func chomp(s string, delim string) string {
	  if len(s) == 0 || len(delim) == 0 {
	    return s
	  }
	  if s[len(s)-1] == delim[0] {
	    return s[:len(s)-1]
	  }
	  return s
	}

	// main prints the header and sequence length of every record in the FASTA
	// file named by the first command-line argument. It exits with status 1
	// when the argument is missing, the file cannot be opened, or reading
	// fails with anything other than io.EOF.
	func main() {
	  if len(os.Args) < 2 {
	    fmt.Fprintln(os.Stderr, "usage: fasta <file>")
	    os.Exit(1)
	  }
	  file, err := os.Open(os.Args[1])
	  if err != nil {
	    fmt.Fprintln(os.Stderr, err)
	    os.Exit(1)
	  }
	  defer file.Close()

	  fastx := NewFastxReader(file)
	  for {
	    rec, err := fastx.next_seq()
	    if err != nil {
	      // io.EOF is the normal end of input; anything else is a failure.
	      if err != io.EOF {
	        fmt.Fprintln(os.Stderr, err)
	        os.Exit(1)
	      }
	      break
	    }
	    fmt.Println(rec.header, "\t", len(rec.sequence))
	  }
	}
	#!/usr/bin/env perl

	# Print "<header>\t<sequence length>" for every record of the FASTA input
	# named on the command line (or read from STDIN). The input is read in
	# chunks terminated by '>' instead of line by line.

	use warnings;use strict;

	local $/ = ">";
	my $leading = <>;   # whatever precedes the first '>' is not a record
	while(my $record = <>){
	  chomp $record;    # $/ is '>', so this strips the trailing separator
	  # The first line is the header and the remainder is the sequence. split
	  # leaves $sequence undefined when the record has no newline at all.
	  my ($header, $sequence) = split /\n/, $record, 2;
	  $header = '' unless defined $header;
	  $sequence = '' unless defined $sequence;
	  $sequence =~ tr/\n//d;
	  print "$header\t" . length($sequence) . "\n";
	}
	#!/usr/bin/env python

	# Print "<header> <sequence length>" for each record of the FASTA file
	# given as the first command-line argument. Each sequence is built by
	# string concatenation.

	import os,sys

	header = None   # header of the record being read, without the leading '>'
	sequence = ''
	# Default text mode plus rstrip('\r\n') copes with LF and CRLF files
	# without the 'U' open flag, which recent Python 3 releases reject.
	with open(sys.argv[1]) as f:
	    for line in f:
	        line = line.rstrip('\r\n')
	        if line.startswith('>'):
	            # Report the finished record before starting the next one.
	            if header is not None:
	                print("%s %d" % (header, len(sequence)))
	            header = line[1:]
	            sequence = ''
	        else:
	            sequence += line

	# The last record is not followed by a header line, so report it here.
	if header is not None:
	    print("%s %d" % (header, len(sequence)))
	#!/usr/bin/env ruby

	# Print "<header> <sequence length>" for every record of the FASTA input
	# read from ARGF. The input is consumed in chunks terminated by '>'.

	separator = ">"
	ARGF.gets(separator)   # whatever precedes the first '>' is not a record
	while rec = ARGF.gets(separator)
	  rec.chomp!(separator)
	  # The first line is the header and the remainder is the sequence. Either
	  # part can be missing (nil) for a truncated record, hence the to_s calls.
	  header, seq = rec.split("\n", 2)
	  header = header.to_s
	  seq = seq.to_s.delete("\n")
	  puts [header, seq.length].join(" ")
	end
	>chr1|this is a fasta header|example
	CCTAAACCCTGAACCCTAAACCCTAAACCCTGAACCCTAAACCCTGAACCCTGAACCCTAAACCCTGAACCCTAAACCTA
	AACCCTGAACCCTGAACCCTAAACCCTGAACCCTAAACCCTAAACCCTAAACCTAAACCCTGAACCCTAAACCTAAACCC
	TGAACCCTAAACCCTAAACCCTGAACCCTAAACCTAAACCCTGAACCCTAACCCCTGAACCCTAAACCCTGAACCCTAAA
	CCCTGAAACCTGAACCCTGAACCCTAAACCCTAAACCCTGAACCCTAAACCCTGAACCCTGAACCCTAAACCCTGAACCC
	>chr2|another record
	TAAACCCTGAACCCTAAACCCTAAACCCTGAACCCTAAACCTAAACCATGAACCCTAAACCCTGAACCCTAAACCCTAAA
	CCCTGAACCCTAAACCCTGAACCCTAAACCTAAACCCTAAACCCTGAACCCTAAACCTGAACCCTAAACCCCTAAACCTA
	AACCCTGAAACCTAAACCCTAAACCCTGAACCCTAAACCCTAAACCCTGAACCCTGAAACCCTGAACCCTAAACCATGAA
	CCCTGAACCCTAAACCCTAAACCCTAAACCCTGAACCCTGAACCCTAAACCTAAACCCTGAACCCTGAACCCTAAACCCT
	GAACCCTAAACCCTAAACCCTGAACCCTAAACCCTGAACCCTAAACCCTGAACCCTGAACCCTAAACCCTGAACCCTAAA