Created
June 14, 2012 06:09
-
-
Save mfcovington/2928252 to your computer and use it in GitHub Desktop.
fasta_flattener
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env perl
# fasta_flattener.pl
# Mike Covington
# created: 2012-06-14
#
# Description: flatten a FASTA file by removing white space, so that each
#              record is written as one ">ID" header line followed by its
#              sequence on a single line.
#
# Usage: fasta_flattener.pl [INPUT_FASTA [OUTPUT_FASTA]]
#   INPUT_FASTA  defaults to "input.fa"
#   OUTPUT_FASTA defaults to "output.fa"
#   Every record is also echoed to STDOUT.
#
use strict;
use warnings;
use Bio::SeqIO;
use v5.10;    #or later... or change 'say' to 'print' X_x

# Paths may be given on the command line; with no arguments the script
# behaves as before and uses the historical hard-coded file names.
my $fasta_in_path  = $ARGV[0] // "input.fa";
my $fasta_out_path = $ARGV[1] // "output.fa";

# Opening the output truncates it, so refuse to clobber the input.
die "Input and output must be different files: $fasta_in_path\n"
  if $fasta_in_path eq $fasta_out_path;

# Open both files ourselves with 3-arg open so that failures are reported
# with the path and OS error, and so that a path is never interpreted as
# an open-mode prefix.
open my $fasta_in, "<", $fasta_in_path
  or die "Cannot open '$fasta_in_path' for reading: $!\n";
open my $fasta_out, ">", $fasta_out_path
  or die "Cannot open '$fasta_out_path' for writing: $!\n";

my $seqio_in = Bio::SeqIO->new(
    -fh     => $fasta_in,
    -format => 'Fasta',
);

# Records are written as they are read; nothing is accumulated, so memory
# use does not grow with the number of sequences in the file.
while ( my $seq_obj = $seqio_in->next_seq() ) {
    my $seq_id = $seq_obj->display_id();    #this is the sequence ID
    my $seq    = $seq_obj->seq() // '';     #this is the actual sequence

    #to print them to your screen in a "consolidated" FASTA format:
    say ">$seq_id";
    say $seq;

    #to save to a file in a "consolidated" FASTA format:
    say {$fasta_out} ">$seq_id";
    say {$fasta_out} $seq;
}

# Buffered write errors (e.g. a full disk) only surface at close time.
close $fasta_out
  or die "Cannot close '$fasta_out_path': $!\n";

exit;
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
>AT4G00050 | |
ATGAGCCAATGTGTTCCAAACTGTCACATCGATGATACTCCGGCAGCAGC | |
CACCACCACCGTCCGCTCCACCACAGCCGCAGACATCCCCATATTAGACT | |
ACGAGGTAGCCGAGCTGACGTGGGAGAACGGGCAACTAGGCTTGCACGGC | |
TTAGGTCCACCGCGAGTGACGGCTTCGTCGACCAAGTACTCCACAGGCGC | |
CGGTGGAACGTTGGAGTCGATAGTGGACCAAGCTACTCGCCTCCCTAACC | |
CTAAGCCCACGGATGAGCTCGTCCCGTGGTTCCATCATCGCTCCTCCAGG | |
GCCGCGATGGCAATGGACGCGCTTGTCCCTTGCTCCAACCTAGTACACGA | |
GCAGCAGAGCAAGCCTGGTGGCGTTGGCTCCACCCGGGTGGGGTCATGTA | |
GCGATGGTCGTACCATGGGCGGTGGAAAACGAGCAAGAGTGGCACCGGAG | |
TGGAGCGGCGGCGGGAGTCAGCGGCTGACCATGGACACTTACGACGTAGG | |
TTTCACCTCAACATCAATGGGCTCGCACGATAACACAATCGACGATCATG | |
ACTCCGTCTGCCACAGCCGCCCACAGATGGAGGACGAAGAAGAGAAGAAA | |
GCCGGAGGAAAATCATCAGTTTCAACCAAGAGAAGCAGAGCTGCTGCTAT | |
TCATAACCAATCCGAACGTAAGAGGAGAGATAAAATCAATCAAAGGATGA | |
AGACTTTGCAAAAACTGGTTCCCAATTCCAGCAAGACGGATAAAGCATCT | |
ATGTTGGATGAAGTGATAGAGTATTTGAAGCAACTTCAAGCACAAGTGAG | |
CATGATGAGCAGAATGAATATGCCTTCTATGATGCTTCCTATGGCCATGC | |
AGCAACAACAACAACTACAAATGTCTCTCATGTCCAATCCCATGGGTTTA | |
GGGATGGGCATGGGGATGCCCGGTCTCGGTCTCCTCGACCTTAATTCTAT | |
GAACCGAGCTGCTGCAAGCGCTCCTAATATCCATGCCAACATGATGCCAA | |
ACCCATTTTTGCCCATGAATTGTCCATCGTGGGATGCTTCTTCCAATGAC | |
TCTCGATTTCAGTCTCCTCTCATCCCCGATCCTATGTCTGCCTTTCTTGC | |
ATGCTCTACTCAGCCAACGACGATGGAAGCGTATAGCAGGATGGCTACAT | |
TATATCAGCAAATGCAACAACAACTTCCTCCTCCTTCGAATCCAAAATGA | |
>AT2G25930 | |
ATGAAGAGAGGGAAAGATGAGGAGAAGATATTGGAACCTATGTTTCCTCG | |
GCTTCATGTGAATGATGCAGATAAAGGAGGGCCTAGAGCTCCTCCTAGAA | |
ACAAGATGGCTCTTTATGAGCAGCTTAGTATTCCTTCTCAGAGGTTTGGT | |
GATCATGGAACGATGAATTCTCGTAGTAACAACACAAGCACTTTGGTTCA | |
TCCTGGACCATCTAGTCAGCCTTGTGGTGTGGAAAGAAACTTATCTGTCC | |
AGCATCTTGATTCTTCAGCCGCAAACCAAGCAACTGAGAAGTTTGTCTCC | |
CAAATGTCCTTCATGGAAAATGTGAGATCTTCGGCACAGCATGATCAGAG | |
GAAAATGGTGAGAGAGGAAGAAGATTTTGCAGTTCCAGTATATATTAACT | |
CAAGAAGATCTCAGTCTCATGGCAGAACCAAGAGTGGTATTGAGAAGGAA | |
AAACACACCCCAATGGTGGCACCTAGCTCTCATCACTCCATTCGATTTCA | |
AGAAGTGAATCAGACAGGCTCAAAGCAAAACGTATGTTTGGCTACTTGTT | |
CAAAACCTGAAGTTAGGGATCAGGTCAAGGCGAATGCAAGGTCAGGTGGC | |
TTTGTAATCTCTTTAGATGTATCAGTCACAGAGGAGATTGATCTCGAAAA | |
ATCAGCATCAAGTCATGATAGAGTAAATGATTATAATGCTTCCTTGAGAC | |
AAGAGTCTAGAAATCGGTTATACCGAGATGGTGGCAAAACTCGTCTGAAG | |
GACACTGATAATGGAGCTGAATCTCACTTGGCAACGGAAAATCATTCACA | |
AGAGGGTCATGGCAGTCCTGAAGACATTGATAATGATCGTGAATACAGCA | |
AAAGCAGAGCATGCGCCTCTCTGCAGCAGATAAATGAAGAGGCAAGTGAT | |
GACGTTTCTGATGATTCGATGGTGGATTCTATATCCAGCATAGATGTCTC | |
TCCCGATGATGTTGTGGGTATATTAGGTCAAAAACGTTTCTGGAGAGCAA | |
GGAAAGCCATTGCCAATCAACAAAGAGTATTTGCTGTTCAACTATTTGAG | |
TTGCACAGACTGATTAAGGTTCAAAAACTTATTGCTGCATCACCGGATCT | |
CTTGCTCGATGAGATCAGTTTTCTTGGAAAAGTTTCTGCTAAAAGCTATC | |
CAGTGAAGAAGCTCCTTCCATCAGAATTTCTGGTAAAGCCTCCTCTACCA | |
CATGTTGTCGTCAAACAAAGGGGTGACTCGGAGAAGACTGACCAACATAA | |
AATGGAAAGCTCAGCTGAGAACGTAGTTGGGAGGTTGTCAAATCAAGGTC | |
ATCATCAACAATCCAACTACATGCCTTTTGCAAACAACCCACCGGCTTCA | |
CCGGCTCCAAATGGATATTGCTTTCCTCCTCAGCCTCCTCCTTCAGGAAA | |
TCATCAGCAATGGTTGATCCCTGTAATGTCTCCCTCGGAAGGACTGATAT | |
ACAAGCCTCACCCAGGTATGGCACACACGGGGCATTATGGAGGATATTAT | |
GGTCATTATATGCCTACACCAATGGTAATGCCTCAATATCACCCCGGCAT | |
GGGATTCCCACCTCCTGGTAATGGCTACTTCCCTCCATATGGAATGATGC | |
CCACCATAATGAACCCATATTGTTCAAGCCAACAACAACAACAACAACAA | |
CCCAATGAGCAAATGAACCAGTTTGGACATCCTGGAAATCTTCAGAACAC | |
CCAACAACAACAACAGAGATCTGATAATGAACCTGCTCCACAGCAACAGC | |
AACAGCCAACAAAGTCTTATCCGCGAGCAAGAAAGAGCAGGCAAGGGAGC | |
ACAGGAAGCAGTCCAAGTGGGCCACAGGGAATCTCTGGTAGCAAGTCCTT | |
TCGGCCATTCGCAGCCGTTGATGAGGACAGCAACATCAACAATGCACCTG | |
AGCAAACGATGACAACAACCACAACGACGACAAGAACAACTGTTACTCAG | |
ACAACAAGAGATGGGGGAGGAGTGACGAGAGTGATAAAGGTGGTACCTCA | |
CAACGCAAAGCTCGCGAGTGAGAATGCTGCCAGAATTTTCCAGTCAATAC | |
AAGAAGAACGTAAACGCTATGACTCCTCTAAGCCTTAA |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
>AT4G00050 | |
ATGAGCCAATGTGTTCCAAACTGTCACATCGATGATACTCCGGCAGCAGCCACCACCACCGTCCGCTCCACCACAGCCGCAGACATCCCCATATTAGACTACGAGGTAGCCGAGCTGACGTGGGAGAACGGGCAACTAGGCTTGCACGGCTTAGGTCCACCGCGAGTGACGGCTTCGTCGACCAAGTACTCCACAGGCGCCGGTGGAACGTTGGAGTCGATAGTGGACCAAGCTACTCGCCTCCCTAACCCTAAGCCCACGGATGAGCTCGTCCCGTGGTTCCATCATCGCTCCTCCAGGGCCGCGATGGCAATGGACGCGCTTGTCCCTTGCTCCAACCTAGTACACGAGCAGCAGAGCAAGCCTGGTGGCGTTGGCTCCACCCGGGTGGGGTCATGTAGCGATGGTCGTACCATGGGCGGTGGAAAACGAGCAAGAGTGGCACCGGAGTGGAGCGGCGGCGGGAGTCAGCGGCTGACCATGGACACTTACGACGTAGGTTTCACCTCAACATCAATGGGCTCGCACGATAACACAATCGACGATCATGACTCCGTCTGCCACAGCCGCCCACAGATGGAGGACGAAGAAGAGAAGAAAGCCGGAGGAAAATCATCAGTTTCAACCAAGAGAAGCAGAGCTGCTGCTATTCATAACCAATCCGAACGTAAGAGGAGAGATAAAATCAATCAAAGGATGAAGACTTTGCAAAAACTGGTTCCCAATTCCAGCAAGACGGATAAAGCATCTATGTTGGATGAAGTGATAGAGTATTTGAAGCAACTTCAAGCACAAGTGAGCATGATGAGCAGAATGAATATGCCTTCTATGATGCTTCCTATGGCCATGCAGCAACAACAACAACTACAAATGTCTCTCATGTCCAATCCCATGGGTTTAGGGATGGGCATGGGGATGCCCGGTCTCGGTCTCCTCGACCTTAATTCTATGAACCGAGCTGCTGCAAGCGCTCCTAATATCCATGCCAACATGATGCCAAACCCATTTTTGCCCATGAATTGTCCATCGTGGGATGCTTCTTCCAATGACTCTCGATTTCAGTCTCCTCTCATCCCCGATCCTATGTCTGCCTTTCTTGCATGCTCTACTCAGCCAACGACGATGGAAGCGTATAGCAGGATGGCTACATTATATCAGCAAATGCAACAACAACTTCCTCCTCCTTCGAATCCAAAATGA | |
>AT2G25930 | |
ATGAAGAGAGGGAAAGATGAGGAGAAGATATTGGAACCTATGTTTCCTCGGCTTCATGTGAATGATGCAGATAAAGGAGGGCCTAGAGCTCCTCCTAGAAACAAGATGGCTCTTTATGAGCAGCTTAGTATTCCTTCTCAGAGGTTTGGTGATCATGGAACGATGAATTCTCGTAGTAACAACACAAGCACTTTGGTTCATCCTGGACCATCTAGTCAGCCTTGTGGTGTGGAAAGAAACTTATCTGTCCAGCATCTTGATTCTTCAGCCGCAAACCAAGCAACTGAGAAGTTTGTCTCCCAAATGTCCTTCATGGAAAATGTGAGATCTTCGGCACAGCATGATCAGAGGAAAATGGTGAGAGAGGAAGAAGATTTTGCAGTTCCAGTATATATTAACTCAAGAAGATCTCAGTCTCATGGCAGAACCAAGAGTGGTATTGAGAAGGAAAAACACACCCCAATGGTGGCACCTAGCTCTCATCACTCCATTCGATTTCAAGAAGTGAATCAGACAGGCTCAAAGCAAAACGTATGTTTGGCTACTTGTTCAAAACCTGAAGTTAGGGATCAGGTCAAGGCGAATGCAAGGTCAGGTGGCTTTGTAATCTCTTTAGATGTATCAGTCACAGAGGAGATTGATCTCGAAAAATCAGCATCAAGTCATGATAGAGTAAATGATTATAATGCTTCCTTGAGACAAGAGTCTAGAAATCGGTTATACCGAGATGGTGGCAAAACTCGTCTGAAGGACACTGATAATGGAGCTGAATCTCACTTGGCAACGGAAAATCATTCACAAGAGGGTCATGGCAGTCCTGAAGACATTGATAATGATCGTGAATACAGCAAAAGCAGAGCATGCGCCTCTCTGCAGCAGATAAATGAAGAGGCAAGTGATGACGTTTCTGATGATTCGATGGTGGATTCTATATCCAGCATAGATGTCTCTCCCGATGATGTTGTGGGTATATTAGGTCAAAAACGTTTCTGGAGAGCAAGGAAAGCCATTGCCAATCAACAAAGAGTATTTGCTGTTCAACTATTTGAGTTGCACAGACTGATTAAGGTTCAAAAACTTATTGCTGCATCACCGGATCTCTTGCTCGATGAGATCAGTTTTCTTGGAAAAGTTTCTGCTAAAAGCTATCCAGTGAAGAAGCTCCTTCCATCAGAATTTCTGGTAAAGCCTCCTCTACCACATGTTGTCGTCAAACAAAGGGGTGACTCGGAGAAGACTGACCAACATAAAATGGAAAGCTCAGCTGAGAACGTAGTTGGGAGGTTGTCAAATCAAGGTCATCATCAACAATCCAACTACATGCCTTTTGCAAACAACCCACCGGCTTCACCGGCTCCAAATGGATATTGCTTTCCTCCTCAGCCTCCTCCTTCAGGAAATCATCAGCAATGGTTGATCCCTGTAATGTCTCCCTCGGAAGGACTGATATACAAGCCTCACCCAGGTATGGCACACACGGGGCATTATGGAGGATATTATGGTCATTATATGCCTACACCAATGGTAATGCCTCAATATCACCCCGGCATGGGATTCCCACCTCCTGGTAATGGCTACTTCCCTCCATATGGAATGATGCCCACCATAATGAACCCATATTGTTCAAGCCAACAACAACAACAACAACAACCCAATGAGCAAATGAACCAGTTTGGACATCCTGGAAATCTTCAGAACACCCAACAACAACAACAGAGATCTGATAATGAACCTGCTCCACAGCAACAGCAACAGCCAACAAAGTCTTATCCGCGAGCAAGAAAGAGCAGGCAAGGGAGCACAGGAAGCAGTCCAAGTGGGCCACAGGGAATCTCTGGTAGCAAGTCCTTTCGGCCATTCGCAGCCGTTGATGAGGACAGCAACATCAACAATGCACCTGAGCAAACGATGACAACAACCACAACGACGACAAGAACAACTGTTACTCAGACAACAAGAGATGGGGGAGGAGTGACGAGAGTGATAAAGGTGGTACCTCACAACGCAAAGCTCGCGAGTGAGAATGCTGCCAGAATTTTCCAGTCAATACAAGAAGAACGTAAACGCTATGACTCCTCTAAGCCTTAA
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment