Created
November 29, 2012 17:05
-
-
Save radaniba/4170421 to your computer and use it in GitHub Desktop.
Converts any sequence format that BioRuby can read into a Tipdate file. A placeholder is left where the tree should be inserted.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/ruby | |
| # Read in a FASTA file and write it out in Tipdate format | |
| ### IMPORTS | |
require 'csv'
require 'optparse'
require 'pp'
require 'test/unit/assertions'
require 'bio'
| include Test::Unit::Assertions | |
| ### CONSTANTS & DEFINES | |
| ### UTILS | |
# Fill "{name}" placeholders in a template string.
#
# str      - Template String. Every "{...}" span (one or more characters
#            other than "}" between braces) is treated as a placeholder.
# sub_hash - Hash mapping placeholder names (String keys) to replacement
#            values.
#
# Returns a new String. A placeholder whose name is a key of sub_hash is
# replaced by that value converted with to_s (so a nil value becomes "").
# A placeholder whose name is NOT a key is left in the output verbatim, so a
# misspelt name stays visible instead of silently disappearing.
def interpolate(str, sub_hash)
  return str.gsub(/\{([^}]+)\}/) { |placeholder|
    # Strip the surrounding braces to get the lookup key.
    key = placeholder[1..-2]
    sub_hash.key?(key) ? sub_hash[key].to_s : placeholder
  }
end
| ### IMPLEMENTATION | |
| ### MAIN | |
# Parse command-line arguments.
#
# Recognised switches are removed from arg_arr in place (OptionParser#parse!),
# so whatever remains in it afterwards are the positional arguments.
#
# arg_arr - Array of argument Strings, typically ARGV.
#
# Returns two values: the options Hash (keys :save, :overwrite, :reduce,
# :unique) and arg_arr itself, now holding only the positional arguments.
#
# Exits with status 0 after printing the help text when -h/--help is given.
# Prints an error followed by the help text and exits with status 1 when an
# option is invalid or no positional argument is left.
def parse_clargs(arg_arr)
  clopts = {
    :save => "{root}.tipdate",
    :overwrite => false,
    # No switch sets :reduce or :unique; they are kept so the returned hash
    # has the same keys as before.
    :reduce => false,
    :unique => false,
  }

  parser = OptionParser.new { |opts|
    opts.program_name = __FILE__
    opts.banner = "Convert a fasta file to the skeleton of a Tipdate one"
    opts.separator("")
    opts.separator("Usage: #{opts.program_name} [options] ALN1 [ALN2 ...]")

    opts.on('-h', '--help', 'Display this screen') {
      puts opts
      exit
    }
    opts.on('--save STR', "Name output files according to this template") { |v|
      clopts[:save] = v
    }
    opts.on('-o', '--overwrite', "Overwrite pre-existing files") {
      clopts[:overwrite] = true
    }
  }

  begin
    parser.parse!(arg_arr)
    raise ArgumentError, "need files to work on" if arg_arr.empty?
  rescue OptionParser::ParseError, ArgumentError => e
    # Only parsing/validation failures are handled here; SystemExit raised by
    # the --help handler must propagate so that --help exits cleanly.
    first_line = e.message.split("\n").first.to_s
    print "Error: #{first_line}\n\n"
    print parser
    exit 1
  end

  return clopts, arg_arr
end
# Main script functionality.
#
# Parses ARGV, then for every input file named there: reads all entries with
# Bio::FlatFile, converts each to a sequence, and writes an output file whose
# name is built from the --save template. The output holds a header line
# ("<sequence count> <longest sequence length>"), one padded-name + upper-cased
# sequence line per entry, then "1" and a "(your tree here);" placeholder line.
#
# Template placeholders available in --save: {ext}, {base}, {root}, {date},
# {time}, {datetime}.
#
# Raises RuntimeError when the output file already exists and --overwrite was
# not given.
def main()
  clopts, aln_files = parse_clargs(ARGV)

  aln_files.each { |f|
    # read in seqs
    seqs = Bio::FlatFile.open(f) { |rdr|
      rdr.collect { |entry| entry.to_seq }
    }

    # get header info (`|| 0` covers an input file with no entries)
    seq_cnt = seqs.length
    max_seq_len = seqs.collect { |seq| seq.length }.max || 0
    max_name_len = seqs.collect { |seq| seq.entry_id.length }.max || 0
    # Names are left-justified in a column 8 characters wider than the
    # longest name.
    name_width = max_name_len + 8

    # make filename
    ext = File.extname(f)
    # A single clock reading keeps the date, time and datetime stamps
    # consistent with each other.
    now = Time.now
    subs = {
      "ext" => ext.sub(/\A\./, ''),
      "base" => File.basename(f),
      "root" => File.basename(f, ext),
      "date" => now.strftime('%F'),
      "time" => now.strftime('%T'),
      "datetime" => now.strftime('%F T%T'),
    }
    out_name = interpolate(clopts[:save], subs)

    # do the writing
    puts "Saving results to '#{out_name}' ..."
    if File.exist?(out_name) && !clopts[:overwrite]
      raise "Can't overwrite existing file '#{out_name}'"
    end

    File.open(out_name, 'w') { |wrtr|
      wrtr << "#{seq_cnt} #{max_seq_len}\n"
      seqs.each { |seq|
        seq_name = "#{seq.entry_id}"
        wrtr << "#{seq_name.ljust(name_width)}#{seq.to_s.upcase}\n"
      }
      wrtr << "1\n"
      wrtr << "(your tree here);\n"
    }
    # Reported only after the block has closed (and flushed) the file.
    puts "Saved '#{out_name}'."
  }

  puts "== Finished."
end
# Run the converter only when this file is the program being executed,
# not when it is loaded from another script.
main() if $PROGRAM_NAME == __FILE__
| ### END |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment