Skip to content

Instantly share code, notes, and snippets.

@radaniba
Created November 29, 2012 17:05
Show Gist options
  • Select an option

  • Save radaniba/4170421 to your computer and use it in GitHub Desktop.

Select an option

Save radaniba/4170421 to your computer and use it in GitHub Desktop.
Converts a sequence file in any format that BioRuby can read into a Tipdate file. A placeholder is left where the tree should be inserted.
#!/usr/bin/ruby
# Read in a FASTA file and write it out in Tipdate format
### IMPORTS
require 'test/unit/assertions'
require 'pp'
require 'csv'
require 'optparse'
require 'bio'
include Test::Unit::Assertions
### CONSTANTS & DEFINES
### UTILS
# Fill in "{name}" placeholders in a template string.
#
# Each "{name}" occurrence in +str+ is replaced by the value stored under the
# string key "name" in +sub_hash+. A placeholder whose key is absent is
# replaced by the empty string (a nil block result is substituted as "").
#
# Returns a new string; +str+ itself is not modified.
def interpolate(str, sub_hash)
  str.gsub(/\{([^}]+)\}/) do
    placeholder_key = Regexp.last_match(1)
    sub_hash[placeholder_key]
  end
end
### IMPLEMENTATION
### MAIN
# Parse commandline arguments.
#
# arg_arr - Array of argument strings (typically ARGV). Recognised switches
#           are removed from it in place by OptionParser#parse!.
#
# Returns a two-element Array [clopts, pargs]: +clopts+ is the options Hash
# (keys :save, :overwrite, :reduce, :unique) and +pargs+ is +arg_arr+ with
# only the positional arguments (input files) left in it.
#
# On -h/--help the usage text is printed and the process exits with status 0.
# On an invalid switch, or when no input file is given, an error message and
# the usage text are printed and the process exits with status 1.
def parse_clargs(arg_arr)
  clopts = {
    :save => "{root}.tipdate",
    :overwrite => false,
    :reduce => false,
    :unique => false,
  }

  parser = OptionParser.new do |opts|
    opts.program_name = __FILE__
    opts.banner = "Convert a fasta file to the skeleton of a Tipdate one"
    opts.separator("")
    opts.separator("Usage: #{opts.program_name} [options] ALN1 [ALN2 ...]")
    opts.on('-h', '--help', 'Display this screen') do
      puts opts
      exit
    end
    opts.on('--save STR',
            "Name output files according this template") do |v|
      clopts[:save] = v
    end
    opts.on('-o', '--overwrite',
            "Overwrite pre-existing files") do
      clopts[:overwrite] = true
    end
  end

  begin
    parser.parse!(arg_arr)
    raise ArgumentError, "need files to work on" if arg_arr.empty?
  rescue OptionParser::ParseError, ArgumentError => e
    # Rescue only the expected failures: a blanket `rescue Exception` would
    # also trap the SystemExit raised by the --help handler above.
    print "Error: #{e.message.split("\n").first}\n\n"
    print parser
    exit 1
  end

  [clopts, arg_arr]
end
# Main script functionality.
#
# For every input file named on the command line: read all entries through
# Bio::FlatFile, convert each to a sequence, and write a Tipdate skeleton made
# of a "<sequence count> <longest sequence length>" header line, one
# "<padded name><SEQUENCE>" line per entry, a "1" line, and a placeholder
# line where the tree is to be pasted in.
#
# The output filename comes from the --save template, in which {ext}, {base},
# {root}, {date}, {time} and {datetime} are substituted.
#
# Raises RuntimeError if the output file already exists and --overwrite was
# not given.
def main
  clopts, aln_files = parse_clargs(ARGV)
  aln_files.each do |aln_path|
    # read in seqs
    seqs = Bio::FlatFile.open(aln_path) do |rdr|
      rdr.map { |entry| entry.to_seq }
    end

    # get header info; `|| 0` covers an input file with no entries
    seq_cnt = seqs.length
    max_seq_len = seqs.map { |seq| seq.length }.max || 0
    max_name_len = seqs.map { |seq| seq.entry_id.to_s.length }.max || 0
    name_width = max_name_len + 8

    # make filename
    ext = File.extname(aln_path)
    now = Time.now
    subs = {
      "ext" => ext.sub(/\A\./, ""),
      "base" => File.basename(aln_path),
      "root" => File.basename(aln_path, ext),
      "date" => now.strftime('%F'),
      "time" => now.strftime('%T'),
      "datetime" => now.strftime('%F T%T'),
    }
    out_name = interpolate(clopts[:save], subs)

    # do the writing
    puts "Saving results to '#{out_name}' ..."
    if File.exist?(out_name) && !clopts[:overwrite]
      raise "Can't overwrite existing file '#{out_name}'"
    end
    File.open(out_name, 'w') do |wrtr|
      wrtr << "#{seq_cnt} #{max_seq_len}\n"
      seqs.each do |seq|
        wrtr << "#{seq.entry_id.to_s.ljust(name_width)}#{seq.to_s.upcase}\n"
      end
      wrtr << "1\n"
      wrtr << "(your tree here);\n"
    end
    # Report success only once the block has returned and the file is closed.
    puts "Saved '#{out_name}'."
  end
  puts "== Finished."
end
# Run main only when this file is executed directly, not when it is loaded
# by another script.
main if $PROGRAM_NAME == __FILE__
### END
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment