Created
February 26, 2013 21:34
-
-
Save radaniba/5042429 to your computer and use it in GitHub Desktop.
A simple script to grab bioseqs by accession. Requires BioRuby. This just wraps the BioRuby fetch functionality in a friendly command-line interface. In brief, it can accept accession ids on the command line or from piped standard input (one accession per line) and save the corresponding sequences from the db. Sequences may be downloaded via the bioruby or ebi server.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby | |
# download sequences from db by id | |
### IMPORTS | |
require 'bio'
require 'optparse'
require 'ostruct'
require 'timeout'
require 'pp'
require 'test/unit/assertions'
include Test::Unit::Assertions | |
include Test::Unit | |
### DEFINES & CONSTANTS | |
# Fetch endpoints, keyed by the name accepted by the `--server` option.
# Frozen because it is a lookup table that nothing should modify at runtime.
# NOTE(review): the URLs are unchanged from the original script; confirm
# they are still live before relying on them.
$SERVERS = {
  :bioruby => "http://bioruby.org/cgi-bin/biofetch.rb",
  :ebi => "http://www.ebi.ac.uk/cgi-bin/dbfetch",
}.freeze

# Option values used when the command line does not override them.
# The default server is the first entry of $SERVERS.
$DEFAULTS = OpenStruct.new({
  :db => 'genbank',
  :overwrite => false,
  :read_stdin => false,
  :server => $SERVERS.keys.first
})
### IMPLEMENTATION | |
# Download one sequence record and write it to "<id>.<db>" in the current
# working directory.
#
# fetch     - server object used for the download; it must respond to
#             fetch(db, id) and return the record text (main passes a
#             Bio::Fetch instance).
# db        - database name, also used as the file extension.
# id        - accession id, also used as the file's base name.
# overwrite - when false, an already existing file is left untouched.
#
# The outcome (saved / refused / timed out) is reported on standard output.
# Returns nil.
def fetch_and_save_seq(fetch, db, id, overwrite)
  filename = "#{id}.#{db}"

  # Check before downloading so a refused overwrite costs no network request.
  if !overwrite && File.exist?(filename)
    puts "can't overwrite '#{filename}'"
    return
  end

  # Query the server the caller selected rather than the library default.
  data = fetch.fetch(db, id)
  File.binwrite(filename, data)
  puts "'#{filename}' saved"
rescue Timeout::Error
  puts "'#{id}' timed out, id possibly not in db"
end
### MAIN | |
# Parse commandline arguments.
#
# arg_arr - array of raw arguments (normally ARGV); recognised options are
#           removed from it in place.
#
# Returns [options, positional_args]: options is a copy of $DEFAULTS with
# any commandline overrides applied, positional_args is what remains of
# arg_arr (the sequence ids).
#
# Exits the process after printing the help screen (-h), or with status 1
# after reporting a malformed commandline (unknown option, missing or
# invalid option argument).
def parse_clargs(arg_arr)
  # Work on a copy so the shared defaults are never modified.
  clopts = $DEFAULTS.dup

  OptionParser.new do |opts|
    opts.program_name = __FILE__
    opts.banner = "Download sequences from online databases."
    opts.separator("")
    opts.separator("Reads sequence ids either from the commandline or STDIN")
    opts.separator("and downloads them from the specified database.")
    opts.separator("")
    opts.separator("Usage: #{opts.program_name} [options] [ID1 ID2 ...]")

    opts.on('-h', '--help', 'Display this screen') do
      puts opts
      exit
    end
    # Restricting the value to the known server names makes OptionParser
    # reject anything else and hand back the matching symbol.
    opts.on('--server NAME', $SERVERS.keys, "Which server to query") do |v|
      clopts.server = v
    end
    opts.on('--db NAME', "Which database to query") do |v|
      clopts.db = v
    end
    opts.on('--read-stdin', "Read sequence ids from standard input") do
      clopts.read_stdin = true
    end
    opts.on('-o', '--overwrite', "Overwrite pre-existing files") do
      clopts.overwrite = true
    end

    begin
      opts.parse!(arg_arr)
    rescue OptionParser::ParseError => e
      # ParseError covers unknown options as well as bad or missing values.
      puts e
      puts opts
      exit 1
    end
  end

  [clopts, arg_arr]
end
# Script entry point: parse ARGV, build a fetcher for the selected server
# and download every requested accession id.
#
# Ids given as positional arguments are processed first. When --read-stdin
# was passed, further ids are then read from standard input, one per line;
# surrounding whitespace is stripped and blank lines are skipped so they are
# never sent to the server as empty ids.
def main
  clopts, pargs = parse_clargs(ARGV)
  server = Bio::Fetch.new($SERVERS[clopts.server])
  db = clopts.db
  overwrite = clopts.overwrite

  # handle positional arguments
  pargs.each { |id| fetch_and_save_seq(server, db, id, overwrite) }

  # parse stdin if asked to
  return unless clopts.read_stdin

  $stdin.each_line do |line|
    id = line.strip
    next if id.empty?

    fetch_and_save_seq(server, db, id, overwrite)
  end
  nil
end
# Run the script only when this file is executed directly, not when it is
# loaded from another file.
main if $PROGRAM_NAME == __FILE__
### END |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment