Created
November 29, 2012 17:01
-
-
Save radaniba/4170398 to your computer and use it in GitHub Desktop.
A simple script to download sequences by accession, via BioRuby. (Like much of BioRuby, finding a relevant example of how to do something can often be difficult.) It can accept accession ids on the commandline or by a piped file (one accession per line).
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env ruby | |
| # download sequences from db by id | |
| ### IMPORTS | |
require 'bio'
require 'optparse'
require 'ostruct'
require 'timeout'
require 'pp'
require 'test/unit/assertions'
| include Test::Unit::Assertions | |
| include Test::Unit | |
| ### DEFINES & CONSTANTS | |
# Known fetch servers, keyed by the name accepted on the commandline.
$SERVERS = {
  bioruby: "http://bioruby.org/cgi-bin/biofetch.rb",
  ebi: "http://www.ebi.ac.uk/cgi-bin/dbfetch"
}

# Option values used when the commandline does not override them.
# The default server is the first entry of $SERVERS.
$DEFAULTS = OpenStruct.new(
  db: 'genbank',
  overwrite: false,
  read_stdin: false,
  server: $SERVERS.keys.first
)
| ### IMPLEMENTATION | |
# Download one sequence record and save it as "<id>.<db>" in the current
# working directory.
#
# @param fetch [#fetch] server client responding to +fetch(db, id)+; +main+
#   passes a Bio::Fetch instance
# @param db [String] database name, also used as the file extension
# @param id [String] accession id, also used as the file's base name
# @param overwrite [Boolean] when false, an already existing file is kept
# @return [nil]
def fetch_and_save_seq(fetch, db, id, overwrite)
  filename = "#{id}.#{db}"

  # Check before touching the network so a refused overwrite costs no request.
  if !overwrite && File.exist?(filename)
    puts "can't overwrite '#{filename}'"
    return
  end

  # Query the client that was passed in, so the caller's choice of server is
  # actually honoured.
  data = fetch.fetch(db, id)
  File.open(filename, 'wb') { |f| f.write(data) }
  puts "'#{filename}' saved"
rescue Timeout::Error
  puts "'#{id}' timed out, id possibly not in db"
end
| ### MAIN | |
# Parse commandline arguments.
#
# Starts from a copy of $DEFAULTS and applies any options found. Recognised
# options are removed from +arg_arr+ in place, so what remains are the
# positional arguments (sequence ids). Prints the usage text and exits on
# --help (status 0) or on any option parsing error (status 1).
#
# @param arg_arr [Array<String>] raw arguments, e.g. ARGV; modified in place
# @return [Array] two elements: the options object and the remaining
#   positional arguments
def parse_clargs(arg_arr)
  clopts = $DEFAULTS.dup
  OptionParser.new do |opts|
    opts.program_name = __FILE__
    opts.banner = "Download sequences from online databases."
    opts.separator("")
    opts.separator("Reads sequence ids either from the commmandline or STDIN")
    opts.separator("and downloads them from the specified database.")
    opts.separator("")
    opts.separator("Usage: #{opts.program_name} [options] [ID1 ID2 ...]")

    opts.on('-h', '--help', 'Display this screen') do
      puts opts
      exit
    end
    # The argument must be one of the $SERVERS keys; OptionParser converts the
    # given name to the matching key.
    opts.on('--server NAME', $SERVERS.keys, "Which server to query") do |v|
      clopts.server = v
    end
    opts.on('--read-stdin', "Read sequence ids from standard input") do
      clopts.read_stdin = true
    end
    opts.on('-o', '--overwrite', "Overwrite pre-existing files") do
      clopts.overwrite = true
    end

    begin
      opts.parse!(arg_arr)
    rescue OptionParser::ParseError => e
      # ParseError also covers missing and invalid option arguments, not only
      # unknown options.
      puts e
      puts opts
      exit 1
    end
  end

  [clopts, arg_arr]
end
# Script entry point: parse ARGV, build a Bio::Fetch client for the selected
# server, then download every id given as a positional argument and, when
# --read-stdin was passed, every id read from standard input (one per line).
def main
  clopts, pargs = parse_clargs(ARGV)
  server = Bio::Fetch.new($SERVERS[clopts.server])
  db = clopts.db

  # handle positional arguments
  pargs.each { |id| fetch_and_save_seq(server, db, id, clopts.overwrite) }

  # parse stdin if asked to
  return unless clopts.read_stdin

  STDIN.each_line do |line|
    id = line.strip
    # A blank line carries no accession; skipping it avoids a pointless
    # request and a file named ".<db>".
    next if id.empty?

    fetch_and_save_seq(server, db, id, clopts.overwrite)
  end
end
# Run main only when this file is executed directly, not when it is loaded
# from another script.
main if $PROGRAM_NAME == __FILE__
| ### END |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment