Created
November 5, 2009 16:03
-
-
Save antunderwood/227160 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Author:: Anthony Underwood | |
# Date:: 2008-07-04 | |
require 'rubygems' | |
require 'bio' | |
# Method to perform a remote blast | |
# input:: a sequence object , a string specifiying the program (blastn, blastp, blastx etc), a remote database (e.g nr-nt for non-redundant) and options (e.g -e 0.0001) | |
# output:: a Bio::Blast::Report object | |
def remote_blast(seq_obj, program, db = 'nr-nt', options = '', server = 'genomenet') | |
# create BLAST factory object | |
factory = Bio::Blast.remote(program, db, options, server) | |
report = factory.query(seq_obj) | |
end | |
# Method to perform a local blast | |
# input:: a sequence object, path to database and a string specifiying the program (blastn, blastp, blastx etc) | |
# output:: a Bio::Blast::Report object | |
def local_blast(seq_obj, path_to_blast_database, program = 'blastn') | |
factory = Bio::Blast.local(program, path_to_blast_database, '-m 8') | |
report = factory.query(seq_obj) | |
end | |
# Method to return details of the first hit from a blast search | |
# input:: a Bio::Blast::Report object | |
# output:: the hit defition, overlap and percent identity | |
def get_first_hit_details(report) | |
return get_hit_details(report, 1) | |
end | |
# Method to return details of the x hit from a blast search | |
# input:: a Bio::Blast::Report object | |
# output:: the hit defition, overlap and percent identity | |
def get_hit_details(report, hit_number) | |
hit = report.hits[hit_number-1] | |
if !hit.nil? | |
return hit.definition, hit.overlap, hit.percent_identity | |
else | |
return nil, nil, nil | |
end | |
end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
module Bio | |
class Blast | |
def exec_ncbi(query) | |
host = "www.ncbi.nlm.nih.gov" | |
path = "/blast/Blast.cgi" | |
matrix = @matrix ? @matrix : 'blosum62' | |
filter = @filter ? @filter : 'L' | |
opt = [] | |
opt.concat(@options) if @options | |
submit_params = { | |
'CMD' => 'Put', | |
'FORMAT_OBJECT' => 'Alignment', | |
'COMPOSITION_BASED_STATISTICS' => 'off', | |
'DATABASE' => @db, | |
'EXPECT' => '1e-3', | |
'MATRIX_NAME' => matrix, | |
'FILTER' => filter, | |
'PROGRAM' => @program, | |
'SERVICE' => 'plain', | |
'QUERY' => CGI.escape(query), | |
'OTHER_ADVANCED' => CGI.escape(Bio::Command.make_command_line_unix(opt)), | |
} | |
retrieve_params = { | |
'CMD' => 'Get', | |
'DESCRIPTIONS' => 100, | |
'FORMAT_TYPE' => 'Text', | |
'ALIGNMENTS' => 50, | |
'ALIGNMENT_VIEW' => 'Tabular' | |
} | |
data = [] | |
submit_params.each do |k, v| | |
data.push("#{k}=#{v}") if v | |
end | |
report = nil | |
# begin | |
success = false | |
while !success | |
http = Bio::Command.new_http(host) | |
http.open_timeout = 300 | |
http.read_timeout = 600 | |
result, = http.post(path, data.join('&')) | |
output = result.body | |
# find RID | |
rid ="" | |
output.each do |line| | |
if line =~ /RID = (\w+)\n$/ | |
rid = $1 | |
break | |
end | |
end | |
retrieve_param_data = [] | |
retrieve_params.each do |k, v| | |
retrieve_param_data.push("#{k}=#{v}") if v | |
end | |
# push on RID value | |
retrieve_param_data.push("RID=#{rid}") | |
status = "WAITING" | |
# puts rid | |
# retrieve result | |
until status !~ /WAITING/ | |
sleep 5 | |
result, = http.post(path, retrieve_param_data.join('&')) | |
output = result.body | |
status = "" | |
output.each do |line| | |
if line =~ /Status=(.+)\n$/ | |
status = $1 | |
break | |
end | |
end | |
status = "NO STATUS" if status == "" | |
# puts status | |
end | |
# if READY reponse parse result and return report | |
if status == "READY" | |
raw_txt = output.split(/\<\/?PRE\>/)[1] | |
tab_format ="" | |
raw_txt.each do |line| | |
tab_format += line unless line =~ /^#/ or line !~ /\t/# remove blank and commented lines | |
end | |
success = true | |
# else | |
# raise "There was an error obtaining the result. Status: #{status}" | |
end | |
end | |
return tab_format | |
end | |
end | |
end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'rubygems' | |
require 'bio' | |
require 'blast_functions' | |
require 'ncbi_blast' | |
ENV['http_proxy'] = "http://proxy_server_ip:port_numer" | |
neuraminidase_sequence = "ATGAATCCAAATCAGAAAATAATAACCATTGGATCAATCTGTCTGGTAGTCGGACTAATTAGCCTAATATTGCAAATAGGGAATATAATCTCAATATGGATTAGCCATTCAATTCAAACTGGAAGTCAAAACCATACTGGAATATGCAACCAAAACATCATTACCTATAAAAATAGCACCTGGGTAAAGGACACAACTTCAGTGATATTAACCGGCAATTCATCTCTTTGTCCCATCCGTGGGTGGGCTATATACAGCAAAGACAATAGCATAAGAATTGGTTCCAAAGGAGACGTTTTTGTCATAAGAGAGCCCTTTATTTCATGTTCTCACTTGGAATGCAGGACCTTTTTTCTGACCCAAGGTGCCTTACTGAATGACAGGCATTCAAATGGGACTGTTAAGGACAGAAGCCCTTATAGGGCCTTAATGAGCTGCCCTGTCGGTGAAGCTCCGTCCCCGTACAATTCAAGATTTGAATCGGTTGCTTGGTCAGCAAGTGCATGTCATGATGGCATGGGCTGGCTAACAATCGGAATTTCAGGTCCAGATAATGGAGCAGTGGCTGTATTAAAATACAACGGCATAATAACTGAAACCATAAAAAGTTGGAGGAAGAAAATATTGAGGACACAAGAGTCTGAATGTGCCTGTGTAAATGGTTCATGTTTTACTATAATGACTGATGGCCCGAGTGATGGGCTGGCCTCGTACAAAATTTTCAAGATCGAAAAGGGGAAGGTTACTAAATCAATAGAGTTGAATGCACCTAATTCTCACTATGAGGAATGTTCCTGTTACCCTGATACCGGCAAAGTGATGTGTGTGTGCAGAGACAATTGGCATGGTTCGAACCGGCCATGGGTGTCTTTCGATCAAAACCTGGATTATCAAATAGGATACATCTGCAGTGGGGTTTTCGGTGACAACCCGCGTCCCAAAGATGGAACAGGCAGCTGTGGTCCAGTGTATGTTGATGGAGCAAACGGAGTAAAGGGATTTTCATATAGGTATGGTAATGGTGTTTGGATAGGAAGGACCAAAAGTCACAGTTCCAGACATGGGTTTGAGATGATTTGGGATCCTAATGGATGGACAGAGACTGATAGTAAGTTCTCTGTGAGGCAAGATGTTGTGGCAATGACTGATTGGTCAGGGTATAGCGGGAGTTTCGTTCAACATCCTGAGCTAACAGGGCTAGACTGTATAAGGCCGTGCTTCTGGGTTGAATTAATCAGGGGACGACCTAAAGAAAAAACAATCTGGACTAGTGCGAGCAGCATTTCTTTTTGTGGCGTGAATAGTGATACTGTAGATTGGTCTTGGCCAGACGGTGCTGAGTTGCCATTCACCATTGACAAGTAG" | |
blast_report = remote_blast(neuraminidase_sequence, 'blastn', 'nr' , '' , 'ncbi') | |
(1..5).each do |hit_number| | |
definition, overlap, percent_identity = get_hit_details(blast_report, hit_number) | |
if !definition.nil? | |
accession = definition.split("|")[3] | |
accession.sub!(/\..+$/, "") # remove version number | |
server = Bio::Fetch.new('http://www.ebi.ac.uk/cgi-bin/dbfetch') | |
embl_text = server.fetch('embl', accession) | |
embl_object = Bio::EMBL.new(embl_text) | |
puts "\thit_number #{hit_number} --> " + embl_object.description | |
else | |
puts "\thit_number #{hit_number} --> none" | |
end | |
end |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment