# Invoke like this:
#   ruby ghetto_blaster.rb query.fa database.fa
require 'zlib'

# zlib compression level used for every measurement; 9 is zlib's maximum
# (Zlib::BEST_COMPRESSION).
LEVEL = 9

# Command-line arguments, in the order given by the usage line:
# the query FASTA file first, then the database FASTA file.
query = ARGV[0]
database = ARGV[1]
# Compress a string with zlib and return the compressed bytes.
#
# @param string [String] the data to compress
# @param level [Integer] zlib compression level; defaults to the file-level
#   LEVEL constant (the default is only evaluated when the argument is omitted)
# @return [String] the deflated data
def deflate(string, level = LEVEL)
  # The class-level helper creates, finishes and closes the stream itself,
  # so the stream is released even if compression raises.
  Zlib::Deflate.deflate(string, level)
end
# Load the entire FASTA database into memory as { header => sequence }.
db = {}
# File.read closes the file once it has been read.
File.read(database).split(/^>/).each do |record|
  # Splitting on the leading '>' yields an empty first element (the text before
  # the first record); whitespace-only chunks carry no header either.
  next if record.strip.empty?

  # First line is the header; the remaining lines are the wrapped sequence.
  header, *sequence = record.split("\n")
  db[header] = sequence.join
end
# Read the query sequence: only the first FASTA record of the query file is
# used, with its header line dropped and its sequence lines joined together.
first_record = File.read(query).split(/^>/)[1]
abort "No FASTA record found in #{query}" if first_record.nil?

_header, *query_lines = first_record.split("\n")
query_sequence = query_lines.join
# Find the database sequence that yields the greatest reduction in compressed
# size when it is compressed together with the query.
#
# For each subject the saving is
#   deflate(subject).length + deflate(query).length - deflate(subject + query).length
# and the score is that saving divided by the subject length.
#
# @param query [String] the query sequence
# @param db [Hash{String => String}] database mapping header => sequence
# @return [Hash] { :score => best score (0 if nothing scored), :hit => header
#   of the best subject ('' if nothing scored) }
def query(query, db)
  query_size = deflate(query).length
  best = 0
  winner = ''

  db.each do |header, seq|
    # An empty subject would be normalized by a length of 0.0, score Infinity
    # and beat every real sequence, so it is never a candidate.
    next if seq.empty?

    # Compressed size of the subject alone, and of subject + query together.
    solo = deflate(seq).length
    together = deflate(seq + query).length

    # Only subjects that actually save bytes when paired with the query count.
    saved = solo + query_size - together
    next unless saved > 0

    score = saved / seq.length.to_f # Normalize by subject length
    if score > best
      best = score
      winner = header
    end
  end

  { :score => best, :hit => winner }
end
# Score the query against every database record and print the header of the
# best hit (an empty line when no record scored).
result = query(query_sequence, db)
puts result[:hit]