Invoke like this (the database file comes first, then the query file):
ruby ghetto_blaster.rb database.fa query.fa
require 'zlib'

# zlib compression level used for every deflate call in this script
# (Zlib::BEST_COMPRESSION is 9, the slowest but tightest setting).
LEVEL = Zlib::BEST_COMPRESSION

# Command-line arguments: the database FASTA path comes first,
# the query FASTA path second.
database = ARGV[0]
query = ARGV[1]
# Compress +string+ with zlib and return the compressed bytes.
#
# string - the String to compress.
# level  - zlib compression level; defaults to the script-wide LEVEL.
#
# Uses the one-shot Zlib::Deflate.deflate, which creates the stream, finishes
# it and closes it even when compression raises, so no stream is left open.
def deflate(string, level = LEVEL)
  Zlib::Deflate.deflate(string, level)
end
# Parse the FASTA file at +path+ into [header, sequence] pairs, in file order.
# The header is the full first line of a record (without the leading ">");
# the sequence is the record's remaining lines joined together with line
# endings ("\n" or "\r\n") removed.
def fasta_records(path)
  # File.read closes the file handle. The first chunk produced by the split is
  # whatever precedes the first ">" (normally the empty string) and is never a
  # record, so it is dropped instead of being parsed.
  chunks = File.read(path).split(/^>/).drop(1)
  records = []
  chunks.each do |chunk|
    header, *sequence_lines = chunk.each_line.map { |line| line.chomp }
    next if header.nil? # defensive: an empty chunk has no header line
    records << [header, sequence_lines.join('')]
  end
  records
end

# Load entire database into memory as header => sequence.
# A repeated header keeps the last sequence seen for it.
db = {}
fasta_records(database).each { |header, sequence| db[header] = sequence }

# Read the query sequence: only the first record of the query file is used.
query_record = fasta_records(query).first
abort "No FASTA record found in #{query}" if query_record.nil?
query_sequence = query_record[1]
# Find the database sequence whose compressed size shrinks the most when the
# query is appended to it.
#
# query - the query sequence String.
# db    - Hash mapping record header => sequence String.
#
# Returns {:score => Float, :hit => header} for the best record, or
# {:score => 0, :hit => ''} when no record scores above zero.
# The score is the number of bytes saved by compressing the two sequences
# together rather than separately, divided by the subject length.
# On a tie the earliest record in +db+ wins.
def query(query, db)
  query_size = deflate(query).length
  best = 0
  winner = ''
  db.each do |name, seq|
    # An empty subject would divide by zero below, yielding Infinity and
    # beating every real hit, so it is skipped.
    next if seq.empty?

    # Compress DB sequence on its own, then both together.
    solo = deflate(seq).length
    together = deflate(seq + query).length

    saved = solo + query_size - together
    next unless saved > 0

    score = saved / seq.length.to_f # Normalize by subject length
    if score > best
      best = score
      winner = name
    end
  end
  {:score => best, :hit => winner}
end
# Run the search and print the header of the best-scoring database record
# (a blank line when no record scored above zero, since the hit is then '').
result = query(query_sequence, db)
puts result[:hit]