Skip to content

Instantly share code, notes, and snippets.

@cth
Created May 27, 2009 13:22
Show Gist options
  • Save cth/118638 to your computer and use it in GitHub Desktop.
Save cth/118638 to your computer and use it in GitHub Desktop.
#!/usr/bin/env ruby
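# Usage: pass the directory containing the FASTA files (.ffn for individual
# genes, .fna for complete genomes, e.g. as downloaded from GenBank) followed by
# the name of the output file; Prolog representations of the sequences are
# written to that file.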
require 'rubygems'
require 'bio'
class String
  # Renders the string as a single-quoted Prolog atom.
  #
  # Backslashes and single quotes are each prefixed with a backslash so that
  # the content can neither terminate the atom early nor be misread as the
  # start of an escape sequence.
  #
  # @return [String] the quoted, escaped representation
  def to_prolog
    # The block form of gsub is deliberate: a block's result is inserted
    # verbatim, whereas a replacement *string* would itself give special
    # meaning to backslash sequences such as \' and \\.
    "'" + gsub(/[\\']/) { |char| "\\#{char}" } + "'"
  end
end
class Array
  # Renders the array in Prolog list syntax: every element contributes its own
  # +to_prolog+ output, the pieces are comma-separated and the whole is wrapped
  # in square brackets.
  def to_prolog
    rendered = map(&:to_prolog)
    "[#{rendered.join(',')}]"
  end
end
# Converts the FASTA files found in a directory into Prolog facts.
#
# Files with the extension +.ffn+ are read as individual genes and yield
# gene/5 facts; files with the extension +.fna+ are read as genomes and yield
# genome/2 facts. Creating an instance performs the whole conversion: the
# directory is scanned and all collected facts are written to the output file.
class Genome2Prolog
  # Location part of a gene identifier, e.g. ":190-255" or ":c5020-3000".
  # A leading "c" selects the "complementary" strand, otherwise "primary".
  GENE_LOCATION = /:(c?)(\d+)-(\d+)/

  # @param datadir [String] directory that is scanned for .ffn / .fna files
  # @param outputfile [String] path of the Prolog file to (over)write
  def initialize(datadir, outputfile)
    @datadir = datadir
    @outputfile = outputfile
    @gene_facts = []
    @genome_facts = []
    process_dir(datadir)
    write_prolog_file(outputfile)
  end

  # Reads every .ffn and .fna file directly inside +dir+ (no recursion).
  def process_dir(dir)
    # Dir.foreach releases the directory handle itself, and the block form of
    # Bio::FastaFormat.open closes each sequence file once it has been read.
    Dir.foreach(dir) do |name|
      path = File.join(dir, name)
      # Compare the real extension so names such as "x.ffn.bak" are ignored.
      case File.extname(name)
      when '.ffn'
        Bio::FastaFormat.open(path) { |genes| process_individual_genes(genes) }
      when '.fna'
        Bio::FastaFormat.open(path) { |genomes| process_genomes(genomes) }
      end
    end
  end

  # Appends one genome fact per entry of +genomes+.
  def process_genomes(genomes)
    genomes.each { |genome| @genome_facts << genome2prolog(genome) }
  end

  # Appends one gene fact per entry of +genes+. Entries for which no fact can
  # be built are reported on stderr and skipped; storing nil for them would
  # make the later sort of the facts fail.
  def process_individual_genes(genes)
    genes.each do |gene|
      fact = gene2prolog(gene)
      if fact
        @gene_facts << fact
      else
        warn "Skipping gene without a recognisable location: #{gene.definition}"
      end
    end
  end

  # Builds the fact "gene(Defline,Start,End,Strand, Sequence)." for one entry.
  #
  # @return [String, nil] the fact, or nil when the third field of the first
  #   identifier does not contain a location matching GENE_LOCATION
  def gene2prolog(gene)
    defline = Bio::FastaDefline.new(gene.definition)
    location = Array(defline.list_ids[0])[2]
    match = GENE_LOCATION.match(location.to_s)
    return nil unless match

    strand = match[1] == 'c' ? 'complementary' : 'primary'
    # The defline is free text, so it goes through String#to_prolog to get
    # quoted and escaped instead of being pasted between hand-written quotes.
    "gene(#{defline.to_s.to_prolog},#{match[2]},#{match[3]},#{strand}, #{sequence_to_prolog(gene.data)})."
  end

  # Builds the fact "genome(Id, Sequence)." for one entry, where Id is the
  # second field of the first identifier on the definition line.
  def genome2prolog(genome)
    defline = Bio::FastaDefline.new(genome.definition)
    # NOTE(review): the id is emitted unquoted, which is only valid Prolog for
    # numbers or lowercase atoms - confirm against the headers actually used.
    "genome(#{defline.list_ids[0][1]}, #{sequence_to_prolog(genome.data)})."
  end

  # Writes the sorted gene facts, a separator comment and the sorted genome
  # facts to +filename+, replacing any previous content.
  def write_prolog_file(filename)
    File.open(filename, 'w') do |f|
      f << @gene_facts.sort.join("\n")
      f << "\n%% Complete genome: \n"
      f << @genome_facts.sort.join("\n")
    end
  end

  private

  # Turns raw sequence data into a Prolog list of lowercase one-character atoms.
  def sequence_to_prolog(data)
    data.delete("\n").downcase.split('').to_prolog
  end
end
# Command-line entry point. Expects two arguments: the directory holding the
# FASTA files and the path of the Prolog file to write.
datadir, outputfile = ARGV.shift(2)
# Stop with a usage hint when an argument is missing, instead of failing later
# with an obscure error from the directory or file handling.
abort "Usage: #{File.basename($PROGRAM_NAME)} DATADIR OUTPUTFILE" unless datadir && outputfile
Genome2Prolog.new(datadir, outputfile)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment