Skip to content

Instantly share code, notes, and snippets.

@cknoxrun
Created May 23, 2013 16:04
Show Gist options
  • Save cknoxrun/5637216 to your computer and use it in GitHub Desktop.
Save cknoxrun/5637216 to your computer and use it in GitHub Desktop.
########################################################################
#
# This class finds properties of a compound, given the SMILES string.
# For required programs, check the comments before each method.
#
# Written by Roman Eisner, 2010
#
# Updated by Yannick Djoumbou
#######################################################################
#require File.join("#{File.dirname(__FILE__)}/", 'chemical_entity')
#require File.join("#{File.dirname(__FILE__)}/checkmatch/", 'surf')
class ChemicalPropertyFinder
@@property_cache_file = File.join(File.dirname(__FILE__), 'property_cache.txt')
@@cached_properties = Hash.new
def initialize(cache_features = true)
@cache_features = cache_features
if @cache_features
if File.exists?(@@property_cache_file)
File.open(@@property_cache_file, "r") do |infile|
while (line = infile.gets)
stuff = line.strip.split("\t")
#properties are tab-delimeted
id = stuff[0]
@@cached_properties[id] = Hash.new
stuff[1..-1].each do |prop|
parts = prop.split(':')
@@cached_properties[id][parts[0]] = parts[1]
end
end
end
end
end
end
# ------------------ Methods to help with caching of properties
def ChemicalPropertyFinder.save_property_cache
open(@@property_cache_file, 'w') { |f|
@@cached_properties.keys.each do |id|
featstr = id
props = @@cached_properties[id]
props.keys.each do |name|
val = props[name]
featstr += "\t#{name}:#{val}"
end
f.puts featstr
end
}
end
def get_cached_prop(propname, smiles)
if @cache_features and @@cached_properties.has_key?(smiles) and @@cached_properties[smiles].has_key?(propname)
return @@cached_properties[smiles][propname]
else
nil
end
end
def set_cached_prop(smiles, prop, val)
@@cached_properties[smiles] ||= Hash.new
@@cached_properties[smiles][prop] = val
end
# ------------------- Beginning of Property Methods:
#Find checkmol features. Checkmol must be installed (http://merian.pch.univie.ac.at/~nhaider/cheminf/cmmm.html)
def checkmol_features(smiles)
sdf = create_sdf(smiles)
command = "checkmol -e \"#{sdf}\""
features = Array.new
IO.popen(command) { |f|
while (s = f.gets)
s = s.strip
if s.length > 0
features << s
end
end
}
File.delete(sdf)
return features
end
#Find checkmatch features. This is in the lib/checkmatch directory.
def checkmatch_features(smiles)
#sdf = "#{Dir.pwd}/#{create_sdf(smiles)}"
currdir = Dir.pwd
#Dir.chdir('../lib/')
Dir.chdir("#{File.dirname(__FILE__)}/../lib")
command = "ruby surf.rb -s \"#{smiles}\""
features = Array.new
# centity=ChemicalEntity.new(smiles)
# #$stderr.puts centity.smiles
# surf_results=ssurf(centity)
# puts "surf results===> #{surf_results}"
# features<<surf_results[0]
# surf_results[1].each do |f|
# features<<f.downcase
# end
IO.popen(command) { |f|
while (s = f.gets)
#puts s
s = s.strip
if s.length > 0
if s.scan(/InChI=1S/).length>0 or s.include?("InChI=1S")
features<<s
else
features << s.downcase
end
end
end
}
#$stderr.puts "FEATURES =====> #{features}"
Dir.chdir(currdir)
#File.delete(sdf)
return features
end
#Create an SDF file. molconvert is required ( )
def create_sdf(smiles)
sdf = 'temp.sdf'
system("molconvert sdf -s \"#{smiles}\" > #{sdf}")
return sdf
end
#Find formula
def formula(smiles)
if @cache_features
cached_prop = get_cached_prop('formula', smiles)
end
if cached_prop.nil?
prop = find_property_chemaxon('formula', smiles)
set_cached_prop(smiles, 'formula', prop) if @cache_features
return prop
else
return cached_prop
end
end
#Find mass
def mass(smiles)
if @cache_features
cached_prop = get_cached_prop('mass', smiles).to_f
end
if cached_prop.nil?
prop = find_property_chemaxon('mass', smiles).to_f
set_cached_prop(smiles, 'mass', prop) if @cache_features
return prop
else
return cached_prop
end
end
#Find the counts of each atom given the chemical formula
def atom_counts(formula)
atoms = {}
curr = ""
currdig = ""
#Get the number of each atom:
formula.each_byte do |c|
c = c.chr
if c =~ /\d/
currdig += c
elsif c =~ /[a-z,A-Z]/
if currdig.length > 0
atoms[curr] ||= 0
atoms[curr] += currdig.to_i
curr = c
currdig = ""
else
if curr.length > 0
if c =~ /[a-z]/
curr += c
else
atoms[curr] ||= 0
atoms[curr] += 1
curr = c
end
else
curr = c
end
end
else
return nil
end
end
if curr.length > 0
if currdig == ''
currdig = 1
end
atoms[curr] ||= 0
atoms[curr] += currdig.to_i
end
return atoms
end
private
#(requires the cxcalc binary from Chemaxon: http://www.chemaxon.com/marvin/help/applications/cxcalc-calculations.html )
def find_property_chemaxon(property, smiles)
command = "cxcalc -N ih #{property} '#{smiles}'"
prop = nil
IO.popen(command) { |f|
s = f.gets
if s.nil?
prop = ''
else
s = s.strip
if s.length > 0
prop = s
end
end
}
return prop
end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment