Created
May 23, 2013 16:04
-
-
Save cknoxrun/5637216 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| ######################################################################## | |
| # | |
| # This class finds properties of a compound, given the SMILES string. | |
| # For required programs, check the comments before each method. | |
| # | |
| # Written by Roman Eisner, 2010 | |
| # | |
| # Updated by Yannick Djoumbou | |
| ####################################################################### | |
| #require File.join("#{File.dirname(__FILE__)}/", 'chemical_entity') | |
| #require File.join("#{File.dirname(__FILE__)}/checkmatch/", 'surf') | |
| class ChemicalPropertyFinder | |
| @@property_cache_file = File.join(File.dirname(__FILE__), 'property_cache.txt') | |
| @@cached_properties = Hash.new | |
| def initialize(cache_features = true) | |
| @cache_features = cache_features | |
| if @cache_features | |
| if File.exists?(@@property_cache_file) | |
| File.open(@@property_cache_file, "r") do |infile| | |
| while (line = infile.gets) | |
| stuff = line.strip.split("\t") | |
| #properties are tab-delimeted | |
| id = stuff[0] | |
| @@cached_properties[id] = Hash.new | |
| stuff[1..-1].each do |prop| | |
| parts = prop.split(':') | |
| @@cached_properties[id][parts[0]] = parts[1] | |
| end | |
| end | |
| end | |
| end | |
| end | |
| end | |
| # ------------------ Methods to help with caching of properties | |
| def ChemicalPropertyFinder.save_property_cache | |
| open(@@property_cache_file, 'w') { |f| | |
| @@cached_properties.keys.each do |id| | |
| featstr = id | |
| props = @@cached_properties[id] | |
| props.keys.each do |name| | |
| val = props[name] | |
| featstr += "\t#{name}:#{val}" | |
| end | |
| f.puts featstr | |
| end | |
| } | |
| end | |
| def get_cached_prop(propname, smiles) | |
| if @cache_features and @@cached_properties.has_key?(smiles) and @@cached_properties[smiles].has_key?(propname) | |
| return @@cached_properties[smiles][propname] | |
| else | |
| nil | |
| end | |
| end | |
| def set_cached_prop(smiles, prop, val) | |
| @@cached_properties[smiles] ||= Hash.new | |
| @@cached_properties[smiles][prop] = val | |
| end | |
| # ------------------- Beginning of Property Methods: | |
| #Find checkmol features. Checkmol must be installed (http://merian.pch.univie.ac.at/~nhaider/cheminf/cmmm.html) | |
| def checkmol_features(smiles) | |
| sdf = create_sdf(smiles) | |
| command = "checkmol -e \"#{sdf}\"" | |
| features = Array.new | |
| IO.popen(command) { |f| | |
| while (s = f.gets) | |
| s = s.strip | |
| if s.length > 0 | |
| features << s | |
| end | |
| end | |
| } | |
| File.delete(sdf) | |
| return features | |
| end | |
| #Find checkmatch features. This is in the lib/checkmatch directory. | |
| def checkmatch_features(smiles) | |
| #sdf = "#{Dir.pwd}/#{create_sdf(smiles)}" | |
| currdir = Dir.pwd | |
| #Dir.chdir('../lib/') | |
| Dir.chdir("#{File.dirname(__FILE__)}/../lib") | |
| command = "ruby surf.rb -s \"#{smiles}\"" | |
| features = Array.new | |
| # centity=ChemicalEntity.new(smiles) | |
| # #$stderr.puts centity.smiles | |
| # surf_results=ssurf(centity) | |
| # puts "surf results===> #{surf_results}" | |
| # features<<surf_results[0] | |
| # surf_results[1].each do |f| | |
| # features<<f.downcase | |
| # end | |
| IO.popen(command) { |f| | |
| while (s = f.gets) | |
| #puts s | |
| s = s.strip | |
| if s.length > 0 | |
| if s.scan(/InChI=1S/).length>0 or s.include?("InChI=1S") | |
| features<<s | |
| else | |
| features << s.downcase | |
| end | |
| end | |
| end | |
| } | |
| #$stderr.puts "FEATURES =====> #{features}" | |
| Dir.chdir(currdir) | |
| #File.delete(sdf) | |
| return features | |
| end | |
| #Create an SDF file. molconvert is required ( ) | |
| def create_sdf(smiles) | |
| sdf = 'temp.sdf' | |
| system("molconvert sdf -s \"#{smiles}\" > #{sdf}") | |
| return sdf | |
| end | |
| #Find formula | |
| def formula(smiles) | |
| if @cache_features | |
| cached_prop = get_cached_prop('formula', smiles) | |
| end | |
| if cached_prop.nil? | |
| prop = find_property_chemaxon('formula', smiles) | |
| set_cached_prop(smiles, 'formula', prop) if @cache_features | |
| return prop | |
| else | |
| return cached_prop | |
| end | |
| end | |
| #Find mass | |
| def mass(smiles) | |
| if @cache_features | |
| cached_prop = get_cached_prop('mass', smiles).to_f | |
| end | |
| if cached_prop.nil? | |
| prop = find_property_chemaxon('mass', smiles).to_f | |
| set_cached_prop(smiles, 'mass', prop) if @cache_features | |
| return prop | |
| else | |
| return cached_prop | |
| end | |
| end | |
| #Find the counts of each atom given the chemical formula | |
| def atom_counts(formula) | |
| atoms = {} | |
| curr = "" | |
| currdig = "" | |
| #Get the number of each atom: | |
| formula.each_byte do |c| | |
| c = c.chr | |
| if c =~ /\d/ | |
| currdig += c | |
| elsif c =~ /[a-z,A-Z]/ | |
| if currdig.length > 0 | |
| atoms[curr] ||= 0 | |
| atoms[curr] += currdig.to_i | |
| curr = c | |
| currdig = "" | |
| else | |
| if curr.length > 0 | |
| if c =~ /[a-z]/ | |
| curr += c | |
| else | |
| atoms[curr] ||= 0 | |
| atoms[curr] += 1 | |
| curr = c | |
| end | |
| else | |
| curr = c | |
| end | |
| end | |
| else | |
| return nil | |
| end | |
| end | |
| if curr.length > 0 | |
| if currdig == '' | |
| currdig = 1 | |
| end | |
| atoms[curr] ||= 0 | |
| atoms[curr] += currdig.to_i | |
| end | |
| return atoms | |
| end | |
| private | |
| #(requires the cxcalc binary from Chemaxon: http://www.chemaxon.com/marvin/help/applications/cxcalc-calculations.html ) | |
| def find_property_chemaxon(property, smiles) | |
| command = "cxcalc -N ih #{property} '#{smiles}'" | |
| prop = nil | |
| IO.popen(command) { |f| | |
| s = f.gets | |
| if s.nil? | |
| prop = '' | |
| else | |
| s = s.strip | |
| if s.length > 0 | |
| prop = s | |
| end | |
| end | |
| } | |
| return prop | |
| end | |
| end |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment