Created
April 17, 2019 10:10
-
-
Save ergo70/a7ee432fe67db4a8f4124c6d5f5e589b to your computer and use it in GitHub Desktop.
How to generate fingerprints for similarity screening
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package org.theplateisbad.fp; | |
import java.io.IOException; | |
import java.sql.Connection; | |
import java.sql.DriverManager; | |
import java.sql.PreparedStatement; | |
import java.sql.ResultSet; | |
import java.sql.SQLException; | |
import java.sql.Statement; | |
import java.util.BitSet; | |
import org.openscience.cdk.DefaultChemObjectBuilder; | |
import org.openscience.cdk.aromaticity.Aromaticity; | |
import org.openscience.cdk.exception.CDKException; | |
import org.openscience.cdk.fingerprint.IBitFingerprint; | |
import org.openscience.cdk.fingerprint.PubchemFingerprinter; | |
import org.openscience.cdk.interfaces.IAtomContainer; | |
import org.openscience.cdk.smiles.SmilesParser; | |
import org.openscience.cdk.tools.CDKHydrogenAdder; | |
import org.openscience.cdk.tools.manipulator.AtomContainerManipulator; | |
public class FingerprintGenerator { | |
static { | |
try { | |
Class.forName("org.postgresql.Driver"); | |
} catch (ClassNotFoundException e) { | |
e.printStackTrace(); | |
} | |
} | |
public static void main(String[] args) throws CDKException, CloneNotSupportedException, IOException, SQLException { | |
String url = "jdbc:postgresql://localhost/chemistry?user=postgres&password="; | |
CDKHydrogenAdder adder = CDKHydrogenAdder.getInstance(DefaultChemObjectBuilder.getInstance()); | |
SmilesParser smilesParser = new SmilesParser(DefaultChemObjectBuilder.getInstance()); | |
PubchemFingerprinter fpr = new PubchemFingerprinter(DefaultChemObjectBuilder.getInstance()); | |
try (Connection conn = DriverManager.getConnection(url); | |
Statement query = conn.createStatement(ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY); | |
PreparedStatement update = conn.prepareStatement( | |
"UPDATE cdk.externalfp SET pubchemfp = ?::bit varying, cardinality = ? WHERE id = ?", | |
ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_UPDATABLE)) { | |
conn.setAutoCommit(false); | |
query.setFetchSize(10000); | |
ResultSet rows = query.executeQuery("SELECT id, smiles FROM cdk.externalfp"); | |
while (rows.next()) { | |
int id = rows.getInt(1); | |
String smiles = rows.getString(2); | |
System.out.println(id + ":" + smiles); | |
IAtomContainer mol = smilesParser.parseSmiles(smiles); | |
AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(mol); | |
adder.addImplicitHydrogens(mol); | |
AtomContainerManipulator.convertImplicitToExplicitHydrogens(mol); | |
Aromaticity.cdkLegacy().apply(mol); | |
IBitFingerprint fp = fpr.getBitFingerprint(mol); | |
String bitstring = fingerprint2bitString(fp); | |
update.setString(1, bitstring); | |
update.setInt(2, fp.cardinality()); | |
update.setInt(3, id); | |
update.executeUpdate(); | |
} | |
conn.commit(); | |
rows.close(); | |
conn.setAutoCommit(true); | |
} | |
} | |
private static String fingerprint2bitString(final IBitFingerprint fp) { | |
StringBuilder res = new StringBuilder(); | |
BitSet fpBits = fp.asBitSet(); | |
for (long i = 0; i < fp.size(); i++) { | |
if (true == fpBits.get((int) i)) { | |
res.append('1'); | |
} else { | |
res.append('0'); | |
} | |
} | |
return res.toString(); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment