Skip to content

Instantly share code, notes, and snippets.

@ergo70
Created April 17, 2019 10:10
Show Gist options
  • Save ergo70/a7ee432fe67db4a8f4124c6d5f5e589b to your computer and use it in GitHub Desktop.
Save ergo70/a7ee432fe67db4a8f4124c6d5f5e589b to your computer and use it in GitHub Desktop.
How to generate fingerprints for similarity screening
package org.theplateisbad.fp;
import java.io.IOException;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.BitSet;
import org.openscience.cdk.DefaultChemObjectBuilder;
import org.openscience.cdk.aromaticity.Aromaticity;
import org.openscience.cdk.exception.CDKException;
import org.openscience.cdk.fingerprint.IBitFingerprint;
import org.openscience.cdk.fingerprint.PubchemFingerprinter;
import org.openscience.cdk.interfaces.IAtomContainer;
import org.openscience.cdk.smiles.SmilesParser;
import org.openscience.cdk.tools.CDKHydrogenAdder;
import org.openscience.cdk.tools.manipulator.AtomContainerManipulator;
/**
 * Command-line tool that reads every {@code (id, smiles)} row from {@code cdk.externalfp},
 * computes a fingerprint with CDK's {@link PubchemFingerprinter}, and writes the fingerprint
 * (as a string of {@code '0'}/{@code '1'} characters cast to {@code bit varying}) together
 * with its cardinality back to the same row.
 *
 * <p>All updates are performed in a single transaction: either every processed row is
 * committed, or, if anything fails, the transaction is rolled back and the failure is
 * propagated to the caller.
 */
public class FingerprintGenerator {

    /** JDBC URL of the database holding the {@code cdk.externalfp} table. */
    private static final String JDBC_URL = "jdbc:postgresql://localhost/chemistry?user=postgres&password=";

    /** Query that supplies the molecules to fingerprint. */
    private static final String SELECT_SQL = "SELECT id, smiles FROM cdk.externalfp";

    /** Statement that stores the fingerprint bit string and its cardinality for one row. */
    private static final String UPDATE_SQL =
            "UPDATE cdk.externalfp SET pubchemfp = ?::bit varying, cardinality = ? WHERE id = ?";

    /** Fetch-size hint passed to the SELECT statement. */
    private static final int FETCH_SIZE = 10000;

    /** Number of UPDATEs accumulated before they are sent to the server in one batch. */
    private static final int BATCH_SIZE = 1000;

    static {
        try {
            // Explicitly load the PostgreSQL driver class. A failure is only reported here;
            // if no driver ends up registered, DriverManager.getConnection in main() will
            // fail with an SQLException.
            Class.forName("org.postgresql.Driver");
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
        }
    }

    /**
     * Fingerprints every molecule in {@code cdk.externalfp} and stores the result.
     *
     * <p>Each row is echoed to standard output as {@code id:smiles}. Rows whose
     * {@code smiles} column is NULL are reported on standard error and left untouched.
     *
     * @param args ignored
     * @throws CDKException if a SMILES cannot be parsed or a CDK processing step fails
     * @throws CloneNotSupportedException declared for compatibility with the original signature
     * @throws IOException declared for compatibility with the original signature
     * @throws SQLException if any database operation fails; the transaction is rolled back first
     */
    public static void main(String[] args) throws CDKException, CloneNotSupportedException, IOException, SQLException {
        CDKHydrogenAdder adder = CDKHydrogenAdder.getInstance(DefaultChemObjectBuilder.getInstance());
        SmilesParser smilesParser = new SmilesParser(DefaultChemObjectBuilder.getInstance());
        PubchemFingerprinter fpr = new PubchemFingerprinter(DefaultChemObjectBuilder.getInstance());

        try (Connection conn = DriverManager.getConnection(JDBC_URL)) {
            // One transaction for the whole run.
            // NOTE(review): the PostgreSQL driver reportedly also needs auto-commit off (and a
            // forward-only statement) for setFetchSize to stream rows — confirm against pgjdbc docs.
            conn.setAutoCommit(false);

            try (Statement query = conn.createStatement(ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY);
                    PreparedStatement update = conn.prepareStatement(UPDATE_SQL)) {
                query.setFetchSize(FETCH_SIZE);

                int pending = 0;
                try (ResultSet rows = query.executeQuery(SELECT_SQL)) {
                    while (rows.next()) {
                        int id = rows.getInt(1);
                        String smiles = rows.getString(2);
                        System.out.println(id + ":" + smiles);

                        if (smiles == null) {
                            // Nothing to parse; leave the row as it is instead of aborting the run.
                            System.err.println("Skipping id " + id + ": smiles is NULL");
                            continue;
                        }

                        // Prepare the molecule before fingerprinting: atom typing, explicit
                        // hydrogens and aromaticity perception.
                        // NOTE(review): presumably these are preconditions of PubchemFingerprinter —
                        // confirm against the CDK documentation.
                        IAtomContainer mol = smilesParser.parseSmiles(smiles);
                        AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(mol);
                        adder.addImplicitHydrogens(mol);
                        AtomContainerManipulator.convertImplicitToExplicitHydrogens(mol);
                        Aromaticity.cdkLegacy().apply(mol);

                        IBitFingerprint fp = fpr.getBitFingerprint(mol);
                        update.setString(1, fingerprint2bitString(fp));
                        update.setInt(2, fp.cardinality());
                        update.setInt(3, id);
                        update.addBatch();

                        // Send accumulated updates in chunks rather than one round-trip per row.
                        if (++pending >= BATCH_SIZE) {
                            update.executeBatch();
                            pending = 0;
                        }
                    }
                }

                // Flush whatever is left from the last, partially filled batch.
                if (pending > 0) {
                    update.executeBatch();
                }
                conn.commit();
            } catch (Exception e) {
                // Closing a connection with an active transaction has implementation-defined
                // results, so undo the partial work explicitly before propagating the failure.
                try {
                    conn.rollback();
                } catch (SQLException rollbackFailure) {
                    e.addSuppressed(rollbackFailure);
                }
                throw e;
            }
        }
    }

    /**
     * Renders a fingerprint as a string of {@code '0'} and {@code '1'} characters, one per
     * bit, starting with bit index 0.
     *
     * @param fp the fingerprint to render
     * @return a string of length {@code fp.size()} where character {@code i} is {@code '1'}
     *         if bit {@code i} is set and {@code '0'} otherwise
     * @throws ArithmeticException if {@code fp.size()} does not fit in an {@code int}
     */
    private static String fingerprint2bitString(final IBitFingerprint fp) {
        final int length = Math.toIntExact(fp.size());
        final BitSet bits = fp.asBitSet();
        final StringBuilder res = new StringBuilder(length);
        for (int i = 0; i < length; i++) {
            res.append(bits.get(i) ? '1' : '0');
        }
        return res.toString();
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment