Skip to content

Instantly share code, notes, and snippets.

@greglandrum
Created January 19, 2014 06:56
Show Gist options
  • Save greglandrum/8501410 to your computer and use it in GitHub Desktop.
Save greglandrum/8501410 to your computer and use it in GitHub Desktop.
RDKit MaxMinPicker example
//
// Copyright (C) 2014 Greg Landrum
// @@ All Rights Reserved @@
// This file is part of the RDKit.
// The contents are covered by the terms of the BSD license
// which is included in the file license.txt, found at the root
// of the RDKit source tree.
//
/*
Can be built with:
g++ -o maxmin_demo.exe maxmin_demo.cpp -I$RDBASE/Code -I$RDBASE/Extern \
-L$RDBASE/lib -lFingerprints -lFileParsers -lSimDivPickers \
-lGraphMol -lDataStructs -lRDGeometryLib -lRDGeneral
*/
#include <RDGeneral/Invariant.h>
#include <GraphMol/RDKitBase.h>
#include <GraphMol/FileParsers/MolSupplier.h>
#include <GraphMol/Fingerprints/MorganFingerprints.h>
#include <DataStructs/ExplicitBitVect.h>
#include <DataStructs/BitOps.h>
#include <SimDivPickers/MaxMinPicker.h>
#include <RDGeneral/RDLog.h>
#include <vector>
#include <algorithm>
#include <boost/shared_ptr.hpp>
using namespace RDKit;
void LoadAndFingerprintMols(std::string filename,
std::vector<boost::shared_ptr<ExplicitBitVect> > &fps){
BOOST_LOG(rdInfoLog)<<"Load Mols"<<std::endl;
SDMolSupplier suppl(filename);
while(!suppl.atEnd()){
ROMol *mol=suppl.next();
ExplicitBitVect *fp=MorganFingerprints::getFingerprintAsBitVect(*mol,2,2048);
fps.push_back(boost::shared_ptr<ExplicitBitVect>(fp));
if(!(fps.size()%1000)){
BOOST_LOG(rdInfoLog)<<" done: "<<fps.size()<<std::endl;
}
delete mol;
}
}
struct distFunctor{
const std::vector<boost::shared_ptr<ExplicitBitVect> > &_fps;
distFunctor(const std::vector<boost::shared_ptr<ExplicitBitVect> > &fps) : _fps(fps) {};
double operator()(unsigned int i,unsigned int j){
return 1.-TanimotoSimilarity(*_fps[i].get(),*_fps[j].get());
}
};
// Selects `nToPick` entries from `fps` with the MaxMin picker's lazy
// interface: distances are produced on demand by a distFunctor rather than
// from a precomputed matrix. The chosen indices are written to `picks`.
// No seed picks are supplied and the random seed is fixed at 0xF00D.
void LazyPickSubset(const std::vector<boost::shared_ptr<ExplicitBitVect> > &fps,
                    INT_VECT &picks,unsigned int nToPick){
  BOOST_LOG(rdInfoLog)<<"LazyPickSubset()"<<std::endl;

  const unsigned int poolSize=fps.size();
  distFunctor distance(fps);
  INT_VECT noSeeds;  // intentionally empty: no pre-selected entries
  RDPickers::MaxMinPicker maxMin;

  BOOST_LOG(rdInfoLog)<<" picking"<<std::endl;
  picks = maxMin.lazyPick(distance,poolSize,nToPick,noSeeds,0xF00D);
  BOOST_LOG(rdInfoLog)<<" done"<<std::endl;
}
// Selects `nToPick` entries from `fps` with the MaxMin picker, using a fully
// precomputed Tanimoto distance matrix. The chosen indices are written to
// `picks`. No seed picks are supplied and the random seed is fixed at 0xF00D.
//
// The matrix is stored as a packed lower triangle: the distance between
// entries i and j (i > j) lives at index i*(i-1)/2 + j, which is the layout
// MaxMinPicker::pick() reads. Memory use is n*(n-1)/2 doubles.
void PickSubset(const std::vector<boost::shared_ptr<ExplicitBitVect> > &fps,
                INT_VECT &picks,unsigned int nToPick){
  typedef std::vector<double>::size_type size_type;

  BOOST_LOG(rdInfoLog)<<"PickSubset()"<<std::endl;
  unsigned int n=fps.size();

  // Compute the element count in size_type: n*(n-1) in unsigned int wraps
  // around once n exceeds 65536, which would under-allocate the buffer.
  const size_type nPairs = n<2 ? 0 : static_cast<size_type>(n)*(n-1)/2;

  // The vector frees the matrix on every exit path (pick() may throw).
  // At least one element is kept so &dm[0] is always a valid pointer.
  std::vector<double> dm(nPairs ? nPairs : 1, 0.0);

  size_type idx=0;
  BOOST_LOG(rdInfoLog)<<" generating distance matrix"<<std::endl;
  // Row i holds the distances to all j < i, so idx == i*(i-1)/2 + j here.
  for(unsigned int i=1;i<n;++i){
    for(unsigned int j=0;j<i;++j){
      dm[idx++] = 1.-TanimotoSimilarity(*fps[i].get(),*fps[j].get());
    }
  }
  BOOST_LOG(rdInfoLog)<<" done"<<std::endl;

  RDPickers::MaxMinPicker picker;
  BOOST_LOG(rdInfoLog)<<" picking"<<std::endl;
  INT_VECT seedPicks;
  picks = picker.pick(&dm[0],n,nToPick,seedPicks,0xF00D);
  BOOST_LOG(rdInfoLog)<<" done"<<std::endl;
}
int
main(int argc, char *argv[])
{
RDLog::InitLogs();
std::vector<boost::shared_ptr<ExplicitBitVect> > fps;
LoadAndFingerprintMols("znp.sdf",fps);
INT_VECT picks;
//PickSubset(fps,picks,20);
LazyPickSubset(fps,picks,100);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment