Skip to content

Instantly share code, notes, and snippets.

@daenuprobst
Created July 6, 2023 12:19
Show Gist options
  • Save daenuprobst/e990674bd6f75a332a0637552e26cc06 to your computer and use it in GitHub Desktop.
Save daenuprobst/e990674bd6f75a332a0637552e26cc06 to your computer and use it in GitHub Desktop.
import pandas as pd
import tmap as tm
from faerun import Faerun
from mhfp.encoder import MHFPEncoder
from rdkit.Chem import AllChem
from tqdm import tqdm
# Setup
# - conda create -n tmap-env -c tmap tmap
# - conda activate tmap-env
# - pip install mhfp faerun rdkit-pypi tqdm
# Run
# python main.py
def load_data():
    """Return the small example data set as a pandas DataFrame.

    The frame has one row per molecule and three columns:
    ``id`` (integer identifier), ``smiles`` (SMILES string) and
    ``prop`` (a floating-point value attached to the molecule).
    """
    molecule_smiles = [
        "CNO",
        "CCC",
        "CNC",
        "COC",
        "CCN",
        "C1CCCCC1",
        "C1CCCCC1COC",
        "CCNCNO",
    ]
    molecule_ids = list(range(len(molecule_smiles)))
    property_values = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7]

    # Column order (id, smiles, prop) follows dict insertion order.
    columns = {
        "id": molecule_ids,
        "smiles": molecule_smiles,
        "prop": property_values,
    }
    return pd.DataFrame(columns)
def main():
    """Lay out the example molecules with TMAP and export a Faerun plot.

    Steps, in order: load the data frame, encode each SMILES string as an
    MHFP fingerprint, index the fingerprints in an LSH forest, compute the
    TMAP layout from the forest, and export the scatter plot together with
    the tree edges using Faerun.

    Rows whose SMILES string cannot be parsed by RDKit are reported and
    skipped. Fingerprints, labels and colour values are collected in the
    same loop so that they stay aligned with the layout coordinates.
    """
    #
    # Load the data
    #
    df = load_data()

    #
    # Data encoding (using MHFP, other fingerprints may be chosen)
    #
    enc = MHFPEncoder()
    # NOTE(review): 2048 presumably has to match the fingerprint length
    # produced by MHFPEncoder -- confirm against the tmap/mhfp docs.
    lf = tm.LSHForest(2048, 128)

    fps = []
    labels = []
    props = []
    for _, row in tqdm(df.iterrows(), total=len(df), desc="Calculating fingerprints"):
        smiles = row["smiles"]
        mol = AllChem.MolFromSmiles(smiles)
        if mol is None:
            # RDKit signals an unparsable SMILES by returning None instead of
            # raising; encoding None would crash, so skip the row entirely.
            # tqdm.write keeps the message from garbling the progress bar.
            tqdm.write(f"Skipping row {row['id']}: could not parse SMILES {smiles!r}")
            continue

        fps.append(tm.VectorUint(enc.encode_mol(mol, min_radius=0)))
        # Label is "<smiles>__<id>"; the quote replacement applies to the
        # id part only (method call binds tighter than "+").
        labels.append(smiles + "__" + str(row["id"]).replace("'", "´"))
        # Collected per kept row so colours match the plotted points.
        props.append(row["prop"])

    #
    # Index data using LSH (allows for faster KNN searches)
    #
    lf.batch_add(fps)
    lf.index()

    #
    # Get TMAP embeddings from indexed fingerprints
    #
    cfg = tm.LayoutConfiguration()
    cfg.k = 100
    cfg.sl_repeats = 2
    cfg.mmm_repeats = 2
    # Make node_size smaller the more data there is (~repulsive force of nodes)
    cfg.node_size = 2
    # x, y: point coordinates; s, t: edge endpoints passed to add_tree below.
    x, y, s, t, _ = tm.layout_from_lsh_forest(lf, config=cfg)

    # Export to a HTML file using Faerun
    f = Faerun(
        clear_color="#222222",
        coords=False,
        view="front",
        impress='made with <a href="http://tmap.gdb.tools" target="_blank">tmap</a><br />and <a href="https://github.com/reymond-group/faerun-python" target="_blank">faerun</a>',
    )
    f.add_scatter(
        "Custom",
        {
            "x": x,
            "y": y,
            "c": [
                props,
            ],
            "labels": labels,
        },
        title_index=1,
        categorical=[False],
        colormap=[
            "turbo",
        ],
        has_legend=True,
        series_title=[
            "Some Property [mols/L]",
        ],
        point_scale=5,
        shader="smoothCircle",
    )
    # The tree reuses the points of the "Custom" scatter series.
    f.add_tree("Custom_tree", {"from": s, "to": t}, point_helper="Custom")
    f.plot(template="smiles")
if __name__ == "__main__":
    # Run the pipeline only when executed as a script, not on import.
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment