Created
July 6, 2023 12:19
-
-
Save daenuprobst/e990674bd6f75a332a0637552e26cc06 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd
import tmap as tm
from faerun import Faerun
from mhfp.encoder import MHFPEncoder
from rdkit.Chem import AllChem
from tqdm import tqdm
# Setup
# - conda create -n tmap-env -c tmap tmap
# - conda activate tmap-env
# - pip install mhfp faerun rdkit-pypi tqdm
# Run
# python main.py
def load_data():
    """Return the built-in demo data set as a pandas DataFrame.

    The frame has eight rows and three columns:

    * ``id``     -- integer identifiers 0..7
    * ``smiles`` -- one SMILES string per row
    * ``prop``   -- one float per row (0.0 .. 0.7)

    Returns:
        pandas.DataFrame: the demo table described above.
    """
    return pd.DataFrame.from_dict(
        {
            "id": [0, 1, 2, 3, 4, 5, 6, 7],
            "smiles": [
                "CNO",
                "CCC",
                "CNC",
                "COC",
                "CCN",
                "C1CCCCC1",
                "C1CCCCC1COC",
                "CCNCNO",
            ],
            "prop": [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7],
        }
    )
def main():
    """Build a TMAP layout for the demo molecules and export it with Faerun.

    Steps: load the data frame, encode every SMILES as an MHFP fingerprint,
    index the fingerprints in an LSH forest, compute the layout from that
    forest, and hand coordinates, tree edges and labels to Faerun for
    plotting.

    Raises:
        ValueError: If RDKit cannot parse one of the SMILES strings.
    """
    #
    # Load the data
    #
    df = load_data()

    #
    # Data encoding (using MHFP, other fingerprints may be chosen)
    #
    enc = MHFPEncoder()
    # NOTE(review): 2048 presumably has to match the encoder's fingerprint
    # length -- confirm if MHFPEncoder is constructed with other settings.
    lf = tm.LSHForest(2048, 128)

    fps = []
    labels = []
    for _, row in tqdm(df.iterrows(), total=len(df), desc="Calculating fingerprints"):
        smiles = row["smiles"]
        mol = AllChem.MolFromSmiles(smiles)
        if mol is None:
            # RDKit signals a parse failure by returning None. Fail loudly
            # rather than skip the row: skipping would leave fps/labels out
            # of step with df["prop"], which is used for colouring below.
            raise ValueError(f"Could not parse SMILES {smiles!r} (id={row['id']})")

        fps.append(tm.VectorUint(enc.encode_mol(mol, min_radius=0)))
        # Label is "<smiles>__<id>"; the replace() applies to the id string
        # only (method call binds tighter than "+").
        labels.append(smiles + "__" + str(row["id"]).replace("'", "´"))

    #
    # Index data using LSH (allows for faster KNN searches)
    #
    lf.batch_add(fps)
    lf.index()

    #
    # Get TMAP embeddings from indexed fingerprints
    #
    cfg = tm.LayoutConfiguration()
    cfg.k = 100
    cfg.sl_repeats = 2
    cfg.mmm_repeats = 2
    # Make node_size smaller the more data there is (~repulsive force of nodes)
    cfg.node_size = 2

    # x, y: point coordinates; s, t: edge endpoints passed to add_tree as
    # "from"/"to". The fifth return value is not needed here.
    x, y, s, t, _ = tm.layout_from_lsh_forest(lf, config=cfg)

    # Export to a HTML file using Faerun
    f = Faerun(
        clear_color="#222222",
        coords=False,
        view="front",
        impress='made with <a href="http://tmap.gdb.tools" target="_blank">tmap</a><br />and <a href="https://github.com/reymond-group/faerun-python" target="_blank">faerun</a>',
    )

    f.add_scatter(
        "Custom",
        {
            "x": x,
            "y": y,
            "c": [
                df["prop"],
            ],
            "labels": labels,
        },
        title_index=1,
        categorical=[False],
        colormap=[
            "turbo",
        ],
        has_legend=True,
        series_title=[
            "Some Property [mols/L]",
        ],
        point_scale=5,
        shader="smoothCircle",
    )

    # Draw the tree edges on top of the "Custom" scatter's points.
    f.add_tree("Custom_tree", {"from": s, "to": t}, point_helper="Custom")
    f.plot(template="smiles")
if __name__ == "__main__":
    # Run the demo only when executed as a script, not when imported.
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment