Last active
February 25, 2022 14:41
-
-
Save MichelML/3a6c9b1d795d18785c743c6aec4fbb67 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import datamol as dm
# Example inputs for manually exercising the filter functions in this module.
# Test structures taken from
# https://www.rdkit.org/docs/GettingStartedInPython.html#substructure-searching
test_smiles = "c1ccccc1O"  # molecule to be filtered, written as SMILES
test_statement_substruct = "ccO"  # substructure pattern to look for in it
_match_type_checkers = { | |
"WR": lambda matches_count, match_n_val, match_n_second_val: match_n_val | |
<= matches_count | |
<= match_n_second_val, | |
"EQ": lambda matches_count, match_n_val, match_n_second_val: match_n_val | |
== matches_count, | |
"LT": lambda matches_count, match_n_val, match_n_second_val: matches_count | |
< match_n_val, | |
"LTE": lambda matches_count, match_n_val, match_n_second_val: matches_count | |
<= match_n_val, | |
"GT": lambda matches_count, match_n_val, match_n_second_val: matches_count | |
> match_n_val, | |
"GTE": lambda matches_count, match_n_val, match_n_second_val: matches_count | |
>= match_n_val, | |
} | |
_bool_operators_map = { | |
"AND": " and", | |
"OR": " or", | |
None: "" | |
} | |
def is_matching_filter_statement(input_mol: dm.Mol, statement):
    """Evaluate a single statement of a chemical filter against a molecule.

    The statement's substructure is searched in ``input_mol``; the number of
    matches is compared to the statement's bound(s) using the predicate
    selected by ``match_type``; the outcome is inverted for "exclude"
    statements.

    Args:
        input_mol: molecule to test.
        statement: mapping with the keys
            ``logic_type`` ("include" or "exclude"),
            ``structure`` (substructure as a SMARTS or SMILES string),
            ``is_smarts`` (truthy if ``structure`` is SMARTS, else SMILES),
            ``match_type`` (a key of ``_match_type_checkers``),
            ``match_n_val`` (first bound on the match count) and, optionally,
            ``match_n_second_val`` (second bound, read by range checks).

    Returns:
        bool: True if the molecule passes the filter statement, False if not.

    Raises:
        KeyError: if a required key is missing from ``statement``.
        ValueError: if ``match_type`` or ``logic_type`` is unknown, if the
            structure cannot be parsed, or if the bounds cannot be compared
            with the match count.
    """
    logic_type = statement["logic_type"]  # include/exclude
    substruct = statement["structure"]
    is_smarts = statement["is_smarts"]
    # within range/equal/lower than/lower or equal than/greater than/
    # greater or equal than
    match_type = statement["match_type"]
    match_n_val = statement["match_n_val"]
    match_n_second_val = statement.get("match_n_second_val")  # may be None

    # Validate the statement before doing any chemistry. Only the table lookup
    # is guarded, so an unrelated failure is never reported as a bad match type.
    try:
        checker = _match_type_checkers[match_type]
    except (KeyError, TypeError):  # TypeError: unhashable match_type
        raise ValueError(
            f"Match type specified, {match_type}, does not exist"
        ) from None
    if logic_type not in ("include", "exclude"):
        raise ValueError(f"Logic type specified, {logic_type}, does not exist")

    qmol = dm.from_smarts(substruct) if is_smarts else dm.to_mol(substruct)
    if qmol is None:
        # NOTE(review): datamol is expected to return None for input it cannot
        # parse; fail here with a readable message rather than inside RDKit.
        notation = "SMARTS" if is_smarts else "SMILES"
        raise ValueError(f"Could not parse structure {substruct!r} as {notation}")

    matches_count = len(input_mol.GetSubstructMatches(qmol))

    try:
        is_matching = checker(matches_count, match_n_val, match_n_second_val)
    except TypeError as err:
        # Typically a bound that is None or not a number, e.g. a range check
        # without "match_n_second_val".
        raise ValueError(
            f"Cannot apply match type {match_type} with bounds "
            f"{match_n_val!r} and {match_n_second_val!r}"
        ) from err

    return is_matching if logic_type == "include" else not is_matching
def is_matching_filter(input_mol: dm.Mol, statements=None):
    """Evaluate a list of chained statements of a chemical filter.

    Each statement is evaluated with ``is_matching_filter_statement`` and its
    ``chaining_operator`` ("AND", "OR" or absent/None) links its result to the
    result of the next statement. There is no nested logic: the chain is flat
    and "AND" binds tighter than "OR", exactly as in the equivalent Python
    boolean expression. The operator carried by the last statement has nothing
    to chain to and is ignored.

    Args:
        input_mol: molecule to test.
        statements: list of statement mappings; None or empty means
            "no filter".

    Returns:
        bool: True if the molecule passes the chemical filter, False if not.

    Raises:
        KeyError: if a statement carries an unknown ``chaining_operator``.
        ValueError: if a statement other than the last one has no chaining
            operator, or if a statement itself is invalid.
    """
    if not statements:
        return True

    # The chain is an OR of AND-groups: "a and b or c" == "(a and b) or c".
    any_group_passed = False  # OR of the groups already closed by an "OR"
    current_group = True  # AND of the statements in the group still open
    last_index = len(statements) - 1

    for index, statement in enumerate(statements):
        # Every statement is evaluated (no short-circuit) so that an invalid
        # statement is always reported, wherever it sits in the chain.
        is_single_match = is_matching_filter_statement(input_mol, statement)
        current_group = current_group and is_single_match

        # Look the operator up for every statement so unknown values raise
        # KeyError; strip() tolerates padded or unpadded fragments in the map.
        joiner = _bool_operators_map[statement.get("chaining_operator")].strip()
        if index == last_index:
            break  # nothing follows: a trailing operator is meaningless
        if joiner == "or":
            any_group_passed = any_group_passed or current_group
            current_group = True
        elif joiner != "and":
            raise ValueError(
                f"Statement {index} has no chaining operator "
                "but is followed by another statement"
            )

    return any_group_passed or current_group
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment