Created
January 22, 2025 01:46
-
-
Save park-brian/c5ecb9c39cab077b922d4b877ec77723 to your computer and use it in GitHub Desktop.
substructure-search.js
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * SMILES Rules:
 *   Atoms:
 *     Represented by their atomic symbols.
 *     Common organic elements (B, C, N, O, P, S, F, Cl, Br, I) can be written without brackets if they have no formal charge and the number of attached hydrogens is implied by typical valence.
 *     Atoms outside this set or with explicit hydrogens, charges, or isotopes are enclosed in brackets, e.g., [Fe], [OH2], [13C].
 *   Bonds:
 *     Single bonds: - (often omitted).
 *     Double bonds: =.
 *     Triple bonds: #.
 *     Aromatic bonds: :.
 *     Adjacent atoms are assumed to be connected by single bonds unless specified otherwise.
 *   Branches:
 *     Parentheses () indicate branching in the molecular structure.
 *   Rings:
 *     Numbers are used to indicate the start and end of ring structures.
 *     For example, C1CCCCC1 represents cyclohexane, where the number 1 indicates the ring closure between the first and last carbon atoms.
 *   Aromaticity:
 *     Aromatic atoms are represented by lowercase letters, e.g., c1ccccc1 denotes benzene.
 *   Isotopes and Charges:
 *     Isotopes are specified by preceding the atomic symbol with the mass number in square brackets, e.g., [13C].
 *     Charges are indicated by + or - signs; for example, [NH4+] represents the ammonium ion.
 *
 * SMARTS Rules:
 *   Atomic Symbols:
 *     Standard atomic symbols are used.
 *   Wildcards:
 *     * matches any atom.
 *     A matches any aliphatic (non-aromatic) atom.
 *     a matches any aromatic atom.
 *     Logical operators can combine atomic properties, e.g., [C,N] matches any carbon or nitrogen atom.
 *   Bond Symbols:
 *     Single bond: -.
 *     Double bond: =.
 *     Triple bond: #.
 *     Aromatic bond: :.
 *     Any bond: ~.
 *   Logical Operators:
 *     ; (and): Specifies that an atom must satisfy all the given conditions.
 *     , (or): Specifies that an atom can satisfy any one of the given conditions.
 *     ! (not): Specifies that an atom must not satisfy the given condition.
 *   Recursive SMARTS:
 *     Allows for the definition of more complex patterns by including entire SMARTS patterns within others.
 *     Useful for specifying environments around a particular atom or group.
 */
/**
 * One atom (graph node) of a parsed molecule.
 *
 * Bonds are stored as `{ toAtom, bondType }` records in `bonds`; every bond
 * is recorded on both of its end atoms.
 */
class Atom {
  /**
   * @param {number} id - Identifier of the atom (the parser uses the atom's index in the molecule).
   * @param {string} element - Element symbol, e.g. "C" or "Cl".
   * @param {boolean} [aromatic=false] - Whether the atom is aromatic.
   */
  constructor(id, element, aromatic = false) {
    this.id = id;
    this.element = element;
    this.aromatic = aromatic;
    this.bonds = [];
    this.isotope = null;
    this.charge = null;
    this.hydrogens = null;
    this.used = false;
    this.inRing = false;
    this.molecule = null; // Back-reference to the owning molecule, assigned by the parser
  }

  /**
   * Connects this atom to `toAtom`, recording the bond on both atoms.
   *
   * @param {Atom} toAtom - The atom at the other end of the bond.
   * @param {number|string} [bondType=1] - Bond order, or a symbolic type such as "aromatic".
   */
  addBond(toAtom, bondType = 1) {
    this.bonds.push({ toAtom, bondType });
    toAtom.bonds.push({ toAtom: this, bondType });
  }
}
/**
 * One atom (graph node) of a parsed SMARTS query.
 *
 * `query` is the condition tree that a molecule atom has to satisfy; bonds
 * are stored as `{ toAtom, bondType }` records on both end atoms.
 */
class QueryAtom {
  /**
   * @param {number} id - Identifier of the query atom (the parser uses its index in the query).
   * @param {object} query - Condition tree describing which molecule atoms match.
   */
  constructor(id, query) {
    this.id = id;
    this.query = query;
    this.bonds = [];
  }

  /**
   * Connects this query atom to `toAtom`, recording the bond on both atoms.
   *
   * @param {QueryAtom} toAtom - The query atom at the other end of the bond.
   * @param {number|string} bondType - Required bond order or symbolic type ("aromatic", "any").
   */
  addBond(toAtom, bondType) {
    this.bonds.push({ toAtom, bondType });
    toAtom.bonds.push({ toAtom: this, bondType });
  }
}
/**
 * Parses a SMILES string into a molecule graph made of `Atom` objects.
 *
 * Supported syntax: organic-subset atoms, bracket atoms (isotope, chirality
 * marks, hydrogen count, charge), the `*` wildcard, explicit bonds
 * (`- = # $ : ~ / \`), the `.` disconnection, branches, and ring closures
 * (single digit or `%nn`).
 */
class SmilesParser {
  /**
   * Parses `smiles` and returns the molecule.
   *
   * After the graph is built every atom gets its `molecule` back-reference,
   * its `inRing` flag and, for unbracketed organic-subset atoms, its implicit
   * hydrogen count.
   *
   * @param {string} smiles - The SMILES string to parse.
   * @returns {{atoms: Atom[]}} The parsed molecule.
   * @throws {Error} On an unmatched ")", a ring-closure digit that precedes
   *   every atom, a "%" without digits, an unterminated "[" or a character
   *   that cannot start an atom.
   */
  parse(smiles) {
    const bondChars = "-=#$:~/\\";
    const atoms = [];
    const branchStack = [];
    // ring number -> { atom, bondType } for rings that are currently open
    const openRings = new Map();
    let current = null;
    // null means "no explicit bond symbol seen"; it is reset after every bond
    // so that an explicit symbol applies to exactly one bond.
    let bondType = null;
    let i = 0;

    // Bonds `from` to `to` with the pending bond type. Implicit bonds go
    // through connectAtom so that aromatic neighbours get an aromatic bond.
    const bond = (from, to, explicitType) => {
      if (explicitType === null) {
        this.connectAtom(from, to, 1);
      } else {
        from.addBond(to, explicitType);
      }
    };

    const appendAtom = (atom, length) => {
      atoms.push(atom);
      if (current) bond(current, atom, bondType);
      bondType = null;
      current = atom;
      i += length;
    };

    const ringBond = (ringNumber) => {
      if (!current) {
        throw new Error(`Ring closure ${ringNumber} appears before any atom`);
      }
      const opening = openRings.get(ringNumber);
      if (opening === undefined) {
        openRings.set(ringNumber, { atom: current, bondType });
      } else {
        openRings.delete(ringNumber);
        // The bond symbol may be written at either end of the ring bond.
        const explicitType = bondType !== null ? bondType : opening.bondType;
        bond(current, opening.atom, explicitType);
      }
      bondType = null;
    };

    while (i < smiles.length) {
      const c = smiles[i];
      if (c === "(") {
        branchStack.push(current);
        i++;
        continue;
      }
      if (c === ")") {
        if (branchStack.length === 0) {
          throw new Error(`Unmatched ')' at position ${i}`);
        }
        current = branchStack.pop();
        bondType = null;
        i++;
        continue;
      }
      if (c === ".") {
        // Disconnection: the next atom is not bonded to the previous one.
        current = null;
        bondType = null;
        i++;
        continue;
      }
      if (bondChars.includes(c)) {
        bondType = this.parseBondType(c);
        i++;
        continue;
      }
      if (c === "%") {
        const num = this.parseNumber(smiles, i + 1, 2);
        if (num === "") throw new Error("Invalid ring number after %");
        i += 1 + num.length;
        ringBond(parseInt(num, 10));
        continue;
      }
      if (/\d/.test(c)) {
        // Outside of "%nn" every digit is a ring number of its own.
        i++;
        ringBond(parseInt(c, 10));
        continue;
      }
      if (c === "[") {
        const parsed = this.parseBracketedAtom(smiles, i);
        const atom = new Atom(atoms.length, parsed.element, parsed.aromatic);
        atom.isotope = parsed.isotope;
        atom.charge = parsed.charge;
        atom.hydrogens = parsed.hydrogens;
        appendAtom(atom, parsed.length);
        continue;
      }
      if (c === "*") {
        appendAtom(new Atom(atoms.length, "*"), 1);
        continue;
      }
      const { element, aromatic, length } = this.parseElement(smiles, i);
      appendAtom(new Atom(atoms.length, element, aromatic), length);
    }

    const molecule = { atoms };
    for (const atom of atoms) {
      atom.molecule = molecule;
    }
    this.detectRings(atoms);
    this.computeImplicitHydrogens(atoms);
    return molecule;
  }

  /**
   * Fills in `hydrogens` for organic-subset atoms whose count is still null.
   *
   * The count is the difference between the atom's bond-order sum and the
   * smallest normal valence that can accommodate it (0 if none can). Aromatic
   * atoms contribute one extra valence unit to the aromatic system and only
   * use their lowest normal valence.
   *
   * @param {Atom[]} atoms - Atoms to update in place.
   */
  computeImplicitHydrogens(atoms) {
    const normalValences = new Map([
      ["B", [3]],
      ["C", [4]],
      ["N", [3, 5]],
      ["O", [2]],
      ["P", [3, 5]],
      ["S", [2, 4, 6]],
      ["F", [1]],
      ["Cl", [1]],
      ["Br", [1]],
      ["I", [1]],
    ]);
    for (const atom of atoms) {
      if (atom.hydrogens !== null) continue;
      const valences = normalValences.get(atom.element);
      if (valences === undefined) continue;

      let usedValence = atom.aromatic ? 1 : 0;
      for (const { bondType } of atom.bonds) {
        // Symbolic bond types ("aromatic", "any") count as one unit.
        usedValence += typeof bondType === "number" ? bondType : 1;
      }
      const charge = atom.charge || 0;
      const candidates = atom.aromatic ? valences.slice(0, 1) : valences;
      let hydrogens = 0;
      for (const valence of candidates) {
        if (valence + charge >= usedValence) {
          hydrogens = valence + charge - usedValence;
          break;
        }
      }
      atom.hydrogens = hydrogens;
    }
  }

  /**
   * Sets `inRing` on every atom: true when the atom has at least one bond
   * that lies on a cycle.
   *
   * A depth-first search computes discovery/low-link numbers; a tree bond
   * whose child can reach the parent (or anything above it) by another route
   * is a ring bond. Bonds linking two rings are therefore not ring bonds.
   *
   * @param {Atom[]} atoms - Atoms to update in place.
   */
  detectRings(atoms) {
    const discovered = new Map();
    const low = new Map();
    let counter = 0;
    for (const atom of atoms) {
      atom.inRing = false;
    }
    for (const root of atoms) {
      if (discovered.has(root)) continue;
      discovered.set(root, counter);
      low.set(root, counter);
      counter++;
      const frames = [{ atom: root, parent: null, parentSkipped: false, next: 0 }];
      while (frames.length > 0) {
        const frame = frames[frames.length - 1];
        const atom = frame.atom;
        if (frame.next < atom.bonds.length) {
          const neighbor = atom.bonds[frame.next].toAtom;
          frame.next++;
          if (neighbor === frame.parent && !frame.parentSkipped) {
            // This is the bond we arrived by; a second bond to the parent
            // would be a genuine cycle and is not skipped.
            frame.parentSkipped = true;
            continue;
          }
          if (discovered.has(neighbor)) {
            low.set(atom, Math.min(low.get(atom), discovered.get(neighbor)));
          } else {
            discovered.set(neighbor, counter);
            low.set(neighbor, counter);
            counter++;
            frames.push({ atom: neighbor, parent: atom, parentSkipped: false, next: 0 });
          }
        } else {
          frames.pop();
          if (frames.length > 0) {
            const parent = frames[frames.length - 1].atom;
            low.set(parent, Math.min(low.get(parent), low.get(atom)));
            if (low.get(atom) <= discovered.get(parent)) {
              atom.inRing = true;
              parent.inRing = true;
            }
          }
        }
      }
    }
  }

  /**
   * Reads an unbracketed atom symbol at position `i`.
   *
   * A lowercase letter is an aromatic atom. Of the two-letter symbols only
   * "Cl" and "Br" may appear outside brackets, so in "Cc" the "c" is a
   * separate aromatic atom.
   *
   * @param {string} smiles - The string being parsed.
   * @param {number} i - Index of the first character of the symbol.
   * @returns {{element: string, aromatic: boolean, length: number}}
   * @throws {Error} If the character at `i` is not a letter.
   */
  parseElement(smiles, i) {
    const c = smiles[i];
    if (c === undefined || !/[A-Za-z]/.test(c)) {
      throw new Error(`Unexpected character '${c}' at position ${i}`);
    }
    if (c === c.toLowerCase()) {
      return { element: c.toUpperCase(), aromatic: true, length: 1 };
    }
    const pair = smiles.slice(i, i + 2);
    if (pair === "Cl" || pair === "Br") {
      return { element: pair, aromatic: false, length: 2 };
    }
    return { element: c, aromatic: false, length: 1 };
  }

  /**
   * Parses a bracket atom such as "[13CH3-]" that starts at `start`.
   *
   * Fields are read in the order isotope, element, chirality mark, hydrogen
   * count, charge; anything else before the closing bracket is ignored. A
   * bracket atom without an "H" field has zero hydrogens.
   *
   * @param {string} smiles - The string being parsed.
   * @param {number} start - Index of the opening "[".
   * @returns {{element: string, isotope: (number|null), hydrogens: number,
   *   charge: number, aromatic: boolean, length: number}} `length` includes
   *   both brackets.
   * @throws {Error} If the closing "]" is missing.
   */
  parseBracketedAtom(smiles, start) {
    const end = smiles.indexOf("]", start + 1);
    if (end === -1) {
      throw new Error(`Unterminated '[' at position ${start}`);
    }
    let i = start + 1;
    const readDigits = () => {
      let digits = "";
      while (i < end && /\d/.test(smiles[i])) digits += smiles[i++];
      return digits;
    };

    const isotopeStr = readDigits();
    const isotope = isotopeStr ? parseInt(isotopeStr, 10) : null;

    let element = "";
    let aromatic = false;
    if (i < end && /[A-Z]/.test(smiles[i])) {
      element = smiles[i++];
      if (i < end && /[a-z]/.test(smiles[i])) element += smiles[i++];
    } else if (i < end && /[a-z]/.test(smiles[i])) {
      aromatic = true;
      const pair = smiles.slice(i, i + 2);
      if (pair === "se" || pair === "as") {
        element = pair[0].toUpperCase() + pair[1];
        i += 2;
      } else {
        element = smiles[i++].toUpperCase();
      }
    }

    // Chirality is recognised only so that the fields after it are reached.
    if (i < end && smiles[i] === "@") {
      i++;
      if (i < end && smiles[i] === "@") {
        i++;
      } else {
        const chiralClass = /^(TH|AL|SP|TB|OH)\d+/.exec(smiles.slice(i, end));
        if (chiralClass) i += chiralClass[0].length;
      }
    }

    let hydrogens = 0;
    if (i < end && smiles[i] === "H") {
      i++;
      const hCount = readDigits();
      hydrogens = hCount ? parseInt(hCount, 10) : 1;
    }

    let charge = 0;
    if (i < end && (smiles[i] === "+" || smiles[i] === "-")) {
      const sign = smiles[i++];
      const direction = sign === "+" ? 1 : -1;
      const chargeStr = readDigits();
      if (chargeStr) {
        charge = parseInt(chargeStr, 10) * direction;
      } else {
        let signCount = 1;
        while (i < end && smiles[i] === sign) {
          signCount++;
          i++;
        }
        charge = signCount * direction;
      }
    }

    return { element, isotope, hydrogens, charge, aromatic, length: end + 1 - start };
  }

  /**
   * Bonds `toAtom` to `fromAtom` (no-op when `fromAtom` is missing). A bond
   * of type 1 between two aromatic atoms is stored as "aromatic".
   *
   * @param {Atom|null} fromAtom - Atom to bond from, or null/undefined for none.
   * @param {Atom} toAtom - Atom to bond to.
   * @param {number|string} bondType - Bond order or symbolic type.
   */
  connectAtom(fromAtom, toAtom, bondType) {
    if (!fromAtom) return;
    const promoted = bondType === 1 && fromAtom.aromatic && toAtom.aromatic ? "aromatic" : bondType;
    fromAtom.addBond(toAtom, promoted);
  }

  /**
   * Maps a bond symbol to its bond type; unknown symbols (including "-",
   * "/" and "\") are single bonds.
   *
   * @param {string} c - The bond symbol.
   * @returns {number|string} Bond order, "aromatic" or "any".
   */
  parseBondType(c) {
    return { "=": 2, "#": 3, "$": 4, ":": "aromatic", "~": "any" }[c] || 1;
  }

  /**
   * Reads the run of decimal digits that starts at `i`.
   *
   * @param {string} smiles - The string being parsed.
   * @param {number} i - Index to start reading at.
   * @param {number} [maxDigits=Infinity] - Upper bound on the digits consumed.
   * @returns {string} The digits read (possibly empty).
   */
  parseNumber(smiles, i, maxDigits = Infinity) {
    let num = "";
    while (i < smiles.length && num.length < maxDigits && /\d/.test(smiles[i])) {
      num += smiles[i++];
    }
    return num;
  }
}
/**
 * Parses a SMARTS pattern into a query graph made of `QueryAtom` objects.
 *
 * Each query atom carries a condition tree (`type` is one of "and", "or",
 * "not", "element", "hydrogens", "isotope", "ring", "wildcard", "aromatic",
 * "aliphatic", "recursive" or "unknown"). Inside brackets the operators
 * bind, from tightest to loosest, as `!`, `&`, `,`, `;`.
 */
class SmartsParser extends SmilesParser {
  /**
   * Splits `str` on `delimiter`, ignoring delimiters nested in parentheses.
   *
   * @param {string} str - Text to split.
   * @param {string} delimiter - Single-character delimiter.
   * @returns {string[]} The trimmed, non-empty pieces.
   */
  splitGroups(str, delimiter) {
    const groups = [];
    let depth = 0;
    let group = "";
    for (const c of str) {
      if (c === "(") depth++;
      else if (c === ")") depth--;
      if (c === delimiter && depth === 0) {
        groups.push(group.trim());
        group = "";
      } else {
        group += c;
      }
    }
    groups.push(group.trim());
    return groups.filter((g) => g.length > 0);
  }

  /**
   * Parses `smarts` and returns the query graph.
   *
   * An explicit bond symbol applies to the next bond only. A bond written
   * without a symbol is stored as "aromatic" when both query atoms require
   * aromaticity and as 1 otherwise.
   *
   * @param {string} smarts - The SMARTS pattern.
   * @returns {{atoms: QueryAtom[]}} The parsed query.
   * @throws {Error} On an unmatched ")", a ring-closure digit that precedes
   *   every atom, a "%" without digits or an unterminated "[".
   */
  parse(smarts) {
    const atoms = [];
    const branchStack = [];
    // ring number -> { atom, bondType }; a Map so that atom id 0 is not
    // mistaken for "ring not open".
    const openRings = new Map();
    let current = null;
    let bondType = null; // null = no explicit bond symbol pending
    let i = 0;

    const resolveBond = (from, to, explicitType) => {
      if (explicitType !== null) return explicitType;
      return this.isAromaticQuery(from.query) && this.isAromaticQuery(to.query) ? "aromatic" : 1;
    };

    const appendAtom = (query, length) => {
      const atom = new QueryAtom(atoms.length, query);
      atoms.push(atom);
      if (current) {
        this.connectQueryAtom(current, atom, resolveBond(current, atom, bondType));
      }
      bondType = null;
      current = atom;
      i += length;
    };

    const ringBond = (ringNumber) => {
      if (!current) {
        throw new Error(`Ring closure ${ringNumber} appears before any atom`);
      }
      const opening = openRings.get(ringNumber);
      if (opening === undefined) {
        openRings.set(ringNumber, { atom: current, bondType });
      } else {
        openRings.delete(ringNumber);
        const explicitType = bondType !== null ? bondType : opening.bondType;
        current.addBond(opening.atom, resolveBond(current, opening.atom, explicitType));
      }
      bondType = null;
    };

    while (i < smarts.length) {
      const c = smarts[i];
      if (c === "(") {
        branchStack.push(current);
        i++;
        continue;
      }
      if (c === ")") {
        if (branchStack.length === 0) {
          throw new Error(`Unmatched ')' at position ${i}`);
        }
        current = branchStack.pop();
        bondType = null;
        i++;
        continue;
      }
      if (["-", "=", "#", ":", "~"].includes(c)) {
        bondType = this.parseBondType(c);
        i++;
        continue;
      }
      if (c === "%") {
        const digits = /^\d{1,2}/.exec(smarts.slice(i + 1, i + 3));
        if (digits === null) throw new Error("Invalid ring number after %");
        i += 1 + digits[0].length;
        ringBond(parseInt(digits[0], 10));
        continue;
      }
      if (/\d/.test(c)) {
        // Outside of "%nn" every digit is a ring number of its own.
        i++;
        ringBond(parseInt(c, 10));
        continue;
      }
      if (c === "[") {
        const query = this.parseSmartsAtom(smarts, i);
        appendAtom(query, query.length);
        continue;
      }
      if (c === "*") {
        appendAtom({ type: "wildcard" }, 1);
        continue;
      }
      const { element, aromatic, length } = this.parseElement(smarts, i);
      if (element === "A") {
        // "A" / "a" are the aliphatic / aromatic wildcards, not elements.
        appendAtom({ type: aromatic ? "aromatic" : "aliphatic" }, length);
        continue;
      }
      appendAtom({ type: "element", value: element, aromatic }, length);
    }
    return { atoms };
  }

  /**
   * Tells whether `query` can only be satisfied by aromatic atoms, as far as
   * that is visible from its top-level structure.
   *
   * @param {object} query - A condition tree.
   * @returns {boolean}
   */
  isAromaticQuery(query) {
    if (!query) return false;
    if (query.type === "aromatic") return true;
    if (query.type === "element") return query.aromatic === true;
    if (query.type === "and") return query.conditions.some((c) => this.isAromaticQuery(c));
    return false;
  }

  /**
   * Parses the bracket atom that starts at `start` into an "and" condition
   * whose members are the `;`-separated groups of the expression.
   *
   * @param {string} smarts - The pattern being parsed.
   * @param {number} start - Index of the opening "[".
   * @returns {object} `{ type: "and", conditions, length }`, where `length`
   *   is the number of characters consumed including both brackets.
   * @throws {Error} If the closing "]" is missing.
   */
  parseSmartsAtom(smarts, start) {
    let end = start + 1;
    let depth = 0; // brackets nested inside recursive $(...) expressions
    while (end < smarts.length) {
      const c = smarts[end];
      if (c === "[") {
        depth++;
      } else if (c === "]") {
        if (depth === 0) break;
        depth--;
      }
      end++;
    }
    if (end >= smarts.length) {
      throw new Error(`Unterminated '[' at position ${start}`);
    }

    const query = { type: "and", conditions: [] };
    for (const andGroup of this.splitGroups(smarts.slice(start + 1, end), ";")) {
      const alternatives = this.splitGroups(andGroup, ",").map((alt) => this.parseConjunction(alt));
      if (alternatives.length === 0) continue;
      query.conditions.push(alternatives.length === 1 ? alternatives[0] : { type: "or", conditions: alternatives });
    }
    query.length = end + 1 - start;
    return query;
  }

  /**
   * Parses one `&`-separated conjunction of (possibly negated) primitives.
   *
   * @param {string} expr - Expression without top-level "," or ";".
   * @returns {object} The condition tree for the expression.
   */
  parseConjunction(expr) {
    const factors = this.splitGroups(expr, "&").map((factor) => {
      let text = factor;
      let negate = false;
      if (text.startsWith("!")) {
        negate = true;
        text = text.slice(1).trim();
      }
      const condition = this.parseAtomPart(text);
      return negate ? { type: "not", condition } : condition;
    });
    if (factors.length === 0) return { type: "unknown", value: expr };
    return factors.length === 1 ? factors[0] : { type: "and", conditions: factors };
  }

  /**
   * Parses a single atomic primitive.
   *
   * Recognised forms: `*`, `a`, `A`, `R`, `$(...)`, `<mass><Element>`,
   * `H<n>`, `<element>H<n>`, an element symbol, and a lowercase aromatic
   * symbol. Anything else yields `{ type: "unknown" }`.
   *
   * @param {string} part - The primitive text.
   * @returns {object} The condition for the primitive.
   */
  parseAtomPart(part) {
    if (part === "*") return { type: "wildcard" };
    if (part === "a") return { type: "aromatic" };
    if (part === "A") return { type: "aliphatic" };
    if (part === "R") return { type: "ring" };

    if (part.startsWith("$(") && part.endsWith(")")) {
      const parser = new SmartsParser();
      return { type: "recursive", query: parser.parse(part.slice(2, -1)) };
    }

    const isotopeMatch = part.match(/^(\d+)([A-Z][a-z]?)$/);
    if (isotopeMatch) {
      return {
        type: "isotope",
        isotope: parseInt(isotopeMatch[1], 10),
        element: isotopeMatch[2],
      };
    }

    const hydrogenMatch = part.match(/^H(\d+)$/);
    if (hydrogenMatch) {
      return { type: "hydrogens", count: parseInt(hydrogenMatch[1], 10) };
    }

    // Case-sensitive on purpose: "Rh" is rhodium, not "R" with one hydrogen.
    const elementHMatch = part.match(/^([A-Z][a-z]?|[bcnops])H(\d*)$/);
    if (elementHMatch) {
      const hCount = elementHMatch[2] ? parseInt(elementHMatch[2], 10) : 1;
      return {
        type: "and",
        conditions: [this.parseAtomPart(elementHMatch[1]), { type: "hydrogens", count: hCount }],
      };
    }

    if (/^[A-Z][a-z]?$/.test(part)) {
      return { type: "element", value: part };
    }
    if (/^[bcnops]$/.test(part)) {
      return { type: "element", value: part.toUpperCase(), aromatic: true };
    }
    if (part === "se" || part === "as") {
      return { type: "element", value: part[0].toUpperCase() + part[1], aromatic: true };
    }
    return { type: "unknown", value: part };
  }

  /**
   * Bonds `toAtom` to `fromAtom`; does nothing when `fromAtom` is missing.
   *
   * @param {QueryAtom|null} fromAtom - Query atom to bond from, if any.
   * @param {QueryAtom} toAtom - Query atom to bond to.
   * @param {number|string} bondType - Required bond order or symbolic type.
   */
  connectQueryAtom(fromAtom, toAtom, bondType) {
    if (fromAtom) fromAtom.addBond(toAtom, bondType);
  }
}
/**
 * Tells whether a molecule atom satisfies a query atom's condition tree.
 *
 * @param {Atom} molAtom - The molecule atom to test.
 * @param {QueryAtom} queryAtom - The query atom whose `query` is evaluated.
 * @returns {boolean} True when `molAtom` satisfies `queryAtom.query`.
 */
function atomMatches(molAtom, queryAtom) {
  return evaluateCondition(molAtom, queryAtom.query);
}
/**
 * Evaluates a query condition tree against one molecule atom.
 *
 * Composite conditions ("and", "or", "not") recurse into their members;
 * every other type tests a single property of the atom. A missing
 * condition and any unrecognised type (including "unknown") evaluate to
 * false.
 *
 * @param {Atom} molAtom - The molecule atom to test.
 * @param {object} condition - Condition tree produced by the SMARTS parser.
 * @returns {boolean} Whether the atom satisfies the condition.
 */
function evaluateCondition(molAtom, condition) {
  if (!condition) return false;
  switch (condition.type) {
    case "and":
      return condition.conditions.every((c) => evaluateCondition(molAtom, c));
    case "or":
      return condition.conditions.some((c) => evaluateCondition(molAtom, c));
    case "not":
      return !evaluateCondition(molAtom, condition.condition);
    case "element":
      // An element condition without an `aromatic` property matches both
      // aromatic and aliphatic atoms of that element.
      return (
        molAtom.element === condition.value &&
        (condition.aromatic === undefined || molAtom.aromatic === condition.aromatic)
      );
    case "hydrogens":
      return molAtom.hydrogens === condition.count;
    case "wildcard":
      return true;
    case "aromatic":
      return molAtom.aromatic;
    case "aliphatic":
      return !molAtom.aromatic;
    case "ring":
      return molAtom.inRing;
    case "isotope":
      return molAtom.element === condition.element && molAtom.isotope === condition.isotope;
    case "recursive":
      return queryMatchesRecursive(molAtom, condition.query);
    default:
      return false;
  }
}
/**
 * Evaluates a recursive SMARTS environment: tells whether `query` can be
 * embedded in the atom's molecule with its first query atom placed on
 * `molAtom`.
 *
 * The search is seeded from `molAtom` only, rather than enumerating every
 * match in the molecule and filtering afterwards.
 *
 * @param {Atom} molAtom - The atom whose environment is tested; its
 *   `molecule` property must reference the owning molecule.
 * @param {{atoms: QueryAtom[]}} query - The parsed inner SMARTS pattern.
 * @returns {boolean} True when at least one embedding anchored at `molAtom` exists.
 */
function queryMatchesRecursive(molAtom, query) {
  const queryAtoms = query.atoms;
  if (queryAtoms.length === 0) return false;
  if (!atomMatches(molAtom, queryAtoms[0])) return false;
  const matches = [];
  backtrack(molAtom.molecule, queryAtoms, new Map([[0, molAtom.id]]), new Set([molAtom.id]), matches);
  return matches.length > 0;
}
/**
 * Finds every embedding of `query` in `molecule`.
 *
 * Each molecule atom that satisfies the first query atom seeds a
 * backtracking search. Every mapping found is returned as an array of
 * molecule atoms ordered by query atom id, so element 0 is the atom matched
 * by the first query atom. Symmetric embeddings are reported separately.
 *
 * @param {{atoms: Atom[]}} molecule - The molecule to search.
 * @param {{atoms: QueryAtom[]}} query - The parsed SMARTS query.
 * @returns {Atom[][]} One array of matched atoms per embedding; empty when
 *   the query has no atoms.
 */
function findMatches(molecule, query) {
  const queryAtoms = query.atoms;
  if (queryAtoms.length === 0) return [];

  const matches = [];
  for (const molAtom of molecule.atoms) {
    if (!atomMatches(molAtom, queryAtoms[0])) continue;
    const mapping = new Map([[0, molAtom.id]]);
    const used = new Set([molAtom.id]);
    backtrack(molecule, queryAtoms, mapping, used, matches);
  }
  return matches.map((match) =>
    Array.from(match.entries())
      .sort((a, b) => a[0] - b[0])
      .map((entry) => molecule.atoms[entry[1]])
  );
}
/**
 * Extends a partial query-to-molecule mapping by one query atom and recurses
 * until every query atom is mapped; each complete mapping is appended to
 * `matches`.
 *
 * The lowest-numbered unmapped query atom is placed next. Candidates are the
 * molecule neighbours of an already mapped query neighbour (or every unused
 * atom when the query atom has no mapped neighbour). A candidate is accepted
 * only if it satisfies the query atom and is bonded, with a compatible bond
 * type, to the image of every mapped query neighbour - so ring-closure bonds
 * of the query are verified too.
 *
 * Query atom ids and molecule atom ids are assumed to equal the atoms'
 * indices in their arrays, which is how the parsers number them.
 *
 * @param {{atoms: Atom[]}} molecule - The molecule being searched.
 * @param {QueryAtom[]} queryAtoms - All atoms of the query.
 * @param {Map<number, number>} mapping - Query atom id -> molecule atom id; not modified.
 * @param {Set<number>} used - Ids of the molecule atoms already mapped; not modified.
 * @param {Map<number, number>[]} matches - Output list of complete mappings.
 */
function backtrack(molecule, queryAtoms, mapping, used, matches) {
  if (mapping.size === queryAtoms.length) {
    matches.push(new Map(mapping));
    return;
  }
  const currentQueryId = queryAtoms.findIndex((_, index) => !mapping.has(index));
  const currentQueryAtom = queryAtoms[currentQueryId];

  // One constraint per query bond that leads to an already mapped atom.
  const constraints = [];
  for (const bond of currentQueryAtom.bonds) {
    const neighborQueryId = bond.toAtom.id;
    if (mapping.has(neighborQueryId)) {
      constraints.push({
        molAtom: molecule.atoms[mapping.get(neighborQueryId)],
        bondType: bond.bondType,
      });
    }
  }

  // A Set keeps a candidate from being tried twice when the molecule has
  // parallel bonds between the same pair of atoms.
  const candidates =
    constraints.length > 0
      ? new Set(constraints[0].molAtom.bonds.map((molBond) => molBond.toAtom))
      : molecule.atoms;

  for (const candidate of candidates) {
    if (used.has(candidate.id) || !atomMatches(candidate, currentQueryAtom)) continue;
    const bondsOk = constraints.every((constraint) =>
      constraint.molAtom.bonds.some(
        (molBond) =>
          molBond.toAtom === candidate &&
          (constraint.bondType === "any" || molBond.bondType === constraint.bondType)
      )
    );
    if (!bondsOk) continue;
    const newMapping = new Map(mapping);
    const newUsed = new Set(used);
    newMapping.set(currentQueryId, candidate.id);
    newUsed.add(candidate.id);
    backtrack(molecule, queryAtoms, newMapping, newUsed, matches);
  }
}
function runTests() { | |
let pass = 0, | |
fail = 0; | |
function test(name, fn) { | |
try { | |
fn(); | |
pass++; | |
console.log(`✓ ${name}`); | |
} catch (e) { | |
fail++; | |
console.error(`✗ ${name}\n ${e.message}`); | |
} | |
} | |
test("Parse simple SMILES", () => { | |
const parser = new SmilesParser(); | |
const mol = parser.parse("CCO"); | |
if (mol.atoms.length !== 3) throw new Error("Expected 3 atoms, got " + mol.atoms.length); | |
if (mol.atoms[0].element !== "C") throw new Error("First atom should be C"); | |
if (mol.atoms[2].element !== "O") throw new Error("Last atom should be O"); | |
}); | |
test("Aromatic system parsing", () => { | |
const parser = new SmilesParser(); | |
const mol = parser.parse("c1ccccc1"); | |
if (mol.atoms.length !== 6) throw new Error("Expected 6 atoms, got " + mol.atoms.length); | |
if (!mol.atoms[0].aromatic) throw new Error("Aromatic flag not set"); | |
}); | |
test("Bracketed atom parsing", () => { | |
const parser = new SmilesParser(); | |
const mol = parser.parse("[13CH3-]"); | |
const atom = mol.atoms[0]; | |
if (atom.element !== "C") throw new Error("Element should be C"); | |
if (atom.isotope !== 13) throw new Error("Isotope should be 13, got " + atom.isotope); | |
if (atom.charge !== -1) throw new Error("Charge should be -1, got " + atom.charge); | |
if (atom.hydrogens !== 3) throw new Error("Expected 3 hydrogens, got " + atom.hydrogens); | |
}); | |
test("SMARTS wildcard matching", () => { | |
const smilesParser = new SmilesParser(); | |
const mol = smilesParser.parse("CCO"); | |
const smartsParser = new SmartsParser(); | |
const query = smartsParser.parse("*"); | |
const matches = findMatches(mol, query); | |
if (matches.length !== 3) throw new Error("Expected 3 matches, got " + matches.length); | |
}); | |
test("Basic substructure match", () => { | |
const smilesParser = new SmilesParser(); | |
const mol = smilesParser.parse("CCO"); | |
const smartsParser = new SmartsParser(); | |
const query = smartsParser.parse("O"); | |
const matches = findMatches(mol, query); | |
if (matches.length !== 1 || matches[0][0].element !== "O") { | |
throw new Error("Oxygen not matched. Matches: " + matches.length); | |
} | |
}); | |
test("Multi-digit charge parsing", () => { | |
const parser = new SmilesParser(); | |
const mol = parser.parse("[Fe+++]"); | |
const atom = mol.atoms[0]; | |
if (atom.charge !== 3) throw new Error("Expected charge +3, got " + atom.charge); | |
}); | |
test("Hydrogen count without charge", () => { | |
const parser = new SmilesParser(); | |
const mol = parser.parse("[NH3]"); | |
const atom = mol.atoms[0]; | |
if (atom.hydrogens !== 3) throw new Error("Expected 3 hydrogens, got " + atom.hydrogens); | |
}); | |
test("Aromatic nitrogen in bracket", () => { | |
const parser = new SmilesParser(); | |
const mol = parser.parse("[nH]"); | |
const atom = mol.atoms[0]; | |
if (atom.element !== "N" || !atom.aromatic || atom.hydrogens !== 1) { | |
throw new Error("Aromatic N with 1 H not parsed"); | |
} | |
}); | |
test("Complex branching structure", () => { | |
const parser = new SmilesParser(); | |
const mol = parser.parse("C(C)(C)CO"); | |
if (mol.atoms.length !== 5) throw new Error("Expected 5 atoms, got " + mol.atoms.length); | |
const root = mol.atoms[0]; | |
if (root.bonds.length !== 3) throw new Error("Root should have 3 bonds, got " + root.bonds.length); | |
}); | |
test("SMARTS logical OR condition", () => { | |
const smilesParser = new SmilesParser(); | |
const mol = smilesParser.parse("CCO"); | |
const smartsParser = new SmartsParser(); | |
const query = smartsParser.parse("[C,N]"); | |
const matches = findMatches(mol, query); | |
if (matches.length !== 2) throw new Error("Expected 2 matches for C/N, got " + matches.length); | |
}); | |
test("SMARTS logical NOT condition", () => { | |
const smilesParser = new SmilesParser(); | |
const mol = smilesParser.parse("CCO"); | |
const smartsParser = new SmartsParser(); | |
const query = smartsParser.parse("[!C]"); | |
const matches = findMatches(mol, query); | |
if (matches.length !== 1 || matches[0][0].element !== "O") { | |
throw new Error("Expected 1 match for non-carbon, got " + matches.length); | |
} | |
}); | |
test("SMARTS bond type matching", () => { | |
const smilesParser = new SmilesParser(); | |
const molDouble = smilesParser.parse("C=O"); | |
const molSingle = smilesParser.parse("CO"); | |
const smartsParser = new SmartsParser(); | |
const query = smartsParser.parse("C=O"); | |
const matchesDouble = findMatches(molDouble, query); | |
if (matchesDouble.length !== 1) throw new Error("Should match double bond, got " + matchesDouble.length); | |
const matchesSingle = findMatches(molSingle, query); | |
if (matchesSingle.length !== 0) throw new Error("Should not match single bond, got " + matchesSingle.length); | |
}); | |
test("Aromatic bond matching", () => { | |
const smilesParser = new SmilesParser(); | |
const mol = smilesParser.parse("c1ccccc1"); | |
const smartsParser = new SmartsParser(); | |
const query = smartsParser.parse("c:c"); | |
const matches = findMatches(mol, query); | |
if (matches.length < 6) throw new Error("Should find all aromatic bonds, got " + matches.length); | |
}); | |
test("Wildcard bond matching", () => { | |
const smilesParser = new SmilesParser(); | |
const mol = smilesParser.parse("COC=O"); | |
const smartsParser = new SmartsParser(); | |
const query = smartsParser.parse("C~O"); | |
const matches = findMatches(mol, query); | |
if (matches.length !== 3) throw new Error("Should match three C-O bonds, got " + matches.length); | |
}); | |
test("Recursive SMARTS matching", () => { | |
const smilesParser = new SmilesParser(); | |
const mol = smilesParser.parse("C1CCCCC1"); | |
const smartsParser = new SmartsParser(); | |
const query = smartsParser.parse("[C;R]"); | |
const matches = findMatches(mol, query); | |
if (matches.length !== 6) throw new Error("Should match all ring carbons, got " + matches.length); | |
}); | |
test("Isotope parsing in SMILES", () => { | |
const parser = new SmilesParser(); | |
const mol = parser.parse("[13C]"); | |
const atom = mol.atoms[0]; | |
if (atom.isotope !== 13) throw new Error("Expected isotope 13, got " + atom.isotope); | |
}); | |
test("Non-organic element in SMILES", () => { | |
const parser = new SmilesParser(); | |
const mol = parser.parse("[Fe]"); | |
const atom = mol.atoms[0]; | |
if (atom.element !== "Fe") throw new Error("Expected Fe element, got " + atom.element); | |
}); | |
test("Explicit hydrogens in SMILES", () => { | |
const parser = new SmilesParser(); | |
const mol = parser.parse("[OH2]"); | |
const atom = mol.atoms[0]; | |
if (atom.hydrogens !== 2) throw new Error("Expected 2 hydrogens, got " + atom.hydrogens); | |
}); | |
test("Charge with multiple signs", () => { | |
const parser = new SmilesParser(); | |
const mol = parser.parse("[Fe+++]"); | |
const atom = mol.atoms[0]; | |
if (atom.charge !== 3) throw new Error("Expected +3 charge, got " + atom.charge); | |
}); | |
test("Aromatic nitrogen with hydrogen", () => { | |
const parser = new SmilesParser(); | |
const mol = parser.parse("[nH]"); | |
const atom = mol.atoms[0]; | |
if (atom.element !== "N" || !atom.aromatic || atom.hydrogens !== 1) { | |
throw new Error("Aromatic N with 1 H not parsed"); | |
} | |
}); | |
test("SMARTS aromatic wildcard", () => { | |
const smilesParser = new SmilesParser(); | |
const mol = smilesParser.parse("c1ccccc1"); | |
const smartsParser = new SmartsParser(); | |
const query = smartsParser.parse("a"); | |
const matches = findMatches(mol, query); | |
if (matches.length !== 6) throw new Error("All aromatic carbons should match, got " + matches.length); | |
}); | |
test("SMARTS aliphatic wildcard", () => { | |
const smilesParser = new SmilesParser(); | |
const mol = smilesParser.parse("CCO"); | |
const smartsParser = new SmartsParser(); | |
const query = smartsParser.parse("A"); | |
const matches = findMatches(mol, query); | |
if (matches.length !== 3) throw new Error("All aliphatic atoms should match, got " + matches.length); | |
}); | |
test("SMARTS logical AND condition", () => { | |
const smilesParser = new SmilesParser(); | |
const mol = smilesParser.parse("C1CCCCC1"); | |
const smartsParser = new SmartsParser(); | |
const query = smartsParser.parse("[C;R]"); | |
const matches = findMatches(mol, query); | |
if (matches.length !== 6) throw new Error("All ring carbons should match, got " + matches.length); | |
}); | |
test("SMARTS isotope query", () => { | |
const smilesParser = new SmilesParser(); | |
const mol = smilesParser.parse("[13C]"); | |
const smartsParser = new SmartsParser(); | |
const query = smartsParser.parse("[13C]"); | |
const matches = findMatches(mol, query); | |
if (matches.length !== 1) throw new Error("Expected 1 match for isotope 13C, got " + matches.length); | |
}); | |
test("SMARTS any bond wildcard", () => { | |
const smilesParser = new SmilesParser(); | |
const mol = smilesParser.parse("C=CO"); | |
const smartsParser = new SmartsParser(); | |
const query = smartsParser.parse("C~O"); | |
const matches = findMatches(mol, query); | |
if (matches.length !== 1) throw new Error("Expected 1 C-O bond, got " + matches.length); | |
}); | |
test("Recursive SMARTS pattern", () => { | |
const smilesParser = new SmilesParser(); | |
const mol = smilesParser.parse("C1CCCCC1"); | |
const smartsParser = new SmartsParser(); | |
const query = smartsParser.parse("[$([C;R])]"); | |
const matches = findMatches(mol, query); | |
if (matches.length !== 6) { | |
throw new Error( | |
`All ring carbons should match, expected 6, got ${matches.length}. ` + `Matched atoms: ${matches.map((m) => m[0].id)}` | |
); | |
} | |
}); | |
test("Multi-digit ring closure with %", () => { | |
const parser = new SmilesParser(); | |
const mol = parser.parse("C%12CCC%12"); | |
const first = mol.atoms[0]; | |
const last = mol.atoms[3]; | |
const hasClosure = first.bonds.some((b) => b.toAtom === last); | |
if (!hasClosure) { | |
const bonds = first.bonds.map((b) => b.toAtom.id).join(", "); | |
throw new Error(`Ring closure not formed between atoms 0 and 3. ` + `First atom bonds: ${bonds}`); | |
} | |
}); | |
test("Explicit hydrogens in organic element (SMILES)", () => { | |
const parser = new SmilesParser(); | |
const mol = parser.parse("[CH2]"); | |
const atom = mol.atoms[0]; | |
if (atom.element !== "C" || atom.hydrogens !== 2) { | |
throw new Error("Expected CH2, got " + (atom.hydrogens ? `C with ${atom.hydrogens}H` : "C")); | |
} | |
}); | |
test("Two-letter element parsing (SMILES)", () => { | |
const parser = new SmilesParser(); | |
const mol = parser.parse("[SiH3]"); | |
const atom = mol.atoms[0]; | |
if (atom.element !== "Si" || atom.hydrogens !== 3) { | |
throw new Error("Expected SiH3, got " + `${atom.element} with ${atom.hydrogens || 0}H`); | |
} | |
}); | |
test("Numeric charge specification (SMILES)", () => { | |
const parser = new SmilesParser(); | |
const mol = parser.parse("[Fe+3]"); | |
const atom = mol.atoms[0]; | |
if (atom.charge !== 3) throw new Error("Expected +3 charge, got " + atom.charge); | |
}); | |
test("SMARTS combined AND/OR logic", () => { | |
const smilesParser = new SmilesParser(); | |
const mol = smilesParser.parse("C1CCNC1"); // Cyclopentane with one N | |
const smartsParser = new SmartsParser(); | |
const query = smartsParser.parse("[C,N;R]"); // (C OR N) AND Ring | |
const matches = findMatches(mol, query); | |
if (matches.length !== 5) { | |
// 4 Cs + 1 N in ring | |
throw new Error(`Expected 5 ring atoms, got ${matches.length}`); | |
} | |
}); | |
test("SMARTS explicit hydrogen matching", () => { | |
const smilesParser = new SmilesParser(); | |
const mol = smilesParser.parse("C[CH2]"); | |
const smartsParser = new SmartsParser(); | |
const query = smartsParser.parse("[CH3]"); | |
const matches1 = findMatches(mol, query); | |
const query2 = smartsParser.parse("[CH2]"); | |
const matches2 = findMatches(mol, query2); | |
if (matches1.length !== 1 || matches2.length !== 1) { | |
throw new Error(`H counts not matched (3: ${matches1.length}, 2: ${matches2.length})`); | |
} | |
}); | |
test("High-number ring closure with %", () => { | |
const parser = new SmilesParser(); | |
const mol = parser.parse("C%99CCC%99"); | |
const first = mol.atoms[0]; | |
const last = mol.atoms[3]; | |
const hasClosure = first.bonds.some((b) => b.toAtom === last); | |
if (!hasClosure) throw new Error("Failed to create %99 ring closure"); | |
}); | |
test("SMARTS combined NOT conditions", () => { | |
const smilesParser = new SmilesParser(); | |
const mol = smilesParser.parse("CCO"); | |
const smartsParser = new SmartsParser(); | |
const query = smartsParser.parse("[CH2]"); | |
const matches = findMatches(mol, query); | |
if (matches.length !== 1 || matches[0][0].element !== "C") { | |
throw new Error("Should match middle carbon, got " + (matches.length ? matches[0][0].element : "none")); | |
} | |
}); | |
test("Isotope with two-letter element", () => { | |
const parser = new SmilesParser(); | |
const mol = parser.parse("[235U]"); | |
const atom = mol.atoms[0]; | |
if (atom.element !== "U" || atom.isotope !== 235) { | |
throw new Error(`Expected ²³⁵U, got ${atom.isotope || ""}${atom.element}`); | |
} | |
}); | |
// Recursive query "[C;$(C(=O))]" against "CC(=O)O": expects a single match
// whose first atom has id 1.
test("Recursive SMARTS with bond", () => {
  const mol = new SmilesParser().parse("CC(=O)O");
  const query = new SmartsParser().parse("[C;$(C(=O))]");
  const hits = findMatches(mol, query);
  const onlyCarbonyl = hits.length === 1 && hits[0][0].id === 1;
  if (!onlyCarbonyl) {
    throw new Error("Should match carbonyl carbon, got " + hits.length);
  }
});
// Three consecutive minus signs in a bracket atom should produce charge -3.
test("Negative charge parsing", () => {
  const oxygen = new SmilesParser().parse("[O---]").atoms[0];
  if (oxygen.charge === -3) return;
  throw new Error("Expected -3 charge, got " + oxygen.charge);
});
// Both two-letter halogen symbols must appear among the parsed element names.
test("Chlorine in SMILES", () => {
  const mol = new SmilesParser().parse("ClCBr");
  const symbols = mol.atoms.map((atom) => atom.element);
  const hasBothHalogens = ["Cl", "Br"].every((halogen) => symbols.includes(halogen));
  if (!hasBothHalogens) {
    throw new Error("Halogens not parsed correctly");
  }
});
// "C~O" against "C=CO": exactly one carbon/oxygen pair is expected.
test("SMARTS any bond matching", () => {
  const mol = new SmilesParser().parse("C=CO");
  const query = new SmartsParser().parse("C~O");
  const hits = findMatches(mol, query);
  if (hits.length === 1) return;
  throw new Error("Expected 1 C-O connection, got " + hits.length);
});
// Benzene searched with an aromatic-bond query; at least six matches expected.
// NOTE(review): "-:" puts two bond symbols back to back; how SmartsParser reads
// that is not visible here — confirm this is the intended query.
test("Aromatic bond query", () => {
  const benzene = new SmilesParser().parse("c1ccccc1");
  const query = new SmartsParser().parse("c-:c");
  const hits = findMatches(benzene, query);
  const enoughHits = !(hits.length < 6);
  if (!enoughHits) {
    throw new Error("Aromatic bonds not matched properly");
  }
});
// "[2H]" must parse as element H carrying isotope 2.
test("Isotope with hydrogen (deuterium)", () => {
  const atom = new SmilesParser().parse("[2H]").atoms[0];
  const isDeuterium = atom.element === "H" && atom.isotope === 2;
  if (isDeuterium) return;
  throw new Error(`Expected deuterium, got ${atom.isotope || ""}${atom.element}`);
});
// A bare recursive query "[$(C=O)]" on "CC(=O)O" should yield one match whose
// first atom is a carbon.
test("SMARTS recursive carbonyl group", () => {
  const mol = new SmilesParser().parse("CC(=O)O");
  const query = new SmartsParser().parse("[$(C=O)]");
  const hits = findMatches(mol, query);
  const singleCarbon = hits.length === 1 && hits[0][0].element === "C";
  if (!singleCarbon) {
    throw new Error("Should match carbonyl carbon, got " + hits.length);
  }
});
// A lone "C" must report four hydrogens.
test("Implicit hydrogens in methane", () => {
  const carbon = new SmilesParser().parse("C").atoms[0];
  if (carbon.hydrogens === 4) return;
  throw new Error("Expected 4 hydrogens for methane, got " + carbon.hydrogens);
});
// "C1C1" opens and closes ring label 1 on neighbouring atoms; the first atom
// must end up with a bond to the second.
// NOTE(review): the two atoms are also adjacent in the string, so this check
// may be satisfied by the ordinary chain bond alone — confirm the ring closure
// itself is being exercised.
test("Multiple ring closures with same number", () => {
  const { atoms } = new SmilesParser().parse("C1C1");
  const head = atoms[0];
  const tail = atoms[1];
  const linked = head.bonds.some((bond) => bond.toAtom === tail);
  if (linked) return;
  throw new Error("Failed to form two-membered ring via C1C1");
});
// An explicit ":" between uppercase atoms: the bond type must be "aromatic"
// while neither atom is flagged aromatic.
test("Aromatic bond between aliphatic atoms", () => {
  const mol = new SmilesParser().parse("C:C");
  const firstBond = mol.atoms[0].bonds[0];
  const bondIsAromatic = firstBond.bondType === "aromatic";
  if (!bondIsAromatic) {
    throw new Error(`Bond should be aromatic, got ${firstBond.bondType}`);
  }
  const neitherAtomAromatic = !mol.atoms[0].aromatic && !mol.atoms[1].aromatic;
  if (!neitherAtomAromatic) {
    throw new Error("Atoms incorrectly marked aromatic");
  }
});
// "[C,N;!O]" — (C or N) and not O — should hit two atoms of "CNO".
test("SMARTS nested logical conditions", () => {
  const mol = new SmilesParser().parse("CNO");
  const query = new SmartsParser().parse("[C,N;!O]");
  const hits = findMatches(mol, query);
  if (hits.length === 2) return;
  throw new Error(`Expected 2 matches (C/N), got ${hits.length}`);
});
// The chlorine in "ClC" is already bonded once and must report zero hydrogens.
test("Chlorine valence handling", () => {
  const chlorine = new SmilesParser().parse("ClC").atoms[0];
  if (chlorine.hydrogens === 0) return;
  throw new Error(`Cl should have 0 hydrogens, got ${chlorine.hydrogens}`);
});
// Both carbons of "CC" should satisfy "[CH3]".
test("SMARTS hydrogen count matching", () => {
  const ethane = new SmilesParser().parse("CC");
  const query = new SmartsParser().parse("[CH3]");
  const hits = findMatches(ethane, query);
  if (hits.length === 2) return;
  throw new Error(`Expected 2 methyl groups, got ${hits.length}`);
});
// Boron carrying three carbon substituents must report zero hydrogens.
test("Boron valence calculation", () => {
  const centre = new SmilesParser().parse("B(C)(C)C").atoms[0];
  if (centre.hydrogens === 0) return;
  throw new Error(`Boron should have 0 hydrogens, got ${centre.hydrogens}`);
});
// "%123" is written on the first and fourth atoms; the first atom must carry
// a bond whose toAtom is the fourth atom.
// NOTE(review): OpenSMILES limits "%" to two digits ("%12" followed by "3");
// how this parser tokenizes "%123" is not visible here, and the check below
// would presumably pass under either reading — confirm the intended semantics.
test("Three-digit ring closure", () => {
  const { atoms } = new SmilesParser().parse("C%123CCC%123");
  const ringStart = atoms[0];
  const ringEnd = atoms[3];
  const closed = ringStart.bonds.some((bond) => bond.toAtom === ringEnd);
  if (closed) return;
  throw new Error("Failed to handle three-digit ring closure");
});
// "[SeH]" must parse as element "Se" with one hydrogen, not as "S" plus more.
test("Selenium in brackets", () => {
  const selenium = new SmilesParser().parse("[SeH]").atoms[0];
  const parsedCorrectly = selenium.element === "Se" && selenium.hydrogens === 1;
  if (parsedCorrectly) return;
  throw new Error(`Parsed [SeH] as ${selenium.element}H${selenium.hydrogens}`);
});
// "[!C;!Cl]" — neither carbon nor chlorine — leaves only bromine in "ClCBr".
test("SMARTS not-either condition", () => {
  const mol = new SmilesParser().parse("ClCBr");
  const query = new SmartsParser().parse("[!C;!Cl]");
  const hits = findMatches(mol, query);
  const bromineOnly = hits.length === 1 && hits[0][0].element === "Br";
  if (!bromineOnly) {
    throw new Error("Should match bromine only");
  }
});
// A lone "P" must report three hydrogens.
test("Phosphorus valence calculation", () => {
  const phosphorus = new SmilesParser().parse("P").atoms[0];
  if (phosphorus.hydrogens === 3) return;
  throw new Error(`Phosphorus should have 3 hydrogens, got ${phosphorus.hydrogens}`);
});
// Exercises "~" against a DOUBLE bond. The "SMARTS any bond matching" test
// already covers "C~O" on "C=CO", where the C–O bond is single and would
// match even without the wildcard; here the only carbon/oxygen pair in
// "CC=O" is joined by "=", so a match requires "~" to accept it.
test("SMARTS any-bond wildcard", () => {
  const smilesParser = new SmilesParser();
  const mol = smilesParser.parse("CC=O");
  const smartsParser = new SmartsParser();
  const query = smartsParser.parse("C~O");
  const matches = findMatches(mol, query);
  if (matches.length !== 1) {
    throw new Error(`Should match C=O bond via any-bond wildcard, got ${matches.length}`);
  }
});
console.log(`\nResults: ${pass} passed, ${fail} failed`); | |
} | |
// Run the suite immediately when this script is evaluated.
runTests(); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment