Skip to content

Instantly share code, notes, and snippets.

@rfprod
Last active April 30, 2017 21:10
Show Gist options
  • Save rfprod/34f17716ebdde547d2486fc2dc468b84 to your computer and use it in GitHub Desktop.
Save rfprod/34f17716ebdde547d2486fc2dc468b84 to your computer and use it in GitHub Desktop.
Parse Molecule
# Counts the atoms of every element in a chemical formula.
#
# The formula is scanned once, left to right. Each opening bracket starts a
# new group on a stack; the matching closing bracket multiplies the group's
# counts by the number that follows it (1 when there is none) and folds them
# into the enclosing group. Round, square and curly brackets may therefore be
# nested in any order and to any depth.
#
# formula - String such as 'K4[ON(SO3)2]2'. An element symbol is one
#           uppercase letter optionally followed by one lowercase letter;
#           characters that are neither symbols, digits after a symbol or
#           bracket, nor brackets are ignored.
#
# Returns an Object mapping element symbol to atom count, e.g. {H: 2, O: 1}.
# Returns an empty Object when brackets are unbalanced or mismatched.
parseMolecule = (formula) ->
  closerFor = {'(': ')', '[': ']', '{': '}'}
  # Alternatives: element + optional count | opening bracket | closing bracket + optional multiplier.
  tokenPattern = /([A-Z][a-z]?)(\d*)|([(\[{])|([)\]}])(\d*)/g

  # A missing count means 1; an explicit count (including 0) is taken literally.
  toCount = (digits) -> if digits is '' then 1 else parseInt digits, 10

  addAtoms = (counts, name, amount) ->
    counts[name] = (counts[name] ? 0) + amount
    return

  # Bottom frame is the whole formula; every open bracket pushes another frame.
  stack = [{closer: null, counts: {}}]

  while (token = tokenPattern.exec formula) isnt null
    [whole, element, elementDigits, opener, closer, groupDigits] = token
    current = stack[stack.length - 1]
    if element?
      addAtoms current.counts, element, toCount elementDigits
    else if opener?
      stack.push {closer: closerFor[opener], counts: {}}
    else
      # Stray closing bracket or wrong bracket type: the formula is malformed.
      return {} unless current.closer is closer
      stack.pop()
      multiplier = toCount groupDigits
      parent = stack[stack.length - 1]
      for own name, amount of current.counts
        addAtoms parent.counts, name, amount * multiplier

  # Frames left over mean some bracket was never closed.
  return {} unless stack.length is 1
  stack[0].counts
# TEST: run the parser over sample formulas; the expected result is noted beside each one.
for sample in [
  'H2O',                          # {H: 2, O: 1}
  'Mg(OH)2',                      # {Mg: 1, O: 2, H: 2}
  'K4[ON(SO3)2]2',                # {K: 4, O: 14, N: 2, S: 4}
  'As2{Be4C5[BCo3(CO2)3]2}4Cu5'   # {As: 2, B: 8, Be: 16, C: 44, Cu: 5, Co: 24, O: 48}
]
  parseMolecule sample
/**
 * Counts the atoms of every element in a chemical formula.
 *
 * The formula is scanned once, left to right. Each opening bracket starts a
 * new group on a stack; the matching closing bracket multiplies the group's
 * counts by the number that follows it (1 when there is none) and folds them
 * into the enclosing group. Round, square and curly brackets may therefore be
 * nested in any order and to any depth.
 *
 * An element symbol is one uppercase letter optionally followed by one
 * lowercase letter. Characters that are neither symbols, digits following a
 * symbol or closing bracket, nor brackets are ignored.
 *
 * @param {string} formula - Chemical formula, e.g. 'K4[ON(SO3)2]2'.
 * @returns {Object<string, number>} Map of element symbol to atom count,
 *   e.g. {H: 2, O: 1}. An empty object is returned when brackets are
 *   unbalanced or mismatched.
 */
function parseMolecule(formula) {
  const closerFor = { '(': ')', '[': ']', '{': '}' };
  // Alternatives: element + optional count | opening bracket | closing bracket + optional multiplier.
  const tokenPattern = /([A-Z][a-z]?)(\d*)|([(\[{])|([)\]}])(\d*)/g;

  // A missing count means 1; an explicit count (including 0) is taken literally.
  const toCount = (digits) => (digits === '' ? 1 : Number.parseInt(digits, 10));

  const addAtoms = (counts, name, amount) => {
    counts.set(name, (counts.get(name) || 0) + amount);
  };

  // Bottom frame is the whole formula; every open bracket pushes another frame.
  const stack = [{ expectedCloser: null, counts: new Map() }];

  let token;
  while ((token = tokenPattern.exec(formula)) !== null) {
    const [, element, elementDigits, opener, closer, groupDigits] = token;
    const current = stack[stack.length - 1];

    if (element !== undefined) {
      addAtoms(current.counts, element, toCount(elementDigits));
    } else if (opener !== undefined) {
      stack.push({ expectedCloser: closerFor[opener], counts: new Map() });
    } else {
      // Stray closing bracket or wrong bracket type: the formula is malformed.
      if (current.expectedCloser !== closer) {
        return {};
      }
      stack.pop();
      const multiplier = toCount(groupDigits);
      const parent = stack[stack.length - 1];
      for (const [name, amount] of current.counts) {
        addAtoms(parent.counts, name, amount * multiplier);
      }
    }
  }

  // Frames left over mean some bracket was never closed.
  if (stack.length !== 1) {
    return {};
  }

  const elements = {};
  for (const [name, amount] of stack[0].counts) {
    elements[name] = amount;
  }
  return elements;
}
// TEST: run the parser over sample formulas; the expected result is noted beside each one.
for (const sample of [
  'H2O', // {H: 2, O: 1}
  'Mg(OH)2', // {Mg: 1, O: 2, H: 2}
  'K4[ON(SO3)2]2', // {K: 4, O: 14, N: 2, S: 4}
  'As2{Be4C5[BCo3(CO2)3]2}4Cu5', // {As: 2, B: 8, Be: 16, C: 44, Cu: 5, Co: 24, O: 48}
]) {
  parseMolecule(sample);
}

Parse Molecule

The function parseMolecule takes a chemical formula given as a string, counts the atoms of each element in the molecule, and returns an object that maps each element symbol to its atom count.

Sample output for formula H2O: {H: 2, O: 1}

A script by V.

License.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment