// Convert MathML to an expression
// Source: GitHub gist jiggzson/d49e33454002d744d206002a1e3564d6 (created February 5, 2020 14:50).
// NOTE: the MathML handled here relies on invisible Unicode operators
// (U+2061 FUNCTION APPLICATION, U+2062 INVISIBLE TIMES). Use an editor that
// reveals hidden Unicode characters when reviewing or editing such input.
// Polyfill for String.prototype.startsWith on engines that predate ES2015.
// It is only installed when the engine does not already provide the method.
// `var` is used on purpose: this snippet must parse on the old engines it targets.
if (!String.prototype.startsWith) {
  Object.defineProperty(String.prototype, 'startsWith', {
    // Mirror the native property descriptor (writable, configurable,
    // non-enumerable) so later code can still patch or remove the method.
    configurable: true,
    writable: true,
    /**
     * @param {string} search - Prefix to look for (coerced to a string).
     * @param {number} [rawPos] - Index at which to start matching; values that
     *   are not greater than zero are treated as 0.
     * @returns {boolean} True when the string has `search` at that position.
     */
    value: function startsWith(search, rawPos) {
      var needle = String(search);
      // `|0` truncates fractional positions to an integer.
      var pos = rawPos > 0 ? rawPos | 0 : 0;
      return this.substring(pos, pos + needle.length) === needle;
    },
  });
}
/**
 * Minimal converter from presentation MathML to a plain-text expression.
 *
 * Create an instance with `new MLParser()`, then call
 * `parser.toExpression(mathml, options)` to get the expression string, or
 * `parser.tagify(mathml)` to get the parsed tag tree.
 *
 * This is not a general XML parser: attributes are split on whitespace (so
 * quoted attribute values containing spaces are split as well) and entities
 * are not decoded.
 */
function MLParser() {
  // Invisible operators emitted by MathML generators. They are written as
  // escapes so the characters being compared are visible in the source.
  const FUNCTION_APPLICATION = '\u2061';
  const INVISIBLE_TIMES = '\u2062';
  const INTEGRAL_SIGN = '\u222B';

  /** Returns a string of `n` spaces (empty for zero, negative or NaN). */
  function pad(n) {
    return ' '.repeat(Math.max(0, Math.ceil(n) || 0));
  }

  /** Throws a descriptive error when `tag` has fewer than `count` children. */
  function requireChildren(tag, count) {
    if (tag.children.length < count) {
      throw new Error(
        `<${tag.name}> requires at least ${count} child element(s) but has ${tag.children.length}`
      );
    }
  }

  /** Concatenates the expressions of all children of `tag`, in order. */
  function joinChildren(tag, options) {
    return tag.children.map((child) => child.toExpression(options)).join('');
  }

  /**
   * Renders `tag` as an operand of a binary operator (`^`, `/`). Compound
   * operands are parenthesised so that precedence survives in the flat text
   * output. Leaf tokens, `sqrt(...)` calls and subscripted names are already
   * atomic and are left bare.
   */
  function operand(tag, options) {
    const text = tag.toExpression(options);
    const isAtomic =
      tag.children.length === 0 || tag.name === 'msqrt' || tag.name === 'msub';
    return isAtomic ? text : `(${text})`;
  }

  /**
   * A node of the parsed MathML tree.
   *
   * @param {string} name - Element name, e.g. `mrow`.
   * @param {number} start - Index in the source text just after the opening tag.
   * @param {string[]} properties - Raw attribute strings of the opening tag.
   *
   * `contents` is filled in by `tagify` when the closing tag is found, and
   * `parent` is assigned by `tagify` for every non-root tag.
   */
  function Tag(name, start, properties) {
    this.properties = properties;
    this.start = start;
    this.name = name;
    this.contents = '';
    this.children = [];
  }

  /** @returns {string} The element name. */
  Tag.prototype.toString = function() {
    return this.name;
  };

  /**
   * Serialises the tag and its descendants back to indented markup.
   *
   * @param {number} [spaces=4] - Spaces per indentation level.
   * @param {number} [indentLevel=0] - Indentation level of this tag.
   * @returns {string} Markup ending with a newline.
   */
  Tag.prototype.toML = function(spaces, indentLevel) {
    const width = typeof spaces === 'undefined' ? 4 : spaces;
    const level = indentLevel || 0;
    const outer = pad(width * level);
    const inner = pad(width * (level + 1));
    const body = this.children.length === 0
      ? inner + this.contents
      : this.children.map((child) => child.toML(width, level + 1)).join('');
    // Only emit the separating space when there are attributes, so a bare tag
    // is written as <mi> rather than <mi >.
    const attrs = this.properties.length > 0 ? ` ${this.properties.join(' ')}` : '';
    // Child markup already ends with a newline; collapse the resulting
    // doubled newlines.
    return `${outer}<${this.name}${attrs}>\n${body}\n${outer}</${this.name}>\n`
      .replace(/\n\n/g, '\n');
  };

  /**
   * Converts the tag (and its descendants) to a text expression.
   *
   * - `msup`  -> `base^exp` (power symbol configurable)
   * - `msub`  -> `base_sub`
   * - `mfrac` -> `num/den`
   * - `msqrt` -> `sqrt(children...)`
   * - `mroot` -> `base^(1/index)`
   * - any other tag with children -> children concatenated
   * - leaf tag -> its trimmed text; U+2061 becomes '', U+2062 becomes '*',
   *   and the integral sign becomes 'integrate'.
   *
   * @param {Object} [o] - Options.
   * @param {string} [o.powerSymbol='^'] - Symbol used for exponentiation.
   * @returns {string} The expression text.
   * @throws {Error} When a script/fraction/root element lacks required children.
   */
  Tag.prototype.toExpression = function(o) {
    const options = o || {};
    const power = options.powerSymbol || '^';
    const kids = this.children;

    switch (this.name) {
      case 'msup':
        requireChildren(this, 2);
        return operand(kids[0], options) + power + operand(kids[1], options);
      case 'msub':
        requireChildren(this, 2);
        return kids[0].toExpression(options) + '_' + kids[1].toExpression(options);
      case 'mfrac':
        requireChildren(this, 2);
        return operand(kids[0], options) + '/' + operand(kids[1], options);
      case 'msqrt':
        // msqrt takes an implied row: every child belongs to the radicand.
        requireChildren(this, 1);
        return `sqrt(${joinChildren(this, options)})`;
      case 'mroot':
        // <mroot> base index </mroot> is the index-th root of base.
        requireChildren(this, 2);
        return `${operand(kids[0], options)}${power}(1/${operand(kids[1], options)})`;
      default:
        break;
    }

    if (kids.length > 0) {
      return joinChildren(this, options);
    }

    // Leaf token. trim() removes layout whitespace but not the invisible
    // operator characters, which are not whitespace.
    const token = this.contents.trim();
    if (token === FUNCTION_APPLICATION) {
      return '';
    }
    if (token === INVISIBLE_TIMES) {
      return '*';
    }
    if (token === INTEGRAL_SIGN) {
      return 'integrate';
    }
    return token;
  };

  /**
   * Parses markup into a tree of Tag objects.
   *
   * XML declarations, DOCTYPEs and comments are skipped, and self-closing
   * tags become childless tags.
   *
   * @param {string} txt - MathML source.
   * @returns {Tag|undefined} The last top-level tag, or undefined when the
   *   text contains no tags.
   * @throws {Error} On a closing tag that does not match the innermost open
   *   tag, or that has no open tag at all.
   */
  this.tagify = function(txt) {
    const openTags = []; // stack of currently open tags, innermost last
    let root;
    let cursor = 0;

    for (;;) {
      const open = txt.indexOf('<', cursor);
      if (open === -1) {
        break;
      }
      // Comments may contain '>' so they need their own terminator search.
      if (txt.startsWith('<!--', open)) {
        const commentEnd = txt.indexOf('-->', open + 4);
        if (commentEnd === -1) {
          break;
        }
        cursor = commentEnd + 3;
        continue;
      }
      const close = txt.indexOf('>', open + 1);
      if (close === -1) {
        break;
      }
      // Resume right after '>' so a tag that immediately follows is not skipped.
      cursor = close + 1;
      const raw = txt.substring(open + 1, close);

      // <?xml ...?> and <!DOCTYPE ...> carry no math content.
      if (raw.startsWith('?') || raw.startsWith('!')) {
        continue;
      }

      if (raw.startsWith('/')) {
        const closingName = raw.substring(1).trim();
        const openTag = openTags.pop();
        if (!openTag) {
          throw new Error(`Unexpected closing tag </${closingName}>`);
        }
        if (closingName !== openTag.name) {
          throw new Error('Expected '+closingName+' but received '+openTag);
        }
        openTag.contents = txt.substring(openTag.start, open);
        continue;
      }

      const isSelfClosing = raw.charAt(raw.length - 1) === '/';
      const declaration = isSelfClosing ? raw.substring(0, raw.length - 1) : raw;
      const parts = declaration.trim().split(/\s+/);
      const name = parts.shift();
      const tag = new Tag(name, close + 1, parts);

      const parent = openTags[openTags.length - 1];
      if (parent) {
        tag.parent = parent;
        parent.children.push(tag);
      }
      else {
        root = tag;
      }
      if (!isSelfClosing) {
        openTags.push(tag);
      }
    }
    return root;
  };

  /**
   * Parses MathML and converts it to a text expression.
   *
   * @param {string} txt - MathML source.
   * @param {Object} [options] - See Tag.prototype.toExpression.
   * @returns {string} The expression text.
   * @throws {Error} When the text contains no tags or is malformed.
   */
  this.toExpression = function(txt, options) {
    const math = this.tagify(txt);
    if (!math) {
      throw new Error('No MathML tags found in input');
    }
    return math.toExpression(options);
  };
}
//===========================================================
//USAGE
// Converts a sample MathML fraction to a text expression and prints it.
// The invisible operators are written as escapes (\u2062 = INVISIBLE TIMES,
// \u2061 = FUNCTION APPLICATION); the template literal turns them into the
// actual characters, so the parser receives the same text as before.
const parser = new MLParser();
const expression = parser.toExpression(
`
<math xmlns='http://www.w3.org/1998/Math/MathML'>
  <mfrac>
    <mrow>
      <msup>
        <mi>x</mi>
        <mn>2</mn>
      </msup>
      <mo>\u2062</mo>
      <mrow>
        <mi>cos</mi>
        <mo>\u2061</mo>
        <mo>(</mo>
        <mi>x</mi>
        <mo>)</mo>
      </mrow>
    </mrow>
    <mrow>
      <mi>a</mi>
      <mo>\u2062</mo>
      <mi>b</mi>
    </mrow>
  </mfrac>
</math>
`);
console.log(expression);
// (GitHub page footer: "Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment")