Created
March 25, 2011 23:23
-
-
Save bga/887843 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
  `String.prototype._matchWithPoss(re, p)` (c) New BSD License

  @param re {RegExp}
  @param p ?= 0 {Number} start position to match
  @return {Array|null} `null` when nothing matches, otherwise an array of
    `{s, p}` records: index 0 is the whole match, index N is capturing group N.

  Matches regExp with subgroups and back references like the built-in
  `String.prototype.match`, but adds info about subgroup positions in the string.
  `s` is the matched text and `p` is its position in the ORIGINAL string
  (not in the part that follows the start position).
  A group that did not take part in the match is reported as
  `{s: undefined, p: -1}`.

  As before, the search runs on `this.slice(p)`, so `^` anchors at the start
  position.

  Not supported (a SyntaxError is thrown while the expression is expanded):
    - a numeric back reference to a group that is opened later or does not exist
    - `(?` group syntax other than `(?:`, `(?=`, `(?!`, `(?<=`, `(?<!`, `(?<name>`

  Example (note the two leading spaces in the subject string):

    console.log('  123abcABC123abc'._matchWithPoss(/(?:\d+)([a-z]+)[A-Z]+(\d+\1)/, 1));
    [
      { // 0 - whole matched substring with matched position
        "s": "123abcABC123abc",
        "p": 2
      },
      { // 1 - subgroup `([a-z]+)` with position
        "s": "abc",
        "p": 5
      },
      { // 2 - subgroup `(\d+\1)` with position
        "s": "123abc",
        "p": 11
      }
    ]
*/
;(function(Global)
{
  // Kinds of capture group found in the expanded expression.
  var TEXT = 0;  // a stretch of the source lying between two structural tokens
  var WRAP = 1;  // wraps one source group together with its quantifier
  var GROUP = 2; // the source group itself, or the body of a lookaround

  // How the start of a GROUP is derived from its WRAP (always the previous index).
  var ALIGN_END = 0;    // ordinary group: its last iteration ends where the wrapper ends
  var ALIGN_START = 1;  // lookahead body: starts where the (zero-width) wrapper sits
  var ALIGN_BEFORE = 2; // lookbehind body: ends where the (zero-width) wrapper sits

  var GROUP_HEAD_RE = /^\?(?:[:=!]|<[=!]|<[^>]+>)/;
  var QUANTIFIER_RE = /^(?:[*+?]|\{\d+(?:,\d*)?\})\??/;
  var BACK_REF_RE = /^[1-9]\d*/;

  // Builds the plan entry for a TEXT group.
  // `after` - index of the WRAP this text directly follows (-1 if none);
  // `from`  - index whose start a new alternative restarts from (-1 if none).
  var _text = function(after, from)
  {
    return {kind: TEXT, after: after, from: from};
  };

  // Rewrites regexp source `s` so that every piece of it is captured:
  //   source  a(b)+c   becomes   (a)(((b))+)(c)
  // i.e. each source group turns into WRAP( GROUP( TEXT ... ) quantifier ) and
  // the text between groups gets TEXT groups of its own. Returns
  //   out              - the expanded source,
  //   plan             - plan[i] describes capture group i of `out`,
  //   origGroupIndexes - origGroupIndexes[n - 1] is the index in `out` of
  //                      capturing group n of `s`.
  var _expandREStr = function(s)
  {
    var out = '(';
    var plan = [null, _text(-1, -1)];
    var origGroupIndexes = [];
    var open = [];     // stack of currently open source groups
    var owners = [0];  // stack of indexes alternatives restart from; 0 is the whole match
    var len = s.length;
    var i = 0;
    var m;

    while(i < len)
    {
      var ch = s.charAt(i);

      if(ch == '\\')
      {
        m = BACK_REF_RE.exec(s.slice(i + 1));
        if(m == null)
        {
          // Ordinary escape (including `\\`, `\(` and `\0`): copy the pair
          // so that the escaped character is never seen as structure.
          out += s.substr(i, 2);
          i += 2;
        }
        else
        {
          var target = origGroupIndexes[Number(m[0]) - 1];
          if(target == null)
            throw new SyntaxError('_matchWithPoss: unsupported back reference \\' + m[0]);
          out += '\\' + target; // renumber to the group's index in `out`
          i += 1 + m[0].length;
        }
      }
      else if(ch == '[')
      {
        // Character class: copied verbatim, parentheses inside are literal.
        var end = i + 1;
        while(end < len && s.charAt(end) != ']')
          end += (s.charAt(end) == '\\') ? 2 : 1;
        out += s.slice(i, end + 1);
        i = end + 1;
      }
      else if(ch == '(')
      {
        var head = '';
        if(s.charAt(i + 1) == '?')
        {
          m = GROUP_HEAD_RE.exec(s.slice(i + 1));
          if(m == null)
            throw new SyntaxError('_matchWithPoss: unsupported group syntax at offset ' + i);
          head = m[0];
        }

        var lookaround = /[=!]$/.test(head);
        var wrap = plan.length;
        var inner = wrap + 1;

        plan.push({kind: WRAP});
        if(lookaround)
        {
          // close text, open WRAP, open the lookaround, open its body, open text
          out += ')((' + head + '((';
          plan.push({kind: GROUP, align: (head.charAt(1) == '<') ? ALIGN_BEFORE : ALIGN_START, report: false});
        }
        else
        {
          var capturing = head != '?:';
          // close text, open WRAP, open GROUP (keeping a `?<name>` head), open text.
          // A `(?:` group becomes an unreported capturing group so that the
          // length of its last iteration can be measured.
          out += ')((' + ((head.charAt(1) == '<') ? head : '') + '(';
          plan.push({kind: GROUP, align: ALIGN_END, report: capturing});
          if(capturing)
            origGroupIndexes.push(inner);
        }
        plan.push(_text(-1, -1));

        open.push({wrap: wrap, lookaround: lookaround});
        owners.push(inner);
        i += 1 + head.length;
      }
      else if(ch == ')')
      {
        var group = open.pop();
        if(group == null)
          throw new SyntaxError('_matchWithPoss: unmatched ) at offset ' + i);
        owners.pop();

        // The quantifier has to stay attached to the group, inside the wrapper.
        m = QUANTIFIER_RE.exec(s.slice(i + 1));
        var quantifier = (m == null) ? '' : m[0];

        out += (group.lookaround ? ')))' : '))') + quantifier + ')(';
        plan.push(_text(group.wrap, -1));
        i += 1 + quantifier.length;
      }
      else if(ch == '|')
      {
        // A text group must not span an alternation; the new alternative
        // starts again where the enclosing group started.
        out += ')|(';
        plan.push(_text(-1, owners[owners.length - 1]));
        ++i;
      }
      else
      {
        out += ch;
        ++i;
      }
    }

    out += ')';

    return {origGroupIndexes: origGroupIndexes, plan: plan, out: out};
  };

  // Compiles the expanded form of `re`, keeping the flags of `re` unchanged.
  var _expandRE = function(re)
  {
    var reStr = String(re);
    var mod = reStr.slice(reStr.lastIndexOf('/') + 1);
    var rec = _expandREStr(re.source);

    return {re: new RegExp(rec.out, mod), origGroupIndexes: rec.origGroupIndexes, plan: rec.plan};
  };

  // Converts `match` (result of the expanded regexp) into the public array of
  // `{s, p}` records. `p` is the position of the whole match in the original string.
  var _createMatch = function(match, p, rec)
  {
    var plan = rec.plan;
    var starts = [p]; // starts[i] - position where capture group i begins
    var origMatch = [{s: match[0], p: p}];

    var i = 0, len = plan.length; while(++i < len)
    {
      var step = plan[i];
      var text = match[i];

      if(step.kind == WRAP)
      {
        starts[i] = p;
      }
      else if(step.kind == GROUP)
      {
        var start = -1; // stays -1 when the group did not take part in the match
        if(text != null)
        {
          if(step.align == ALIGN_END)
            start = starts[i - 1] + match[i - 1].length - text.length;
          else if(step.align == ALIGN_START)
            start = starts[i - 1];
          else
            start = starts[i - 1] - text.length;
          p = start;
        }
        starts[i] = start;
        if(step.report)
          origMatch.push({s: text, p: start});
      }
      else
      {
        if(step.after >= 0)
        {
          // Continue right behind the whole (possibly repeated) group.
          var wrapped = match[step.after];
          p = starts[step.after] + ((wrapped == null) ? 0 : wrapped.length);
        }
        else if(step.from >= 0 && starts[step.from] >= 0)
        {
          p = starts[step.from];
        }
        if(text != null)
          p += text.length;
      }
    }

    return origMatch;
  };

  // Expanded regexps keyed by `String(re)`; every key starts with `/`.
  var cache = {};

  String.prototype._matchWithPoss = function(re, p)
  {
    var s = String(this);

    if(p == null || p < 0) p = 0;
    if(p > s.length) return null;

    var reStr = String(re);
    var rec = cache[reStr] || (cache[reStr] = _expandRE(re));

    rec.re.lastIndex = 0; // the cached regexp may carry the `g` or `y` flag
    var match = rec.re.exec(s.slice(p));

    if(match == null) return null;

    // `match.index` is relative to the slice; add `p` to make it absolute.
    return _createMatch(match, p + match.index, rec);
  };
})(this);
| //console.log(' 123abcABC123abc'._matchWithPoss(/(?:\d+)([a-z]+)[A-Z]+(\d+\1)/, 1)); | |
| //console.log(_expandREStr(/(?:\d+)\s+(\d+(\w+))\s+\1\2/.source)); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment