Skip to content

Instantly share code, notes, and snippets.

@bga
Created March 25, 2011 23:23
Show Gist options
  • Select an option

  • Save bga/887843 to your computer and use it in GitHub Desktop.

Select an option

Save bga/887843 to your computer and use it in GitHub Desktop.
/*
`String.prototype._matchWithPoss(re, p)` (c) New BSD License
@param re {RegExp}
@param p ?= 0 {Number} start position to match
Matches a RegExp with subgroups and back references like the built-in `String.prototype.match`, but adds information about the positions of the subgroups in the string.
Example:
console.log(' 123abcABC123abc'._matchWithPoss(/(?:\d+)([a-z]+)[A-Z]+(\d+\1)/, 1));
[
{ // 0 - whole matched substring with matched position
"s": "123abcABC123abc",
"p": 2
},
{ // 1 - subgroup `([a-z]+)` with position
"s": "abc",
"p": 5
},
{ // 2 - subgroup `(\d+\1)` with position
"s": "123abc",
"p": 11
}
]
*/
;(function(Global)
{
/*
Rewrites a regular expression source so that every piece of it sits inside a
capturing group. The position of an original subgroup can then be computed by
summing the lengths of the groups that precede it in a match.

Rewriting rules (applied outside character classes, to unescaped characters only):
  start of source -> `(`
  `(`             -> `)((`  closes the current text group, opens a wrapper group
                            and re-opens the original group
  `)`             -> `))(`  closes the original group and its wrapper, opens a
                            new text group
  `\N`            -> `\M`   where M is the wrapper group of original group N
  end of source   -> `)`
Groups starting with `(?` (`(?:`, `(?=`, `(?!`, `(?<=`, `(?<!`) do not capture and
only add the wrapper group; named groups `(?<name>...)` capture.
`\0` and references to a group that has not been opened yet are copied verbatim.

Known limitations of this textual rewriting: a quantifier written directly after a
group (e.g. `(a)+`) ends up at the start of the following text group instead of
applying to the group, and an enclosing non-capturing group is closed at a nested `(`.

@param s {String} regular expression source (as in `RegExp.prototype.source`)
@return {Object} `{origGroupIndexes, out}`: `out` is the rewritten source,
  `origGroupIndexes[k]` is the index in `out` of the wrapper group that holds
  original capturing group `k + 1`
*/
var _expandREStr = function(s)
{
  var out = '(';
  var origGroupIndexes = [];
  var curGroupIndex = 1;  // number of capturing groups opened in `out` so far
  var len = s.length;
  var textStart = 0;      // start of the source text not yet copied to `out`
  var inClass = false;    // true while scanning inside `[...]`
  var i = 0;
  while(i < len)
  {
    var ch = s.charAt(i);
    if(ch === '\\')
    {
      // An escape always consumes the following character, so `\\(` is an
      // escaped backslash followed by a real group, not an escaped bracket.
      var j = i + 1;
      while(j < len && s.charAt(j) >= '0' && s.charAt(j) <= '9')
        ++j;
      var target;
      if(!inClass && j > i + 1)
        target = origGroupIndexes[Number(s.slice(i + 1, j)) - 1];
      else
        target = undefined;
      if(target === undefined)
      {
        // not a back reference to a known group: keep the escape as written
        i += 2;
        continue;
      }
      out += s.slice(textStart, i) + '\\' + target;
      i = j;
      textStart = i;
      continue;
    }
    if(inClass)
    {
      // brackets inside a character class are literals
      if(ch === ']')
        inClass = false;
      ++i;
      continue;
    }
    if(ch === '[')
    {
      inClass = true;
    }
    else if(ch === '(')
    {
      out += s.slice(textStart, i) + ')((';
      textStart = i + 1;
      var captures = true;
      if(s.charAt(i + 1) === '?')
      {
        // only `(?<name>` captures; `(?<=` and `(?<!` are lookbehinds
        var third = s.charAt(i + 3);
        captures = s.charAt(i + 2) === '<' && third !== '=' && third !== '!';
      }
      if(captures)
        origGroupIndexes.push(++curGroupIndex); // wrapper group index
      ++curGroupIndex;
    }
    else if(ch === ')')
    {
      out += s.slice(textStart, i) + '))(';
      textStart = i + 1;
      ++curGroupIndex;
    }
    ++i;
  }
  out += s.slice(textStart) + ')';
  return {origGroupIndexes: origGroupIndexes, out: out};
};
/*
Builds the expanded, global version of a regular expression.
@param re {RegExp} original regular expression
@return {Object} `{re, origGroupIndexes}`: `re` is a global RegExp built from the
  source rewritten by `_expandREStr` with the flags of the original expression,
  `origGroupIndexes` is passed through from `_expandREStr`
*/
var _expandRE = function(re)
{
  var reStr = String(re);
  // flags are whatever follows the last `/` of the textual form
  var mod = reStr.slice(reStr.lastIndexOf('/') + 1);
  // a duplicated `g` flag makes the RegExp constructor throw
  if(mod.indexOf('g') < 0)
    mod += 'g';
  var rec = _expandREStr(re.source);
  return {re: RegExp(rec.out, mod), origGroupIndexes: rec.origGroupIndexes};
};
/*
Converts a match of an expanded regular expression into the list of original
groups, each with its position.
@param match {Array} `exec` result of the expanded regular expression
@param p {Number} position of the whole match
@param rec {Object} record whose `origGroupIndexes` lists, in ascending order, the
  indexes in `match` that hold the original capturing groups
@return {Array} `[{s, p}, ...]`: entry 0 is the whole match, entry `k` is original
  group `k` with the position at which it starts
*/
var _createMatch = function(match, p, rec)
{
  var origGroupIndexes = rec.origGroupIndexes;
  var origGroupIndexesLen = origGroupIndexes.length;
  var origMatch = [{s: match[0], p: p}];
  if(origGroupIndexesLen === 0)
    return origMatch;
  var j = 0; // next original group to record
  for(var i = 1, len = match.length; i < len; ++i)
  {
    if(i === origGroupIndexes[j])
    {
      // A recorded group does not advance `p`: the groups that follow it in
      // `match` cover the same text and advance the position instead.
      origMatch[j + 1] = {s: match[i], p: p};
      if(++j === origGroupIndexesLen)
        break;
    }
    else if(match[i] != null)
    {
      // entries that did not take part in the match are undefined and add no length
      p += match[i].length;
    }
  }
  return origMatch;
};
// expanded regular expressions, keyed by the textual form of the original one
var cache = {};
/*
Matches `re` against the string starting at position `p` and reports every group
together with its position in the whole string.
Matching runs on `this.slice(p)`, so `^` anchors at `p`.
@param re {RegExp}
@param p ?= 0 {Number} start position to match; negative or NaN values count as 0
@return {Array|null} `[{s, p}, ...]` as built by `_createMatch`, or `null` when
  there is no match or `p` is past the end of the string
*/
String.prototype._matchWithPoss = function(re, p)
{
  var s = String(this);
  p = p == null ? 0 : Number(p);
  if(!(p > 0))
    p = 0;
  p = Math.floor(p);
  if(p > s.length)
    return null;
  var reStr = String(re);
  var rec = cache[reStr] || (cache[reStr] = _expandRE(re));
  // the cached expression is global, so it must be rewound before every use
  rec.re.lastIndex = 0;
  var match = rec.re.exec(s.slice(p));
  if(match == null)
    return null;
  // `match.index` is relative to the slice; add `p` to get the position in `s`
  return _createMatch(match, p + match.index, rec);
};
})(this);
//console.log(' 123abcABC123abc'._matchWithPoss(/(?:\d+)([a-z]+)[A-Z]+(\d+\1)/, 1));
//console.log(_expandREStr(/(?:\d+)\s+(\d+(\w+))\s+\1\2/.source));
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment