-
-
Save ayanamist/2989518 to your computer and use it in GitHub Desktop.
| function shExpMatch(url, pattern) { | |
| var pCharCode; | |
| var isAggressive = false; | |
| var pIndex; | |
| var urlIndex = 0; | |
| var lastIndex; | |
| var patternLength = pattern.length; | |
| var urlLength = url.length; | |
| for (pIndex = 0; pIndex < patternLength; pIndex += 1) { | |
| pCharCode = pattern.charCodeAt(pIndex); // use charCodeAt for performance, see http://jsperf.com/charat-charcodeat-brackets | |
| if (pCharCode === 63) { // use if instead of switch for performance, see http://jsperf.com/switch-if | |
| if (isAggressive) { | |
| urlIndex += 1; | |
| } | |
| isAggressive = false; | |
| urlIndex += 1; | |
| } | |
| else if (pCharCode === 42) { | |
| if (pIndex === patternLength - 1) { | |
| return urlIndex <= urlLength; | |
| } | |
| else { | |
| isAggressive = true; | |
| } | |
| } | |
| else { | |
| if (isAggressive) { | |
| lastIndex = urlIndex; | |
| urlIndex = url.indexOf(String.fromCharCode(pCharCode), lastIndex + 1); | |
| if (urlIndex < 0) { | |
| if (url.charCodeAt(lastIndex) !== pCharCode) { | |
| return false; | |
| } | |
| urlIndex = lastIndex; | |
| } | |
| isAggressive = false; | |
| } | |
| else { | |
| if (urlIndex >= urlLength || url.charCodeAt(urlIndex) !== pCharCode) { | |
| return false; | |
| } | |
| } | |
| urlIndex += 1; | |
| } | |
| } | |
| return urlIndex === urlLength; | |
| } |
| <html> | |
| <head> | |
| <title>Unit Test</title> | |
| <script src="shExpMatch.js" type="text/javascript"></script> | |
| <body> | |
| <script type="text/javascript"> | |
| function shExpMatchOrig(url, pattern) { | |
| pattern = pattern.replace(/\./g, '\\.'); | |
| pattern = pattern.replace(/\*/g, '.*'); | |
| pattern = pattern.replace(/\?/g, '.'); | |
| var newRe = new RegExp('^'+pattern+'$'); | |
| return newRe.test(url); | |
| } | |
| function unittest(url, pattern) { | |
| var result = shExpMatch(url, pattern); | |
| var expected = shExpMatchOrig(url, pattern); | |
| var output = "shExpMatch(\"" + url + "\",\"" + pattern + "\")=>" + result + ((result !== expected) ? (" expect " + expected) : ""); | |
| document.write(output + "<br />"); | |
| console.log(output); | |
| } | |
| function test() { | |
| unittest("ababa", "*aba?"); | |
| unittest("?a","a"); | |
| unittest("asbasd", "*?*"); | |
| unittest("a?","a"); | |
| unittest("a","a?"); | |
| unittest("a","*a*"); | |
| unittest("","*?*"); | |
| unittest("1aba", "?*a"); | |
| unittest("ababa","*?aba*"); | |
| unittest("?a?a*a?a?", "baba|abab"); | |
| unittest("http://platform.twitter.com/widgets.js", "*.t?tter.com/*"); | |
| unittest("http://platform.twitter.com/widgets.js", "*.tw?tter.com/*"); | |
| unittest("http://platform.twitter.com/widgets.js", "*.t.com/*"); | |
| unittest("http://platform.twitter.com/widgets.js", "*.twitter.com/*"); | |
| } | |
| test(); | |
| </script> | |
| </body> | |
| </html> |
what are the license of this function implementation ?
This implementation is broken, shExpMatch("aaaa", "a*a") should return true.
@linnaea
The code design has a flaw, the problem is at line 29: urlIndex = url.indexOf(String.fromCharCode(pCharCode), lastIndex + 1); where it acts kind of like the lazy mode search for regular expression which matches the first occurrence instead of as many as possible.
Here is my implementation for shExpMatch. I don't know if my version is considered "better implementation", but the code passed @ayanamist's unittest, and also behaves correctly for testing shExpMatch("aaaa", "a*a").
The reason I "reinvented the wheel" is because a piece of software I use supports reading pac file but doesn't implement the shExpMatch function (which is quite bizzarre), I ended up have to implement it by myself.
function shExpMatch(url, pat) {
var pcharcode0;
var ucharcode0;
var pcharcode1;
if (pat.length === 0) {
if (url.length === 0) {
return true;
} else {
return false;
}
}
if (pat.length === url.length &&
pat.indexOf('*') < 0 &&
pat.indexOf('?') < 0) {
return pat === url;
}
ucharcode0 = url.charCodeAt(0);
pcharcode0 = pat.charCodeAt(0);
if (pcharcode0 === 42) { // pat[0] === '*'
pcharcode1 = pat.charCodeAt(1);
if (isNaN(pcharcode1)) {
return true;
} else if (pcharcode1 === 42) { // pat[1] === '*' skip continuous '*'
return shExpMatch(url, pat.substr(1));
} else {
if (url.length === 0) {
return false;
}
if (pcharcode1 === ucharcode0 || pcharcode1 === 63) {
if (shExpMatch(url.substr(1), pat)) {
return true;
} else {
return shExpMatch(url.substr(1), pat.substr(2));
}
} else {
return shExpMatch(url.substr(1), pat);
}
}
} else if (pcharcode0 === 63) {
if (url.length === 0) {
return false;
}
return shExpMatch(url.substr(1),pat.substr(1));
} else {
if (url.length === 0) {
return false;
}
if (ucharcode0 === pcharcode0) {
return shExpMatch(url.substr(1),pat.substr(1));
} else {
return false;
}
}
This is a new implementation of shExpMatch function used in pac script with brandly fast performance
See http://jsperf.com/shexpmatch/4