-
-
Save ayanamist/2989518 to your computer and use it in GitHub Desktop.
function shExpMatch(url, pattern) { | |
var pCharCode; | |
var isAggressive = false; | |
var pIndex; | |
var urlIndex = 0; | |
var lastIndex; | |
var patternLength = pattern.length; | |
var urlLength = url.length; | |
for (pIndex = 0; pIndex < patternLength; pIndex += 1) { | |
pCharCode = pattern.charCodeAt(pIndex); // use charCodeAt for performance, see http://jsperf.com/charat-charcodeat-brackets | |
if (pCharCode === 63) { // use if instead of switch for performance, see http://jsperf.com/switch-if | |
if (isAggressive) { | |
urlIndex += 1; | |
} | |
isAggressive = false; | |
urlIndex += 1; | |
} | |
else if (pCharCode === 42) { | |
if (pIndex === patternLength - 1) { | |
return urlIndex <= urlLength; | |
} | |
else { | |
isAggressive = true; | |
} | |
} | |
else { | |
if (isAggressive) { | |
lastIndex = urlIndex; | |
urlIndex = url.indexOf(String.fromCharCode(pCharCode), lastIndex + 1); | |
if (urlIndex < 0) { | |
if (url.charCodeAt(lastIndex) !== pCharCode) { | |
return false; | |
} | |
urlIndex = lastIndex; | |
} | |
isAggressive = false; | |
} | |
else { | |
if (urlIndex >= urlLength || url.charCodeAt(urlIndex) !== pCharCode) { | |
return false; | |
} | |
} | |
urlIndex += 1; | |
} | |
} | |
return urlIndex === urlLength; | |
} |
<html> | |
<head> | |
<title>Unit Test</title> | |
<script src="shExpMatch.js" type="text/javascript"></script> | |
<body> | |
<script type="text/javascript"> | |
function shExpMatchOrig(url, pattern) { | |
pattern = pattern.replace(/\./g, '\\.'); | |
pattern = pattern.replace(/\*/g, '.*'); | |
pattern = pattern.replace(/\?/g, '.'); | |
var newRe = new RegExp('^'+pattern+'$'); | |
return newRe.test(url); | |
} | |
function unittest(url, pattern) { | |
var result = shExpMatch(url, pattern); | |
var expected = shExpMatchOrig(url, pattern); | |
var output = "shExpMatch(\"" + url + "\",\"" + pattern + "\")=>" + result + ((result !== expected) ? (" expect " + expected) : ""); | |
document.write(output + "<br />"); | |
console.log(output); | |
} | |
function test() { | |
unittest("ababa", "*aba?"); | |
unittest("?a","a"); | |
unittest("asbasd", "*?*"); | |
unittest("a?","a"); | |
unittest("a","a?"); | |
unittest("a","*a*"); | |
unittest("","*?*"); | |
unittest("1aba", "?*a"); | |
unittest("ababa","*?aba*"); | |
unittest("?a?a*a?a?", "baba|abab"); | |
unittest("http://platform.twitter.com/widgets.js", "*.t?tter.com/*"); | |
unittest("http://platform.twitter.com/widgets.js", "*.tw?tter.com/*"); | |
unittest("http://platform.twitter.com/widgets.js", "*.t.com/*"); | |
unittest("http://platform.twitter.com/widgets.js", "*.twitter.com/*"); | |
} | |
test(); | |
</script> | |
</body> | |
</html> |
This implementation is broken, shExpMatch("aaaa", "a*a") should return true.
@linnaea
The code design has a flaw, the problem is at line 29: urlIndex = url.indexOf(String.fromCharCode(pCharCode), lastIndex + 1);
where it acts kind of like the lazy mode search for regular expression which matches the first occurrence instead of as many as possible.
Here is my implementation for shExpMatch. I don't know if my version is considered "better implementation", but the code passed @ayanamist's unittest, and also behaves correctly for testing shExpMatch("aaaa", "a*a")
.
The reason I "reinvented the wheel" is because a piece of software I use supports reading pac file but doesn't implement the shExpMatch function (which is quite bizzarre), I ended up have to implement it by myself.
function shExpMatch(url, pat) {
var pcharcode0;
var ucharcode0;
var pcharcode1;
if (pat.length === 0) {
if (url.length === 0) {
return true;
} else {
return false;
}
}
if (pat.length === url.length &&
pat.indexOf('*') < 0 &&
pat.indexOf('?') < 0) {
return pat === url;
}
ucharcode0 = url.charCodeAt(0);
pcharcode0 = pat.charCodeAt(0);
if (pcharcode0 === 42) { // pat[0] === '*'
pcharcode1 = pat.charCodeAt(1);
if (isNaN(pcharcode1)) {
return true;
} else if (pcharcode1 === 42) { // pat[1] === '*' skip continuous '*'
return shExpMatch(url, pat.substr(1));
} else {
if (url.length === 0) {
return false;
}
if (pcharcode1 === ucharcode0 || pcharcode1 === 63) {
if (shExpMatch(url.substr(1), pat)) {
return true;
} else {
return shExpMatch(url.substr(1), pat.substr(2));
}
} else {
return shExpMatch(url.substr(1), pat);
}
}
} else if (pcharcode0 === 63) {
if (url.length === 0) {
return false;
}
return shExpMatch(url.substr(1),pat.substr(1));
} else {
if (url.length === 0) {
return false;
}
if (ucharcode0 === pcharcode0) {
return shExpMatch(url.substr(1),pat.substr(1));
} else {
return false;
}
}
what are the license of this function implementation ?