Created
June 25, 2012 16:11
-
-
Save ayanamist/2989518 to your computer and use it in GitHub Desktop.
Better implementation of shExpMatch
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
function shExpMatch(url, pattern) { | |
var pCharCode; | |
var isAggressive = false; | |
var pIndex; | |
var urlIndex = 0; | |
var lastIndex; | |
var patternLength = pattern.length; | |
var urlLength = url.length; | |
for (pIndex = 0; pIndex < patternLength; pIndex += 1) { | |
pCharCode = pattern.charCodeAt(pIndex); // use charCodeAt for performance, see http://jsperf.com/charat-charcodeat-brackets | |
if (pCharCode === 63) { // use if instead of switch for performance, see http://jsperf.com/switch-if | |
if (isAggressive) { | |
urlIndex += 1; | |
} | |
isAggressive = false; | |
urlIndex += 1; | |
} | |
else if (pCharCode === 42) { | |
if (pIndex === patternLength - 1) { | |
return urlIndex <= urlLength; | |
} | |
else { | |
isAggressive = true; | |
} | |
} | |
else { | |
if (isAggressive) { | |
lastIndex = urlIndex; | |
urlIndex = url.indexOf(String.fromCharCode(pCharCode), lastIndex + 1); | |
if (urlIndex < 0) { | |
if (url.charCodeAt(lastIndex) !== pCharCode) { | |
return false; | |
} | |
urlIndex = lastIndex; | |
} | |
isAggressive = false; | |
} | |
else { | |
if (urlIndex >= urlLength || url.charCodeAt(urlIndex) !== pCharCode) { | |
return false; | |
} | |
} | |
urlIndex += 1; | |
} | |
} | |
return urlIndex === urlLength; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<html> | |
<head> | |
<title>Unit Test</title> | |
<script src="shExpMatch.js" type="text/javascript"></script> | |
<body> | |
<script type="text/javascript"> | |
function shExpMatchOrig(url, pattern) { | |
pattern = pattern.replace(/\./g, '\\.'); | |
pattern = pattern.replace(/\*/g, '.*'); | |
pattern = pattern.replace(/\?/g, '.'); | |
var newRe = new RegExp('^'+pattern+'$'); | |
return newRe.test(url); | |
} | |
function unittest(url, pattern) { | |
var result = shExpMatch(url, pattern); | |
var expected = shExpMatchOrig(url, pattern); | |
var output = "shExpMatch(\"" + url + "\",\"" + pattern + "\")=>" + result + ((result !== expected) ? (" expect " + expected) : ""); | |
document.write(output + "<br />"); | |
console.log(output); | |
} | |
function test() { | |
unittest("ababa", "*aba?"); | |
unittest("?a","a"); | |
unittest("asbasd", "*?*"); | |
unittest("a?","a"); | |
unittest("a","a?"); | |
unittest("a","*a*"); | |
unittest("","*?*"); | |
unittest("1aba", "?*a"); | |
unittest("ababa","*?aba*"); | |
unittest("?a?a*a?a?", "baba|abab"); | |
unittest("http://platform.twitter.com/widgets.js", "*.t?tter.com/*"); | |
unittest("http://platform.twitter.com/widgets.js", "*.tw?tter.com/*"); | |
unittest("http://platform.twitter.com/widgets.js", "*.t.com/*"); | |
unittest("http://platform.twitter.com/widgets.js", "*.twitter.com/*"); | |
} | |
test(); | |
</script> | |
</body> | |
</html> |
Here is my implementation for shExpMatch. I don't know if my version is considered "better implementation", but the code passed @ayanamist's unittest, and also behaves correctly for testing shExpMatch("aaaa", "a*a")
.
The reason I "reinvented the wheel" is because a piece of software I use supports reading pac file but doesn't implement the shExpMatch function (which is quite bizzarre), I ended up have to implement it by myself.
function shExpMatch(url, pat) {
var pcharcode0;
var ucharcode0;
var pcharcode1;
if (pat.length === 0) {
if (url.length === 0) {
return true;
} else {
return false;
}
}
if (pat.length === url.length &&
pat.indexOf('*') < 0 &&
pat.indexOf('?') < 0) {
return pat === url;
}
ucharcode0 = url.charCodeAt(0);
pcharcode0 = pat.charCodeAt(0);
if (pcharcode0 === 42) { // pat[0] === '*'
pcharcode1 = pat.charCodeAt(1);
if (isNaN(pcharcode1)) {
return true;
} else if (pcharcode1 === 42) { // pat[1] === '*' skip continuous '*'
return shExpMatch(url, pat.substr(1));
} else {
if (url.length === 0) {
return false;
}
if (pcharcode1 === ucharcode0 || pcharcode1 === 63) {
if (shExpMatch(url.substr(1), pat)) {
return true;
} else {
return shExpMatch(url.substr(1), pat.substr(2));
}
} else {
return shExpMatch(url.substr(1), pat);
}
}
} else if (pcharcode0 === 63) {
if (url.length === 0) {
return false;
}
return shExpMatch(url.substr(1),pat.substr(1));
} else {
if (url.length === 0) {
return false;
}
if (ucharcode0 === pcharcode0) {
return shExpMatch(url.substr(1),pat.substr(1));
} else {
return false;
}
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
@linnaea
The code design has a flaw, the problem is at line 29:
urlIndex = url.indexOf(String.fromCharCode(pCharCode), lastIndex + 1);
where it acts kind of like the lazy mode search for regular expression which matches the first occurrence instead of as many as possible.