Created
October 21, 2010 00:06
-
-
Save bcherry/637624 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
natcompare.js -- Perform 'natural order' comparisons of strings in JavaScript. | |
Copyright (C) 2005 by SCK-CEN (Belgian Nucleair Research Centre) | |
Written by Kristof Coomans <kristof[dot]coomans[at]sckcen[dot]be> | |
Based on the Java version by Pierre-Luc Paour, of which this is more or less a straight conversion. | |
Copyright (C) 2003 by Pierre-Luc Paour <[email protected]> | |
The Java version was based on the C version by Martin Pool. | |
Copyright (C) 2000 by Martin Pool <[email protected]> | |
Bugfixes made by Twitter (2010). | |
This software is provided 'as-is', without any express or implied | |
warranty. In no event will the authors be held liable for any damages | |
arising from the use of this software. | |
Permission is granted to anyone to use this software for any purpose, | |
including commercial applications, and to alter it and redistribute it | |
freely, subject to the following restrictions: | |
1. The origin of this software must not be misrepresented; you must not | |
claim that you wrote the original software. If you use this software | |
in a product, an acknowledgment in the product documentation would be | |
appreciated but is not required. | |
2. Altered source versions must be plainly marked as such, and must not be | |
misrepresented as being the original software. | |
3. This notice may not be removed or altered from any source distribution. | |
*/ | |
/**
 * natcompare(a, b) -- 'natural order' comparison of two strings, suitable as
 * an Array.prototype.sort comparator. Runs of decimal digits are compared by
 * magnitude (so "file2" sorts before "file10"); everything else is compared
 * character by character.
 *
 * Modified from the original conversion: end-of-string detection uses strict
 * comparison, non-string arguments are coerced with String(), and digit runs
 * are compared in place instead of through substring copies.
 *
 * @param {*} a - first value; anything that is not a string is converted with String().
 * @param {*} b - second value; converted the same way.
 * @returns {number} negative if a sorts before b, positive if after, 0 on a tie.
 */
var natcompare = (function() {
    /**
     * True for characters with code <= 32 (space and control characters).
     * The empty string (charAt() past the end) yields NaN and is NOT whitespace.
     */
    var isWhitespaceChar = function(a) {
        return a.charCodeAt(0) <= 32;
    };

    /**
     * True for the ASCII digits '0'..'9'. The empty string yields NaN and
     * therefore reports false.
     */
    var isDigitChar = function(a) {
        var charCode = a.charCodeAt(0);
        return charCode >= 48 && charCode <= 57;
    };

    /**
     * Compare the digit runs starting at a[ia] and b[ib].
     *
     * The longest run of digits wins. That aside, the greatest value wins,
     * but we can't know that it will until we've scanned both numbers to
     * know that they have the same magnitude, so the first difference seen
     * is remembered in `bias`.
     *
     * @returns {number} -1, 0 or +1.
     */
    var compareRight = function(a, ia, b, ib) {
        var bias = 0;
        var ca;
        var cb;
        var aIsDigit;
        var bIsDigit;

        for (;; ia++, ib++) {
            ca = a.charAt(ia);
            cb = b.charAt(ib);
            aIsDigit = isDigitChar(ca);
            bIsDigit = isDigitChar(cb);

            if (!aIsDigit && !bIsDigit) {
                // Both runs ended together: same length, first difference decides.
                return bias;
            }
            if (!aIsDigit) {
                return -1;
            }
            if (!bIsDigit) {
                return +1;
            }
            if (bias === 0 && ca !== cb) {
                bias = ca < cb ? -1 : +1;
            }
        }
    };

    return function (a, b) {
        // Accept numbers (and any other non-string) by comparing their string form.
        if (typeof a !== "string") {
            a = String(a);
        }
        if (typeof b !== "string") {
            b = String(b);
        }

        var ia = 0, ib = 0;
        var nza = 0, nzb = 0;
        var ca, cb;
        var result;

        while (true) {
            // only count the number of zeroes leading the last number compared
            nza = nzb = 0;

            ca = a.charAt(ia);
            cb = b.charAt(ib);

            // skip over leading spaces or zeros
            while (isWhitespaceChar(ca) || ca === '0') {
                if (ca === '0') {
                    nza++;
                } else {
                    // only count consecutive zeroes
                    nza = 0;
                }
                ca = a.charAt(++ia);
            }
            while (isWhitespaceChar(cb) || cb === '0') {
                if (cb === '0') {
                    nzb++;
                } else {
                    // only count consecutive zeroes
                    nzb = 0;
                }
                cb = b.charAt(++ib);
            }

            // process run of digits
            if (isDigitChar(ca) && isDigitChar(cb)) {
                result = compareRight(a, ia, b, ib);
                if (result !== 0) {
                    return result;
                }
            }

            // charAt() returns '' past the end of the string. This must be a
            // strict check: a loose `== 0` is also true for characters such as
            // U+00A0 that convert to the number 0, which ended the comparison
            // early and reported a false tie.
            if (ca === '' && cb === '') {
                // The strings compare the same apart from trailing zero
                // counts. Perhaps the caller will want to do a plain string
                // comparison to break a remaining tie.
                return nza - nzb;
            }

            if (ca < cb) {
                return -1;
            }
            if (ca > cb) {
                return +1;
            }

            ++ia;
            ++ib;
        }
    };
}());
The problem is that natcompare('10705970246197248', '9999625058521088') returns -1.
Oh, sorry, totally misread that. That does look like a problem. I didn't write this, but it does seem faulty.
Seeing how this is linked from http://groups.google.com/group/twitter-development-talk/browse_thread/thread/71c25e20ddd3e3f0 and is intended to be an example for how to sort large string integers, it might be good to put a big warning that it doesn't work :)
Yep, we're looking into this. Thanks for the report!
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Not sure I see the issue?