Last active
May 15, 2019 18:52
-
-
Save kylelemons/18252b7e4c1892cea03303d1819bd865 to your computer and use it in GitHub Desktop.
Sort a slice of strings, treating numbers by value instead of by ASCII.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"fmt" | |
"sort" | |
"strings" | |
"unicode" | |
) | |
func main() { | |
s := []string{ | |
"12345", | |
"123456", | |
"foo-223", | |
"foo-023", | |
"foo-1234", | |
"bar-100", | |
"bar-99", | |
"bar-100-bar", | |
"bar-99-bar-9", | |
"bar-99-bar-10", | |
} | |
sort.Slice(s, Numerically(s)) | |
for _, s := range s { | |
fmt.Println(s) | |
} | |
} | |
// Numerically is a helper for passing to sort.Slice or sorting a slice of strings numerically. | |
func Numerically(s []string) func(int, int) bool { | |
return func(si, sj int) bool { | |
return NumericLess(s[si], s[sj]) | |
} | |
} | |
// NumericLess returns true if a is less than b treating numbers within the string as a group, | |
// rather than byte by byte. For example, "foo-9-bar" is less than "foo-10-baz". | |
func NumericLess(a, b string) bool { | |
not := func(f func(rune) bool) func(rune) bool { return func(r rune) bool { return !f(r) } } | |
digits := func(s string) int { | |
digits := strings.IndexFunc(s, not(unicode.IsDigit)) | |
if digits == -1 { | |
return len(s) | |
} | |
return digits | |
} | |
ai, bi := 0, 0 | |
for { | |
if aend, bend := ai >= len(a), bi >= len(b); aend != bend { | |
// The shorter one is less if all prior characters are equal. | |
return aend | |
} else if aend && bend { | |
// Neither one is less if all characters are equal. | |
return false | |
} | |
ac, bc := a[ai], b[bi] | |
adigit, bdigit := unicode.IsDigit(rune(ac)), unicode.IsDigit(rune(bc)) | |
if adigit != bdigit { | |
// Sort numeric before non-numeric. | |
return adigit | |
} else if adigit && bdigit { | |
// Sort numeric portion all at once. | |
adigits, bdigits := digits(a[ai:]), digits(b[bi:]) | |
if adigits != bdigits { | |
// The string with fewer digits is less. | |
return adigits < bdigits | |
} | |
if asub, bsub := a[ai:][:adigits], b[bi:][:bdigits]; asub != bsub { | |
// Lexicographic comparison works for all-digit strings. | |
return asub < bsub | |
} | |
ai += adigits | |
bi += bdigits | |
} | |
if ac != bc { | |
return ac < bc | |
} | |
ai++ | |
bi++ | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment