-
-
Save hackerzhut/7f73ec9018d7f7f3f3cb78caa65e1177 to your computer and use it in GitHub Desktop.
Performance comparison of various base62 encoder techniques in Go
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"fmt" | |
"math" | |
"reflect" | |
"runtime" | |
"sort" | |
"time" | |
) | |
// (62^i)-1 | i = 1..11 | |
var limits = [...]uint64{61, 3843, 238327, 14776335, 916132831, 56800235583, 3521614606207, 218340105584895, 13537086546263551, 839299365868340199, math.MaxUint64} | |
const abc = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789" | |
type b62encoder func(uint64) string | |
func b62Encode1(n uint64) string { | |
const Log62 = 4.127134385045091555346396446 | |
l := 1 | |
if n > 0 { | |
l = 1 + int(math.Log(float64(n))/Log62) | |
} | |
result := make([]byte, l) | |
for i := l - 1; i >= 0; i-- { | |
result[i] = abc[n%62] | |
n = n / 62 | |
} | |
return string(result) | |
} | |
func b62Encode2(n uint64) string { | |
var l int | |
var v uint64 | |
for l, v = range limits { | |
if n <= v { | |
break | |
} | |
} | |
result := make([]byte, l+1) | |
for i := l; i >= 0; i-- { | |
result[i] = abc[n%62] | |
n = n / 62 | |
} | |
return string(result) | |
} | |
func b62Encode3(n uint64) string { | |
l := sort.Search(len(limits), func(i int) bool { return limits[i] > n }) | |
result := make([]byte, l+1) | |
for i := l; i >= 0; i-- { | |
result[i] = abc[n%62] | |
n = n / 62 | |
} | |
return string(result) | |
} | |
func b62Encode4(n uint64) string { | |
lo, hi := 0, len(limits)-1 | |
for lo <= hi { | |
m := (lo + hi) / 2 | |
if limits[m] <= n { | |
lo = m + 1 | |
} else { | |
hi = m - 1 | |
} | |
} | |
result := make([]byte, lo+1) | |
for i := lo; i >= 0; i-- { | |
result[i] = abc[n%62] | |
n = n / 62 | |
} | |
return string(result) | |
} | |
func b62Encode5(n uint64) string { | |
result := make([]byte, 0) | |
for { | |
result = append([]byte{abc[n%62]}, result...) | |
n = n / 62 | |
if n == 0 { | |
break | |
} | |
} | |
return string(result) | |
} | |
func b62Encode6(n uint64) string { | |
result := make([]byte, 11) | |
i := 10 | |
for { | |
result[i] = abc[n%62] | |
n = n / 62 | |
if n == 0 { | |
break | |
} | |
i-- | |
} | |
return string(result[i:]) | |
} | |
func b62Decode(a string) (uint64, error) { | |
if len(a) == 0 { | |
return 0, fmt.Errorf("base62 string must not be empty") | |
} | |
var v uint64 | |
for i := 0; i < len(a); i++ { | |
c := int(a[i]) | |
p := 0 | |
switch { | |
case int('a') <= c && c <= int('z'): | |
p = c - int('a') | |
case int('A') <= c && c <= int('Z'): | |
p = c - int('A') + 26 | |
case int('0') <= c && c <= int('9'): | |
p = c - int('0') + 52 | |
default: | |
return 0, fmt.Errorf("invalid base62 symbol: '%c'", a[i]) | |
} | |
v *= 62 | |
v += uint64(p) | |
} | |
return v, nil | |
} | |
func test(fn b62encoder) bool { | |
const N uint64 = 1000000 | |
fmt.Printf("\n***%v\n", runtime.FuncForPC(reflect.ValueOf(fn).Pointer()).Name()) | |
startValues := make([]uint64, 0) | |
for v := 0; v < 12; v++ { | |
startValues = append(startValues, uint64(math.Pow(62, float64(v)))-1, uint64(math.Pow(62, float64(v)))) | |
} | |
startValues = append(startValues, math.MaxUint64/32, math.MaxUint64/16, math.MaxUint64/8, math.MaxUint64/4, math.MaxUint64/2, math.MaxUint64-N-1) | |
nPassed := 0 | |
passed := true | |
for _, a := range startValues { | |
fmt.Printf("Starting from %25d ...", a) | |
t0 := time.Now() | |
for i := a; i < N+a; i++ { | |
b62 := fn(i) | |
r, err := b62Decode(b62) | |
if r != i || err != nil { | |
fmt.Printf("ERROR: %v != %v (%v) (%v)", r, i, b62, b62Encode6(i)) | |
passed = false | |
break | |
} | |
} | |
if passed { | |
nPassed++ | |
} | |
fmt.Printf(" %v\n", time.Since(t0)) | |
} | |
return nPassed == len(startValues) | |
} | |
func main() { | |
tests := [...]b62encoder{b62Encode1, b62Encode2, b62Encode3, b62Encode4, b62Encode5, b62Encode6} | |
nPassed := 0 | |
for _, t := range tests { | |
if test(t) { | |
fmt.Println("PASSED.") | |
nPassed++ | |
} else { | |
fmt.Println("FAILED.") | |
} | |
} | |
fmt.Println() | |
if nPassed == len(tests) { | |
fmt.Println("ALL TESTS PASSED.") | |
} else { | |
fmt.Println("%d OF %d TESTS FAILED.", len(tests)-nPassed, len(tests)) | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment