-
-
Save stoewer/fbe273b711e6a06315d19552dd4d33e6 to your computer and use it in GitHub Desktop.
import (
"fmt"
"strings"
"regexp"
)
var (
	// matchFirstCap matches an upper-case letter that is directly followed by
	// an upper-case letter and a lower-case letter, i.e. the point where a run
	// of capitals ends and a capitalized word begins ("PR" + "e" in
	// "HTTPRequest").
	matchFirstCap = regexp.MustCompile("([A-Z])([A-Z][a-z])")
	// matchAllCap matches a lower-case letter or digit that is directly
	// followed by an upper-case letter ("lC" in "camelCase").
	matchAllCap = regexp.MustCompile("([a-z0-9])([A-Z])")
)

// ToSnakeCase converts a camelCase or PascalCase string to snake_case.
//
// An underscore is inserted at every boundary found by matchFirstCap and
// matchAllCap, and the result is lower-cased. Neither pattern matches an
// underscore, so separators that are already present are not doubled:
// "already_Snake" becomes "already_snake".
func ToSnakeCase(str string) string {
	snake := matchFirstCap.ReplaceAllString(str, "${1}_${2}")
	snake = matchAllCap.ReplaceAllString(snake, "${1}_${2}")
	return strings.ToLower(snake)
}
Fails for the input
already_Snake
Changing the first regex to ([A-Z])([A-Z][a-z])
seems to solve the issue:
import (
"fmt"
"strings"
"regexp"
)
// Word-boundary patterns used by ToSnakeCase.
var (
	// matchFirstCap: an upper-case letter followed by an upper-case letter
	// and a lower-case letter.
	matchFirstCap = regexp.MustCompile("([A-Z])([A-Z][a-z])")
	// matchAllCap: a lower-case letter or digit followed by an upper-case
	// letter.
	matchAllCap = regexp.MustCompile("([a-z0-9])([A-Z])")
)

// ToSnakeCase returns str in snake_case: an underscore is placed between the
// two groups of every matchFirstCap match, then of every matchAllCap match,
// and the whole string is lower-cased.
func ToSnakeCase(str string) string {
	const joined = "${1}_${2}"
	split := matchFirstCap.ReplaceAllString(str, joined)
	return strings.ToLower(matchAllCap.ReplaceAllString(split, joined))
}
After adapting it to our project's guidelines and requirements, I have the following function:
// Word-boundary patterns used by ToSnakeCase.
var (
	// matchFirstCap: an upper-case letter followed by an upper-case letter
	// and a lower-case letter.
	matchFirstCap = regexp.MustCompile("([A-Z])([A-Z][a-z])")
	// matchAllCap: a lower-case letter or digit followed by an upper-case
	// letter.
	matchAllCap = regexp.MustCompile("([a-z0-9])([A-Z])")
)

// ToSnakeCase converts the provided string to snake_case.
//
// Underscores are inserted at the boundaries found by matchFirstCap and
// matchAllCap, every "-" is turned into "_", and the result is lower-cased.
//
// Based on https://gist.github.com/stoewer/fbe273b711e6a06315d19552dd4d33e6
func ToSnakeCase(input string) string {
	const withUnderscore = "${1}_${2}"
	words := matchAllCap.ReplaceAllString(
		matchFirstCap.ReplaceAllString(input, withUnderscore),
		withUnderscore,
	)
	return strings.ToLower(strings.ReplaceAll(words, "-", "_"))
}
This function passes the following (extended) test:
import (
"testing"
"github.com/stretchr/testify/assert"
)
// Test_ToSnakeCase runs ToSnakeCase over a table of inputs and compares each
// result with the expected snake_case form. Every table entry runs as its own
// subtest, and the assertion message carries the input, so a failure points
// directly at the offending case.
func Test_ToSnakeCase(t *testing.T) {
	// Based on https://gist.github.com/stoewer/fbe273b711e6a06315d19552dd4d33e6
	tests := []struct {
		input    string
		expected string
	}{
		{"", ""},
		{"camelCase", "camel_case"},
		{"PascalCase", "pascal_case"},
		{"snake_case", "snake_case"},
		{"Pascal_Snake", "pascal_snake"},
		{"SCREAMING_SNAKE", "screaming_snake"},
		{"kebab-case", "kebab_case"},
		{"Pascal-Kebab", "pascal_kebab"},
		{"SCREAMING-KEBAB", "screaming_kebab"},
		{"A", "a"},
		{"AA", "aa"},
		{"AAA", "aaa"},
		{"AAAA", "aaaa"},
		{"AaAa", "aa_aa"},
		{"HTTPRequest", "http_request"},
		{"BatteryLifeValue", "battery_life_value"},
		{"Id0Value", "id0_value"},
		{"ID0Value", "id0_value"},
	}
	for _, test := range tests {
		test := test // per-iteration copy for the closure (needed before Go 1.22)
		t.Run(test.input, func(t *testing.T) {
			result := ToSnakeCase(test.input)
			assert.Equal(t, test.expected, result, "input: %q", test.input)
		})
	}
}
Jen's function is awesome and covers a lot of different cases. But using regexp
when you just need to convert each uppercase letter into '_' plus its lowercase form is overkill.
Here is a very simple and much faster solution:
// toSnakeCase lower-cases s and puts an underscore in front of every
// upper-case rune except one at the very start of the string.
func toSnakeCase(s string) string {
	out := make([]rune, 0, len(s))
	for pos, ch := range s {
		// A leading capital gets no separator; every other capital does.
		if pos > 0 && unicode.IsUpper(ch) {
			out = append(out, '_')
		}
		out = append(out, unicode.ToLower(ch))
	}
	return string(out)
}
And here is a benchmark for Jen's variant and mine:
BenchmarkRegexp-8 92520 11980 ns/op 1061 B/op 59 allocs/op
BenchmarkRange-8 609658 1807 ns/op 744 B/op 20 allocs/op
6 times faster and 3 times fewer allocations!
@porfirion thanks for the proposal and comparison! I noticed that most of my unit tests fail when using your version. Therefore, I tried to write a version that passes (nearly) all of the tests. It could still use some cleanup, and the unit test for HTTPRequest
still fails, but it offers nearly the same performance improvement.
Code
// ToSnakeCase converts s to snake_case in a single pass without regular
// expressions.
//
// Every rune that is neither a letter nor a digit is replaced by '_'. All
// other runes are lower-cased. An underscore is inserted before an upper-case
// rune (other than the first rune) when either
//   - the previous rune is a non-upper-case letter or a digit
//     ("camelCase" -> "camel_case", "Id0Value" -> "id0_value"), or
//   - the previous rune is upper-case and the next rune is lower-case, which
//     marks the end of an acronym ("HTTPRequest" -> "http_request").
func ToSnakeCase(s string) string {
	runes := []rune(s)
	res := make([]rune, 0, len(s))
	for i, r := range runes {
		switch {
		case !unicode.IsLetter(r) && !unicode.IsDigit(r):
			// Separators and any other symbol collapse to an underscore.
			res = append(res, '_')
		case unicode.IsUpper(r) && i > 0:
			prev := runes[i-1]
			afterLowerOrDigit := (unicode.IsLetter(prev) && !unicode.IsUpper(prev)) || unicode.IsDigit(prev)
			// Look one rune ahead so the last capital of an acronym starts
			// the next word instead of being glued to the acronym.
			endsAcronym := unicode.IsUpper(prev) && i+1 < len(runes) && unicode.IsLower(runes[i+1])
			if afterLowerOrDigit || endsAcronym {
				res = append(res, '_')
			}
			res = append(res, unicode.ToLower(r))
		default:
			res = append(res, unicode.ToLower(r))
		}
	}
	return string(res)
}
Benchmarks
// BenchmarkToSnakeCaseRegex calls ToSnakeCaseRegex b.N times with the fixed
// input "BatteryLifeValue".
func BenchmarkToSnakeCaseRegex(b *testing.B) {
	const input = "BatteryLifeValue"
	for remaining := b.N; remaining > 0; remaining-- {
		ToSnakeCaseRegex(input)
	}
}
// BenchmarkToSnakeCase calls ToSnakeCase b.N times with the fixed input
// "BatteryLifeValue".
func BenchmarkToSnakeCase(b *testing.B) {
	const input = "BatteryLifeValue"
	for remaining := b.N; remaining > 0; remaining-- {
		ToSnakeCase(input)
	}
}
Results
BenchmarkToSnakeCaseRegex-12 360540 3282 ns/op 282 B/op 11 allocs/op
BenchmarkToSnakeCase-12 1726478 714 ns/op 224 B/op 3 allocs/op
Hi, I made one more version; it passes all the tests and has better performance.
The source file is here: https://gist.github.com/hxsf/7f5392c0153d3a8607c42eefed02b8cd
Bench
> go test -benchmem -bench 'One'
goos: darwin
goarch: amd64
pkg: mesh-sidecar/cmd/vars
cpu: Intel(R) Core(TM) i7-7700 CPU @ 3.60GHz
BenchmarkOneCase/ToSnakeCaseRegex-8 490156 2403 ns/op 305 B/op 12 allocs/op
BenchmarkOneCase/ToSnakeCaseByJensSkipr-8 4163742 286.1 ns/op 216 B/op 3 allocs/op
BenchmarkOneCase/ToSnakeCase-8 8867581 131.4 ns/op 24 B/op 1 allocs/op
BenchmarkAllInOne/ToSnakeCaseRegex-8 51538 22480 ns/op 2307 B/op 137 allocs/op
BenchmarkAllInOne/ToSnakeCaseByJensSkipr-8 395026 3051 ns/op 1352 B/op 41 allocs/op
BenchmarkAllInOne/ToSnakeCase-8 857940 1491 ns/op 232 B/op 17 allocs/op
PASS
ok mesh-sidecar/cmd/vars 7.930s
@hxsf looks awesome! Yeah, I forgot about additional capacity for underscores %)
@hxsf: That looks great! Do you mind adding a license to your gist?
@adombeck add MIT license.
@hxsf: Great, thanks a lot!
@hxsf Test failed for
MyLIFEIsAwesomE
should be my_life_is_awesom_e
but got mylife_is_awesome
Japan125Canada130Australia150
should be japan125_canada130_australia150
but got japan1_2_5_canada1_3_0_australia1_5_0
@elvizlai I updated the code, and it works now.
good guys ! keep it up!
Kebab case should be included here as well.
@abdennour just replace '_' with '-' in the code, and change the function name. I think it will work.
Fails for the input
already_Snake