Last active
November 21, 2019 05:46
-
-
Save prologic/6b4f4943a912d8c8641e271c18329f28 to your computer and use it in GitHub Desktop.
Split a string into tokens with a given separator supporting escape sequences
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"errors" | |
"fmt" | |
) | |
// tokenizeString splits s into tokens on every unescaped occurrence of sep.
//
// A rune immediately preceded by escape is copied into the current token
// verbatim, so an escaped separator does not split and an escaped escape
// yields a literal escape rune. The escape rune itself is dropped from the
// output. Empty tokens are preserved: consecutive separators produce empty
// strings, and an empty s yields a single empty token.
//
// If sep and escape are the same rune, the rune is always treated as an
// escape and s is never split.
//
// An error is returned when s ends with a dangling escape; in that case the
// returned slice is nil.
func tokenizeString(s string, sep, escape rune) (tokens []string, err error) {
	var cur []rune
	escaped := false
	for _, r := range s {
		switch {
		case escaped:
			// The previous rune was the escape: take r literally, whatever it is.
			cur = append(cur, r)
			escaped = false
		case r == escape:
			escaped = true
		case r == sep:
			tokens = append(tokens, string(cur))
			// string(cur) copied the runes, so the buffer can be reused.
			cur = cur[:0]
		default:
			cur = append(cur, r)
		}
	}
	if escaped {
		// Do not hand back a partially built result together with an error.
		return nil, errors.New("invalid terminal escape")
	}
	return append(tokens, string(cur)), nil
}
func main() { | |
const sample = `one \ two three` | |
const separator = ' ' | |
const escape = '\\' | |
fmt.Printf("Input: %q\n", sample) | |
tokens, err := tokenizeString(sample, separator, escape) | |
if err != nil { | |
fmt.Println("error:", err) | |
} else { | |
fmt.Printf("Tokens: %q\n", tokens) | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This is most useful when, say, you want to split a string into tokens separated by spaces while still allowing the separator itself to be escaped.
Example: