Created
October 13, 2015 09:16
-
-
Save klauspost/be90532e3f3242d3014a to your computer and use it in GitHub Desktop.
Hashtag identifier & splitter.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// matchTags matches a single token that consists of exactly one tag.
//
//   - ^       : the token must begin with the hash sign '#'.
//   - Group 1 : the tag text, one or more characters that are not
//     whitespace, '#', '.', ',', '!' or ')'.
//   - $       : the tag must extend to the end of the token.
//
// The pattern deliberately has no "preceded by whitespace" alternative.
// Alternation has the lowest precedence in regular expressions, so a pattern
// written as `[^\S]|^#(...)$` matches any lone whitespace character and
// reports an empty group 1 for it, instead of matching a tag.
var matchTags = regexp.MustCompile(`^#([^\s#.,!)]+)$`)
// tagsSplitter reports whether the rune c marks the end of a tag.
//
// A tag ends at one of the punctuation characters '.', ',', '!' or ')', or at
// any rune that unicode.IsSpace classifies as whitespace.
func tagsSplitter(c rune) bool {
	switch c {
	case '.', ',', '!', ')':
		return true
	default:
		// Not one of the terminating punctuation marks; only whitespace
		// can still end the tag.
		return unicode.IsSpace(c)
	}
}
// getTags matches tags and returns them as an array of strings | |
// | |
// The hashtag itself is NOT included as part of the tag string | |
// | |
// The function should match the javascript regex: '/([^\S]|^)#([^\s#.,!)]+)(?![^\s.,!)])/g'. | |
// Since golang re2 engine does not have positive lookahead, the end of the tag is matched by splitting the input string. | |
// The 'tagsSplitter' function defines the end of a tag, and the 'matchTags' regex has a requirement that it must match the end of a string. | |
func getTags(s string) []string { | |
res := make([]string, 0) | |
fields := strings.FieldsFunc(s, tagsSplitter) | |
for _, v := range fields { | |
sub := matchTags.FindStringSubmatch(v) | |
if len(sub) > 1 { | |
res = append(res, sub[1]) | |
} | |
} | |
return res | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment