Skip to content

Instantly share code, notes, and snippets.

@ythosa
Last active October 14, 2025 16:17
Show Gist options
  • Save ythosa/3647493c08ac05124a5647d3535da613 to your computer and use it in GitHub Desktop.
Save ythosa/3647493c08ac05124a5647d3535da613 to your computer and use it in GitHub Desktop.
Effective way to find `hashtags` in raw text in Go
// ParseHashtags extracts hashtags from raw text and returns them joined by a
// single space, in order of appearance. Duplicates are kept. The empty string
// is returned when no hashtag is found.
//
// A candidate '#' yields a hashtag when all of the following hold:
//   - the text between the preceding whitespace (or the start of the text not
//     yet consumed) and the '#' contains no letters or digits;
//   - the first character after '#' is a letter;
//   - the body is made of letters, digits, '_', '.' and '-', and ends on a
//     letter or digit (trailing separators are not part of the hashtag);
//   - the body is at least three characters (runes, not bytes) long.
//
// A run of consecutive '#' characters is collapsed: only the last one starts
// the hashtag, so "###tag" yields "#tag".
//
// Text consumed by an earlier candidate is not re-examined by the prefix
// check, so "#abc#def" yields "#abc #def".
func ParseHashtags(input string) string {
	// Minimum number of characters in a hashtag body (the part after '#').
	const minHashtagLength = 3

	var foundHashtags []string

searchHashtag:
	for {
		hashtagIndex := strings.IndexByte(input, '#')
		if hashtagIndex < 0 {
			break // no more candidates
		}

		// The '#' must not be glued to a word: reject it when anything between
		// the last whitespace and the '#' is a letter or a digit.
		if hashtagIndex > 0 {
			wordStart := max(0, strings.LastIndexFunc(input[:hashtagIndex], unicode.IsSpace))
			for _, r := range input[wordStart:hashtagIndex] {
				if unicode.IsLetter(r) || unicode.IsDigit(r) {
					input = input[hashtagIndex+1:] // skip past the invalid '#'
					continue searchHashtag
				}
			}
		}

		// Collapse a run of '#' so that input starts with exactly the last one.
		input = input[hashtagIndex:]
		hashes := 0
		for hashes < len(input) && input[hashes] == '#' {
			hashes++
		}
		input = input[hashes-1:]

		// Scan the body. bodyEnd is the byte length of the body up to and
		// including its last letter or digit, so trailing separators are
		// excluded from the result.
		body := input[1:]
		bodyEnd := 0
		for i, r := range body {
			// The first character after '#' must be a letter.
			if i == 0 && !unicode.IsLetter(r) {
				input = body
				continue searchHashtag
			}
			if unicode.IsLetter(r) || unicode.IsDigit(r) {
				bodyEnd = i + utf8.RuneLen(r)
			} else if r != '_' && r != '.' && r != '-' {
				break
			}
		}

		// Enforce the minimum length in runes rather than bytes, so that
		// multi-byte characters are not counted several times.
		if utf8.RuneCountInString(body[:bodyEnd]) >= minHashtagLength {
			foundHashtags = append(foundHashtags, input[:bodyEnd+1])
		}
		input = input[bodyEnd+1:]
	}

	return strings.Join(foundHashtags, " ")
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment