Skip to content

Instantly share code, notes, and snippets.

@johtani
Created October 29, 2024 01:28
Show Gist options
  • Save johtani/6d9979e9c0afcf48537c7d10b2fafabb to your computer and use it in GitHub Desktop.
Save johtani/6d9979e9c0afcf48537c7d10b2fafabb to your computer and use it in GitHub Desktop.
GSE difference between dictionaries
package main
import (
"fmt"
"github.com/go-ego/gse"
)
// main segments the same sample text with two gse segmenters -- one loaded
// with the library's default dictionary and one loaded with the "ja"
// dictionary -- and prints the results of Cut and CutSearch, each without and
// with the optional hmm flag set to true, so the outputs can be compared.
func main() {
	var segJa, segZh gse.Segmenter

	// LoadDict reports failures through its error return. Abort on failure
	// instead of silently comparing output from a segmenter whose dictionary
	// did not load.
	if err := segZh.LoadDict(); err != nil {
		panic(fmt.Errorf("loading default dictionary: %w", err))
	}
	if err := segJa.LoadDict("ja"); err != nil {
		panic(fmt.Errorf("loading ja dictionary: %w", err))
	}

	text := "本报讯(首席记者 赵芳洲)平安杭州建设20周年大会昨日下午召开。省委副书记、市委书记刘捷"

	// Cut without the hmm argument.
	fmt.Println("With default dictionary")
	tokenizeByGse(text, segZh)
	fmt.Println("With Japanese dictionary")
	tokenizeByGse(text, segJa)

	// Cut with hmm = true.
	fmt.Println("With default dictionary")
	tokenizeByGse(text, segZh, true)
	fmt.Println("With Japanese dictionary")
	tokenizeByGse(text, segJa, true)

	// CutSearch without the hmm argument.
	fmt.Println("With default dictionary")
	tokenizeByGseCutSearch(text, segZh)
	fmt.Println("With Japanese dictionary")
	tokenizeByGseCutSearch(text, segJa)

	// CutSearch with hmm = true.
	fmt.Println("With default dictionary")
	tokenizeByGseCutSearch(text, segZh, true)
	fmt.Println("With Japanese dictionary")
	tokenizeByGseCutSearch(text, segJa, true)
}
// tokenizeByGse prints a banner line followed by the result of seg.Cut for
// text. If no hmm value is supplied, Cut is called with the text alone.
// Otherwise the first hmm value is shown in the banner and forwarded to Cut;
// any further values are ignored.
func tokenizeByGse(text string, seg gse.Segmenter, hmm ...bool) {
	if len(hmm) == 0 {
		fmt.Println("------- GSE cut --------")
		fmt.Println(seg.Cut(text))
		return
	}

	flag := hmm[0]
	fmt.Printf("------- GSE cut / hmm[%v] --------\n", flag)
	fmt.Println(seg.Cut(text, flag))
}
// tokenizeByGseCutSearch prints a banner line followed by the result of
// seg.CutSearch for text. If no hmm value is supplied, CutSearch is called
// with the text alone. Otherwise the first hmm value is shown in the banner
// and forwarded to CutSearch; any further values are ignored.
func tokenizeByGseCutSearch(text string, seg gse.Segmenter, hmm ...bool) {
	if len(hmm) == 0 {
		fmt.Println("------- GSE cut search --------")
		fmt.Println(seg.CutSearch(text))
		return
	}

	flag := hmm[0]
	fmt.Printf("------- GSE cut search / hmm[%v] --------\n", flag)
	fmt.Println(seg.CutSearch(text, flag))
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment