Created
October 29, 2024 01:28
-
-
Save johtani/6d9979e9c0afcf48537c7d10b2fafabb to your computer and use it in GitHub Desktop.
GSE difference between dictionaries
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"fmt" | |
"github.com/go-ego/gse" | |
) | |
func main() { | |
var segJa, segZh gse.Segmenter | |
segZh.LoadDict() | |
segJa.LoadDict("ja") | |
text := "本报讯(首席记者 赵芳洲)平安杭州建设20周年大会昨日下午召开。省委副书记、市委书记刘捷" | |
fmt.Println("With default dictionary") | |
tokenizeByGse(text, segZh) | |
fmt.Println("With Japanese dictionary") | |
tokenizeByGse(text, segJa) | |
fmt.Println("With default dictionary") | |
tokenizeByGse(text, segZh, true) | |
fmt.Println("With Japanese dictionary") | |
tokenizeByGse(text, segJa, true) | |
fmt.Println("With default dictionary") | |
tokenizeByGseCutSearch(text, segZh) | |
fmt.Println("With Japanese dictionary") | |
tokenizeByGseCutSearch(text, segJa) | |
fmt.Println("With default dictionary") | |
tokenizeByGseCutSearch(text, segZh, true) | |
fmt.Println("With Japanese dictionary") | |
tokenizeByGseCutSearch(text, segJa, true) | |
} | |
func tokenizeByGse(text string, seg gse.Segmenter, hmm ...bool) { | |
if len(hmm) > 0 { | |
fmt.Println(fmt.Sprintf("------- GSE cut / hmm[%v] --------", hmm[0])) | |
fmt.Println(seg.Cut(text, hmm[0])) | |
} else { | |
fmt.Println("------- GSE cut --------") | |
fmt.Println(seg.Cut(text)) | |
} | |
} | |
func tokenizeByGseCutSearch(text string, seg gse.Segmenter, hmm ...bool) { | |
if len(hmm) > 0 { | |
fmt.Println(fmt.Sprintf("------- GSE cut search / hmm[%v] --------", hmm[0])) | |
fmt.Println(seg.CutSearch(text, hmm[0])) | |
} else { | |
fmt.Println("------- GSE cut search --------") | |
fmt.Println(seg.CutSearch(text)) | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment