Last active
May 14, 2024 15:57
-
-
Save oblank/73f31be226bf0680ddeb to your computer and use it in GitHub Desktop.
bleve结合 jieba 分词实现中文分词
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"fmt" | |
"github.com/blevesearch/bleve" | |
_ "github.com/wangbin/jiebago/analyse/tokenizers" | |
"log" | |
) | |
func main() { | |
// open a new index | |
indexMapping := bleve.NewIndexMapping() | |
err := indexMapping.AddCustomTokenizer("jieba", | |
map[string]interface{}{ | |
"file": "/Users/wangbin/mygo/src/github.com/wangbin/jiebago/dict.txt", | |
"type": "jieba", | |
}) | |
if err != nil { | |
log.Fatal(err) | |
} | |
err = indexMapping.AddCustomAnalyzer("jieba", | |
map[string]interface{}{ | |
"type": "custom", | |
"tokenizer": "jieba", | |
"token_filters": []string{ | |
"possessive_en", | |
"to_lower", | |
"stop_en", | |
}, | |
}) | |
if err != nil { | |
log.Fatal(err) | |
} | |
indexMapping.DefaultAnalyzer = "jieba" | |
index, err := bleve.New("example.bleve", indexMapping) | |
if err != nil { | |
log.Fatal(err) | |
} | |
docs := []struct { | |
Title string | |
Name string | |
}{ | |
{ | |
Title: "Doc 1", | |
Name: "This is the first document we’ve added", | |
}, | |
{ | |
Title: "Doc 2", | |
Name: "The second one 你 中文测试中文 is even more interesting! 吃水果", | |
}, | |
{ | |
Title: "Doc 3", | |
Name: "买水果然后来世博园。", | |
}, | |
{ | |
Title: "Doc 4", | |
Name: "工信处女干事每月经过下属科室都要亲口交代24口交换机等技术性器件的安装工作", | |
}, | |
{ | |
Title: "Doc 5", | |
Name: "咱俩交换一下吧。", | |
}, | |
} | |
// index docs | |
for _, doc := range docs { | |
index.Index(doc.Title, doc) | |
} | |
// search for some text | |
for _, keyword := range []string{"水果世博园", "你", "first", "中文", "交换机", "交换"} { | |
query := bleve.NewMatchQuery(keyword) | |
search := bleve.NewSearchRequest(query) | |
search.Highlight = bleve.NewHighlight() | |
searchResults, err := index.Search(search) | |
if err != nil { | |
log.Fatal(err) | |
} | |
fmt.Printf("Result of %s: %s\n", keyword, searchResults) | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment