Created
May 21, 2012 10:40
-
-
Save vhyza/2761769 to your computer and use it in GitHub Desktop.
Elasticsearch czech analyzer test
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'rubygems'
require 'tire'
# Demo model stored in Elasticsearch through Tire's persistence mixin,
# used to try out the 'czech' analyzer on its properties.
class Article
  include Tire::Model::Persistence

  # Use a dedicated index name so that running this script cannot
  # accidentally delete an existing 'articles' index of yours.
  index_name 'articles-czech-analyzer-test'

  # Property analyzed with the Czech analyzer.
  property :title, type: 'string', analyzer: 'czech'

  # A property can be declared with several analyzers and then searched
  # through any of them (see the searches further down in this file).
  # http://www.elasticsearch.org/guide/reference/mapping/multi-field-type.html
  property :content, type: 'multi_field',
                     fields: {
                       english: { type: 'string', analyzer: 'snowball' },
                       czech: { type: 'string', analyzer: 'czech' }
                     }
end
# Recreate the index so that it is built with the mapping declared on Article.
Article.index.delete
Article.create_elasticsearch_index

# Elasticsearch occasionally reports that no shards are available right
# after the index has been created, so give it a moment.
sleep 1

# Create a couple of articles.
Article.create title: 'Vánoční stromky', content: 'Lorem ipsum dolor sit amet'
Article.create title: 'Bez nadpisu', content: 'Vánoční stromky'

# Refresh the index, just to be sure Elasticsearch has already stored them.
Article.index.refresh

# Search in the title, which uses the Czech analyzer.
#
# http://localhost:9200/articles-czech-analyzer-test/_analyze?text=V%C3%A1no%C4%8Dn%C3%AD%20stromky&field=title
# shows which tokens the text is broken into.
puts "Podle title\n---"
p Article.search 'title:stromek'

# Search in the English-analyzed sub-field of content.
puts '', "Podle content.english\n---"
p Article.search 'content.english:stromek'

# Search in the Czech-analyzed sub-field of content.
puts '', "Podle content.czech\n---"
p Article.search 'content.czech:stromek'

# Search across every sub-field of the multi_field at once; per the message
# printed below, Elasticsearch applies the matching analyzer to each sub-field.
puts '', "Je mozno hledat nad celym multifieldem a ES pouzije spravne analyzery pro kazdy field multifieldu\n---"
result = Article.search do
  query { string 'stromek', fields: ['content.*'] }
end
p result
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment