Skip to content

Instantly share code, notes, and snippets.

@olivoil
Forked from karmi/ngrams-and-tire.rb
Created June 5, 2012 19:49
Show Gist options
  • Save olivoil/2877321 to your computer and use it in GitHub Desktop.
Save olivoil/2877321 to your computer and use it in GitHub Desktop.
An example of using ngram analysis in ElasticSearch with the Tire rubygem
# An example of using ngram analysis in ElasticSearch with the Tire rubygem
# ==========================================================================
# The original, raw example: https://gist.github.com/988923
require 'rubygems'
require 'tire'
require 'yajl/json_gem'
# Minimal document wrapper handed to `store` further down in this script.
#
# It holds an attribute hash and exposes a document type name plus a JSON
# serialization of those attributes.
class URL
  # attributes - Hash of document fields, e.g. `:url => "http://example.com"`.
  #              Defaults to an empty Hash.
  def initialize(attributes = {})
    @attributes = attributes
  end

  # Returns the document type name as a String.
  def type
    'url'
  end

  # Returns the attribute hash serialized as a JSON String.
  def to_indexed_json
    @attributes.to_json
  end
end
# Index settings: two custom token filters and one custom analyzer built
# on top of them.
index_settings = {
  "index" => {
    "analysis" => {
      "filter" => {
        # A custom ngram filter
        "url_ngram" => {
          "type"     => "nGram",
          "max_gram" => 5,
          "min_gram" => 3
        },
        # A custom stop words filter
        "url_stop" => {
          "type"      => "stop",
          "stopwords" => ["http", "https"]
        }
      },
      "analyzer" => {
        "url_analyzer" => {
          # Lowercase everything with the built-in tokenizer
          "tokenizer" => "lowercase",
          # The analyzer chain: remove generic stopwords, remove URL
          # specific stopwords, then apply the custom ngram filter
          "filter"    => ["stop", "url_stop", "url_ngram"],
          "type"      => "custom"
        }
      }
    }
  }
}

# Mapping for the `url` document type: the `url` field is analyzed with
# the custom `url_analyzer` defined in the settings above.
url_mappings = {
  "url" => {
    "properties" => {
      "url" => {
        "boost"    => 10,
        "type"     => "string",
        "analyzer" => "url_analyzer"
      }
    }
  }
}

# Recreate the index from scratch, store the sample documents and refresh
# the index before the searches below run.
Tire.index('ngrams-and-tire') do
  delete
  create :settings => index_settings, :mappings => url_mappings

  [
    "http://urlaubinkroatien.de",
    "http://besteurlaubinkroatien.de",
    "http://kroatien.de"
  ].each do |address|
    store URL.new(:url => address)
  end

  refresh
end
# Run each sample query against the index and print the query followed by
# the `url` values of its results and a blank line.
s = nil
['url:urlaub', 'url:kroatien'].each do |query_string|
  s = Tire.search('ngrams-and-tire') do
    query { string query_string }
  end

  puts "QUERY > #{query_string}"
  puts s.results.map { |hit| hit.url }.inspect
  puts ""
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment