Created
May 25, 2011 19:19
-
-
Save hohyon-ryu/991684 to your computer and use it in GitHub Desktop.
Stemming for ElasticSearch
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Script to test stemming for ElasticSearch. Working now!!
# Reference: http://stackoverflow.com/questions/4981001/why-elasticsearch-is-not-finding-my-term
#
# Usage (first command-line argument selects the mode):
#   (no argument)  "index mode":  delete the index, recreate it with a
#                  stemming analyzer, install the mapping, index the documents
#   search         "search mode": run the stemming-dependent query
#   status         fetch the index status
#
# Every response is printed as a status line followed by the response body.
require 'rubygems'
require 'net/http'
require 'yaml'
require 'json'

# All request bodies below are JSON, so declare that explicitly instead of
# leaving the server to guess the format.
json_headers = { 'Content-Type' => 'application/json' }

# kill the index
delete = Net::HTTP::Delete.new('/willindex')

# create again, registering a custom analyzer ("a_stemming") that lowercases
# tokens and then runs them through an English snowball filter
create_index = Net::HTTP::Post.new('/willindex', json_headers)
create_index.body = {
  'index' => {
    'number_of_shards' => 1,
    'analysis' => {
      'filter' => {
        'snowball' => {
          'type' => 'snowball',
          'language' => 'English'
        }
      },
      'analyzer' => {
        'a_stemming' => {
          'type' => 'custom',
          'tokenizer' => 'standard',
          'filter' => %w[lowercase snowball]
        }
      }
    }
  }
}.to_json
puts create_index.body

# Mapping was missing in the previous test: attach the stemming analyzer to
# the "_all" field of the "willdoc" type
mapping = Net::HTTP::Put.new('/willindex/_mapping', json_headers)
mapping.body = {
  'willdoc' => {
    '_all' => {
      'type' => 'string',
      'analyzer' => 'a_stemming'
    }
  }
}.to_json

# index the records; document ids are 1-based positions in this list
texts = [
  'i love to walk',
  'I love WALKING',
  'i loved to walk',
  'it is walkable'
]
index_records = texts.each_with_index.map do |text, position|
  record = Net::HTTP::Put.new("/willindex/willdoc/#{position + 1}", json_headers)
  record.body = { 'text' => text }.to_json
  record
end

index_status = Net::HTTP::Get.new('/willindex/_status?pretty=true')

# do a search which requires stemming to succeed
# Author's note from an earlier run: "q=walks" does not work! It has to be "q=text:walk"
search = Net::HTTP::Get.new('/willindex/_search?pretty=true&q=loving')

# pick the requests to send; anything other than "search"/"status" means index mode
requests =
  case ARGV.first
  when 'search' then [search]
  when 'status' then [index_status]
  else [delete, create_index, mapping, *index_records]
  end

# perform requests
Net::HTTP.start('localhost', 9200) do |http|
  requests.each do |request|
    response = http.request(request)
    # Print the status and body; puts on the response object itself would only
    # show its class name, hiding search hits and error messages.
    puts "#{request.method} #{request.path} -> #{response.code} #{response.message}"
    puts response.body
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment