Last active
April 10, 2017 16:37
-
-
Save sonalkr132/920a4b934614c5c85604f7b18120e98f to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Custom Analyzer for ActiveRecord integration with Elasticsearch | |
# =============================================================== | |
$LOAD_PATH.unshift File.expand_path('../../lib', __FILE__) | |
require 'ansi' | |
require 'logger' | |
require 'active_record' | |
require 'elasticsearch/model' | |
# Send ActiveRecord's log output (SQL statements etc.) to standard output.
ActiveRecord::Base.logger = ActiveSupport::Logger.new(STDOUT)

# Use a throw-away in-memory SQLite database for the example.
ActiveRecord::Base.establish_connection(adapter: 'sqlite3', database: ':memory:')

# Define the single `articles` table used by the example model.
ActiveRecord::Schema.define(version: 1) do
  create_table :articles do |table|
    table.string :title
    table.date :published_at
    table.timestamps
  end
end

# Log Elasticsearch transport traffic to standard output as well, rendering
# each message in faint ANSI style followed by a newline.
transport = Elasticsearch::Model.client.transport
transport.logger = ActiveSupport::Logger.new(STDOUT)
transport.logger.formatter = lambda do |_severity, _datetime, _progname, message|
  "#{message.ansi(:faint)}\n"
end
# ActiveRecord model indexed in Elasticsearch with a custom `rubygem` analyzer.
#
# The `title` attribute is mapped as a `text` multi-field:
# * `title`         - the parent field, with no analyzer specified here
# * `title.title`   - analyzed with the custom `rubygem` pattern analyzer
# * `title.suggest` - analyzed with the built-in `simple` analyzer
class Article < ActiveRecord::Base
  include Elasticsearch::Model

  settings index: {
    number_of_shards: 1,
    number_of_replicas: 0,
    analysis: {
      analyzer: {
        # Splits on runs of whitespace, dots, dashes and underscores, and
        # lowercases the resulting tokens (e.g. "Foo_Bar_1" -> foo, bar, 1).
        rubygem: {
          type: 'pattern',
          # `\\s` keeps the backslash so that Elasticsearch receives the regex
          # class `\s`; a bare "\s" inside a double-quoted Ruby string is just
          # a literal space and would not match tabs or newlines.
          pattern: "[\\s#{Regexp.escape('.-_')}]+",
          lowercase: true
        }
      }
    }
  } do
    mapping do
      indexes :title, type: 'text' do
        indexes :title, analyzer: 'rubygem'
        # Second sub-field, added so that the block form is required when
        # specifying multiple analyzers for one attribute.
        indexes :suggest, analyzer: 'simple'
      end
    end
  end
end
# Create example records
#
Article.delete_all
%w[Foo Foo_Bar_Baz Foo-Bar-Baz Bar].each { |title| Article.create title: title }

# Index records
#
errors = Article.import force: true, refresh: true, return: 'errors'
unless errors.empty?
  # Print and exit as two separate statements: `puts msg && exit(1)` evaluates
  # `exit` while building the argument for `puts`, so the process would
  # terminate before the message is ever written.
  puts "[!] Errors importing records: #{errors.map { |d| d['index']['error'] }.join(', ')}".ansi(:red)
  exit(1)
end

# Returns a comma-separated list of the tokens Elasticsearch produces for
# `text` when analyzed the way `field` of the Article index is analyzed.
def analyzed_tokens(field, text)
  Article.__elasticsearch__.client.indices
         .analyze(index: Article.index_name, field: field, text: text)['tokens']
         .map { |d| d['token'] }
         .join(', ')
end

# Returns the inspected array of titles of the records matched by `response`.
def matched_titles(response)
  response.records.map(&:title).inspect
end

puts '', '-' * 80

# Compare how the parent `title` field and the `title.title` sub-field
# tokenize names containing underscores and dashes.
puts "Fulltext analyzer [Foo_Bar_1]:".ansi(:bold),
     analyzed_tokens('title', 'Foo_Bar_1'),
     "\n"

puts "Fulltext analyzer [Foo-Bar-1]:".ansi(:bold),
     analyzed_tokens('title', 'Foo-Bar-1'),
     "\n"

puts "Rubygem analyzer [Foo_Bar_1]:".ansi(:bold),
     analyzed_tokens('title.title', 'Foo_Bar_1'),
     "\n"

puts '', '-' * 80

# Query-string search across the index.
response = Article.search 'foo'

puts "Simple search for 'foo':".ansi(:bold),
     matched_titles(response),
     "\n"

puts '', '-' * 80

# Match query against the parent `title` field.
response = Article.search query: { match: { 'title' => 'foo' } }

puts "Search `title` for 'foo':".ansi(:bold),
     matched_titles(response),
     "\n"

puts '', '-' * 80

# Match query against the `title.title` sub-field (the `rubygem` analyzer).
response = Article.search query: { match: { 'title.title' => 'foo' } }

puts "Search `title.title` for 'foo':".ansi(:bold),
     matched_titles(response),
     "\n"

puts '', '-' * 80
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-------------------------------------------------------------------------------- | |
GET http://localhost:9200/articles/_analyze?field=title&index=articles&text=Foo_Bar_1 [status:200, request:0.020s, query:n/a] | |
< {"tokens":[{"token":"foo_bar_1","start_offset":0,"end_offset":9,"type":"<ALPHANUM>","position":0}]} | |
Fulltext analyzer [Foo_Bar_1]: | |
foo_bar_1 | |
GET http://localhost:9200/articles/_analyze?field=title&index=articles&text=Foo-Bar-1 [status:200, request:0.013s, query:n/a] | |
< {"tokens":[{"token":"foo","start_offset":0,"end_offset":3,"type":"<ALPHANUM>","position":0},{"token":"bar","start_offset":4,"end_offset":7,"type":"<ALPHANUM>","position":1},{"token":"1","start_offset":8,"end_offset":9,"type":"<NUM>","position":2}]} | |
Fulltext analyzer [Foo-Bar-1]: | |
foo, bar, 1 | |
GET http://localhost:9200/articles/_analyze?field=title.title&index=articles&text=Foo_Bar_1 [status:200, request:0.009s, query:n/a] | |
< {"tokens":[{"token":"foo","start_offset":0,"end_offset":3,"type":"word","position":0},{"token":"bar","start_offset":4,"end_offset":7,"type":"word","position":1},{"token":"1","start_offset":8,"end_offset":9,"type":"word","position":2}]} | |
Rubygem analyzer [Foo_Bar_1]: | |
foo, bar, 1 | |
-------------------------------------------------------------------------------- | |
GET http://localhost:9200/articles/article/_search?q=foo [status:200, request:0.008s, query:0.003s] | |
< {"took":3,"timed_out":false,"_shards":{"total":1,"successful":1,"failed":0},"hits":{"total":2,"max_score":0.62191015,"hits":[{"_index":"articles","_type":"article","_id":"1","_score":0.62191015,"_source":{"id":1,"title":"Foo","published_at":null,"created_at":"2017-04-10T16:24:18.530Z","updated_at":"2017-04-10T16:24:18.530Z"}},{"_index":"articles","_type":"article","_id":"3","_score":0.62191015,"_source":{"id":3,"title":"Foo-Bar-Baz","published_at":null,"created_at":"2017-04-10T16:24:18.533Z","updated_at":"2017-04-10T16:24:18.533Z"}}]}} | |
Article Load (0.4ms) SELECT "articles".* FROM "articles" WHERE "articles"."id" IN (1, 3) | |
Simple search for 'foo': | |
["Foo", "Foo-Bar-Baz"] | |
-------------------------------------------------------------------------------- | |
GET http://localhost:9200/articles/article/_search [status:200, request:0.011s, query:0.005s] | |
> {"query":{"match":{"title":"foo"}}} | |
< {"took":5,"timed_out":false,"_shards":{"total":1,"successful":1,"failed":0},"hits":{"total":2,"max_score":0.80259144,"hits":[{"_index":"articles","_type":"article","_id":"1","_score":0.80259144,"_source":{"id":1,"title":"Foo","published_at":null,"created_at":"2017-04-10T16:24:18.530Z","updated_at":"2017-04-10T16:24:18.530Z"}},{"_index":"articles","_type":"article","_id":"3","_score":0.41214156,"_source":{"id":3,"title":"Foo-Bar-Baz","published_at":null,"created_at":"2017-04-10T16:24:18.533Z","updated_at":"2017-04-10T16:24:18.533Z"}}]}} | |
Article Load (0.3ms) SELECT "articles".* FROM "articles" WHERE "articles"."id" IN (1, 3) | |
Search `title` for 'foo': | |
["Foo", "Foo-Bar-Baz"] | |
-------------------------------------------------------------------------------- | |
GET http://localhost:9200/articles/article/_search [status:200, request:0.010s, query:0.002s] | |
> {"query":{"match":{"title.title":"foo"}}} | |
< {"took":2,"timed_out":false,"_shards":{"total":1,"successful":1,"failed":0},"hits":{"total":3,"max_score":0.44839138,"hits":[{"_index":"articles","_type":"article","_id":"1","_score":0.44839138,"_source":{"id":1,"title":"Foo","published_at":null,"created_at":"2017-04-10T16:24:18.530Z","updated_at":"2017-04-10T16:24:18.530Z"}},{"_index":"articles","_type":"article","_id":"2","_score":0.25312415,"_source":{"id":2,"title":"Foo_Bar_Baz","published_at":null,"created_at":"2017-04-10T16:24:18.531Z","updated_at":"2017-04-10T16:24:18.531Z"}},{"_index":"articles","_type":"article","_id":"3","_score":0.25312415,"_source":{"id":3,"title":"Foo-Bar-Baz","published_at":null,"created_at":"2017-04-10T16:24:18.533Z","updated_at":"2017-04-10T16:24:18.533Z"}}]}} | |
Article Load (0.3ms) SELECT "articles".* FROM "articles" WHERE "articles"."id" IN (1, 2, 3) | |
Search `title.title` for 'foo': | |
["Foo", "Foo_Bar_Baz", "Foo-Bar-Baz"] | |
-------------------------------------------------------------------------------- |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment