Skip to content

Instantly share code, notes, and snippets.

@sonalkr132
Last active April 10, 2017 16:37
Show Gist options
  • Save sonalkr132/920a4b934614c5c85604f7b18120e98f to your computer and use it in GitHub Desktop.
Save sonalkr132/920a4b934614c5c85604f7b18120e98f to your computer and use it in GitHub Desktop.
# Custom Analyzer for ActiveRecord integration with Elasticsearch
# ===============================================================
$LOAD_PATH.unshift File.expand_path('../../lib', __FILE__)
require 'ansi'
require 'logger'
require 'active_record'
require 'elasticsearch/model'
# Send ActiveRecord's log output to the console and connect to an in-memory
# SQLite database.
ActiveRecord::Base.logger = ActiveSupport::Logger.new(STDOUT)
ActiveRecord::Base.establish_connection(adapter: 'sqlite3', database: ':memory:')

# Schema for the example: a single `articles` table.
ActiveRecord::Schema.define(version: 1) do
  create_table :articles do |t|
    t.string :title
    t.date :published_at
    t.timestamps
  end
end

# Log Elasticsearch transport traffic to the console as well. The formatter
# keeps only the message text (passed through `ansi(:faint)`) and ignores the
# severity, timestamp and program name.
Elasticsearch::Model.client.transport.logger = ActiveSupport::Logger.new(STDOUT)
Elasticsearch::Model.client.transport.logger.formatter =
  ->(_severity, _time, _progname, message) { "#{message.ansi(:faint)}\n" }
# ActiveRecord model for the `articles` table, integrated with Elasticsearch.
#
# The index settings declare a custom `rubygem` analyzer of type `pattern`
# that lowercases input and splits it on runs of whitespace, dots, hyphens
# and underscores. The `title` attribute is mapped as `text` and carries two
# sub-fields declared in the block: `title` (using the `rubygem` analyzer,
# addressed as `title.title`) and `suggest` (using the `simple` analyzer).
class Article < ActiveRecord::Base
  include Elasticsearch::Model

  settings index: {
    number_of_shards: 1,
    number_of_replicas: 0,
    analysis: {
      analyzer: {
        rubygem: {
          type: 'pattern',
          # The backslash is doubled on purpose: inside a double-quoted Ruby
          # string a bare "\s" is the escape for a single space character, so
          # the regex class `\s` (any whitespace) would never reach the
          # analyzer and tabs/newlines would not split tokens.
          pattern: "[\\s#{Regexp.escape('.-_')}]+",
          lowercase: true
        }
      }
    }
  } do
    mapping do
      indexes :title, type: 'text' do
        indexes :title, analyzer: 'rubygem'
        # Added to require use of the block form when specifying multiple analyzers.
        indexes :suggest, analyzer: 'simple'
      end
    end
  end
end
# Create example records
#
Article.delete_all
Article.create title: 'Foo'
Article.create title: 'Foo_Bar_Baz'
Article.create title: 'Foo-Bar-Baz'
Article.create title: 'Bar'

# Index records
#
errors = Article.import force: true, refresh: true, return: 'errors'

# Report import failures and stop with a non-zero exit status.
# The message must be printed in its own statement before exiting:
# `puts msg && exit(1)` parses as `puts(msg && exit(1))`, which terminates
# the process while evaluating the argument and never prints anything.
unless errors.empty?
  error_messages = errors.map { |d| d['index']['error'] }.join(', ')
  puts "[!] Errors importing records: #{error_messages}".ansi(:red)
  exit(1)
end
puts '', '-' * 80

# Run sample texts through the `_analyze` API for a given field and print the
# resulting tokens as a comma-separated list under a bold heading.
[
  ['Fulltext analyzer [Foo_Bar_1]:', 'title',       'Foo_Bar_1'],
  ['Fulltext analyzer [Foo-Bar-1]:', 'title',       'Foo-Bar-1'],
  ['Rubygem analyzer [Foo_Bar_1]:',  'title.title', 'Foo_Bar_1']
].each do |heading, field, text|
  analysis = Article.__elasticsearch__.client.indices
                    .analyze(index: Article.index_name, field: field, text: text)
  token_list = analysis['tokens'].map { |entry| entry['token'] }.join(', ')

  puts heading.ansi(:bold), token_list, "\n"
end
puts '', '-' * 80

# Run the same term through three searches -- a plain query string, a match
# on `title`, and a match on the `title.title` sub-field -- and print the
# titles of the matching records, with a separator line after each result.
{
  "Simple search for 'foo':"        => 'foo',
  "Search `title` for 'foo':"       => { query: { match: { 'title' => 'foo' } } },
  "Search `title.title` for 'foo':" => { query: { match: { 'title.title' => 'foo' } } }
}.each do |heading, definition|
  response = Article.search(definition)

  puts heading.ansi(:bold),
       response.records.map(&:title).inspect,
       "\n"
  puts '', '-' * 80
end
--------------------------------------------------------------------------------
GET http://localhost:9200/articles/_analyze?field=title&index=articles&text=Foo_Bar_1 [status:200, request:0.020s, query:n/a]
< {"tokens":[{"token":"foo_bar_1","start_offset":0,"end_offset":9,"type":"<ALPHANUM>","position":0}]}
Fulltext analyzer [Foo_Bar_1]:
foo_bar_1
GET http://localhost:9200/articles/_analyze?field=title&index=articles&text=Foo-Bar-1 [status:200, request:0.013s, query:n/a]
< {"tokens":[{"token":"foo","start_offset":0,"end_offset":3,"type":"<ALPHANUM>","position":0},{"token":"bar","start_offset":4,"end_offset":7,"type":"<ALPHANUM>","position":1},{"token":"1","start_offset":8,"end_offset":9,"type":"<NUM>","position":2}]}
Fulltext analyzer [Foo-Bar-1]:
foo, bar, 1
GET http://localhost:9200/articles/_analyze?field=title.title&index=articles&text=Foo_Bar_1 [status:200, request:0.009s, query:n/a]
< {"tokens":[{"token":"foo","start_offset":0,"end_offset":3,"type":"word","position":0},{"token":"bar","start_offset":4,"end_offset":7,"type":"word","position":1},{"token":"1","start_offset":8,"end_offset":9,"type":"word","position":2}]}
Rubygem analyzer [Foo_Bar_1]:
foo, bar, 1
--------------------------------------------------------------------------------
GET http://localhost:9200/articles/article/_search?q=foo [status:200, request:0.008s, query:0.003s]
< {"took":3,"timed_out":false,"_shards":{"total":1,"successful":1,"failed":0},"hits":{"total":2,"max_score":0.62191015,"hits":[{"_index":"articles","_type":"article","_id":"1","_score":0.62191015,"_source":{"id":1,"title":"Foo","published_at":null,"created_at":"2017-04-10T16:24:18.530Z","updated_at":"2017-04-10T16:24:18.530Z"}},{"_index":"articles","_type":"article","_id":"3","_score":0.62191015,"_source":{"id":3,"title":"Foo-Bar-Baz","published_at":null,"created_at":"2017-04-10T16:24:18.533Z","updated_at":"2017-04-10T16:24:18.533Z"}}]}}
Article Load (0.4ms) SELECT "articles".* FROM "articles" WHERE "articles"."id" IN (1, 3)
Simple search for 'foo':
["Foo", "Foo-Bar-Baz"]
--------------------------------------------------------------------------------
GET http://localhost:9200/articles/article/_search [status:200, request:0.011s, query:0.005s]
> {"query":{"match":{"title":"foo"}}}
< {"took":5,"timed_out":false,"_shards":{"total":1,"successful":1,"failed":0},"hits":{"total":2,"max_score":0.80259144,"hits":[{"_index":"articles","_type":"article","_id":"1","_score":0.80259144,"_source":{"id":1,"title":"Foo","published_at":null,"created_at":"2017-04-10T16:24:18.530Z","updated_at":"2017-04-10T16:24:18.530Z"}},{"_index":"articles","_type":"article","_id":"3","_score":0.41214156,"_source":{"id":3,"title":"Foo-Bar-Baz","published_at":null,"created_at":"2017-04-10T16:24:18.533Z","updated_at":"2017-04-10T16:24:18.533Z"}}]}}
Article Load (0.3ms) SELECT "articles".* FROM "articles" WHERE "articles"."id" IN (1, 3)
Search `title` for 'foo':
["Foo", "Foo-Bar-Baz"]
--------------------------------------------------------------------------------
GET http://localhost:9200/articles/article/_search [status:200, request:0.010s, query:0.002s]
> {"query":{"match":{"title.title":"foo"}}}
< {"took":2,"timed_out":false,"_shards":{"total":1,"successful":1,"failed":0},"hits":{"total":3,"max_score":0.44839138,"hits":[{"_index":"articles","_type":"article","_id":"1","_score":0.44839138,"_source":{"id":1,"title":"Foo","published_at":null,"created_at":"2017-04-10T16:24:18.530Z","updated_at":"2017-04-10T16:24:18.530Z"}},{"_index":"articles","_type":"article","_id":"2","_score":0.25312415,"_source":{"id":2,"title":"Foo_Bar_Baz","published_at":null,"created_at":"2017-04-10T16:24:18.531Z","updated_at":"2017-04-10T16:24:18.531Z"}},{"_index":"articles","_type":"article","_id":"3","_score":0.25312415,"_source":{"id":3,"title":"Foo-Bar-Baz","published_at":null,"created_at":"2017-04-10T16:24:18.533Z","updated_at":"2017-04-10T16:24:18.533Z"}}]}}
Article Load (0.3ms) SELECT "articles".* FROM "articles" WHERE "articles"."id" IN (1, 2, 3)
Search `title.title` for 'foo':
["Foo", "Foo_Bar_Baz", "Foo-Bar-Baz"]
--------------------------------------------------------------------------------
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment