Created
June 23, 2011 09:42
-
-
Save ofavre/1042252 to your computer and use it in GitHub Desktop.
Highlighting problem with stemming analyzers when using mostly default parameters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| curl -XDELETE 'localhost:9200/index' | |
| {"ok":true,"acknowledged":true} | |
| curl -XPUT 'localhost:9200/index' -d '{"settings":{"index":{"number_of_shards":1,"number_of_replicas":0}}}' | |
| {"ok":true,"acknowledged":true} | |
| # The mapping I would like to use | |
| curl -XPUT 'localhost:9200/index/type/_mapping' -d '{ | |
| "type":{ | |
| "_source":{ "enabled":false }, | |
| "_analyzer":{ "path":"lang" }, | |
| "properties":{ | |
| "text":{ | |
| "type":"string", | |
| "store":true, | |
| "index":"analyzed" | |
| }, | |
| "lang":{ | |
| "type":"string", | |
| "store":true, | |
| "index":"not_analyzed" | |
| } | |
| } | |
| } | |
| }' | |
| {"ok":true,"acknowledged":true} | |
| curl -XGET 'localhost:9200/index/_analyze?pretty=1&analyzer=spanish' -d 'lola' | |
| { | |
| "tokens" : [ { | |
| "token" : "lol", | |
| "start_offset" : 0, | |
| "end_offset" : 4, | |
| "type" : "<ALPHANUM>", | |
| "position" : 1 | |
| } ] | |
| } | |
| curl -XPUT 'localhost:9200/index/type/docspanish' -d '{ | |
| "lang":"spanish", | |
| "text":"spanish lola stuff" | |
| }' | |
| {"ok":true,"_index":"index","_type":"type","_id":"docspanish","_version":1} | |
| curl -XGET 'localhost:9200/index/_analyze?pretty=1&analyzer=english' -d 'lol' | |
| { | |
| "tokens" : [ { | |
| "token" : "lol", | |
| "start_offset" : 0, | |
| "end_offset" : 3, | |
| "type" : "<ALPHANUM>", | |
| "position" : 1 | |
| } ] | |
| } | |
| curl -XPUT 'localhost:9200/index/type/docenglish' -d '{ | |
| "lang":"english", | |
| "text":"english lol stuff" | |
| }' | |
| {"ok":true,"_index":"index","_type":"type","_id":"docenglish","_version":1} | |
| curl -XPOST 'localhost:9200/index/_optimize?refresh=true&flush=true&wait_for_merge=true' | |
| {"ok":true,"_shards":{"total":1,"successful":1,"failed":0}} | |
| curl -XGET 'localhost:9200/index/type/_search?pretty=1&fields=*' -d '{ | |
| "query":{ | |
| "term":{ | |
| "text":"lol" | |
| } | |
| } | |
| }' | |
| { | |
| "took" : 1, | |
| "timed_out" : false, | |
| "_shards" : { | |
| "total" : 1, | |
| "successful" : 1, | |
| "failed" : 0 | |
| }, | |
| "hits" : { | |
| "total" : 2, | |
| "max_score" : 0.2972674, | |
| "hits" : [ { | |
| "_index" : "index", | |
| "_type" : "type", | |
| "_id" : "docspanish", | |
| "_score" : 0.2972674, | |
| "fields" : { | |
| "lang" : "spanish", | |
| "text" : "spanish lola stuff" | |
| } | |
| }, { | |
| "_index" : "index", | |
| "_type" : "type", | |
| "_id" : "docenglish", | |
| "_score" : 0.2972674, | |
| "fields" : { | |
| "lang" : "english", | |
| "text" : "english lol stuff" | |
| } | |
| } ] | |
| } | |
| } | |
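| # No highlight is returned for the docspanish hit: its word "lola" is stemmed to "lol" by the spanish analyzer, so it matches the query but is not highlighted | |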
| curl -XGET 'localhost:9200/index/type/_search?pretty=1&fields=*' -d '{ | |
| "query":{ | |
| "term":{ | |
| "text": "lol" | |
| } | |
| }, | |
| "highlight":{ | |
| "fields":{ | |
| "text":{ "number_of_fragments":0 } | |
| } | |
| } | |
| }' | |
| { | |
| "took" : 1, | |
| "timed_out" : false, | |
| "_shards" : { | |
| "total" : 1, | |
| "successful" : 1, | |
| "failed" : 0 | |
| }, | |
| "hits" : { | |
| "total" : 2, | |
| "max_score" : 0.2972674, | |
| "hits" : [ { | |
| "_index" : "index", | |
| "_type" : "type", | |
| "_id" : "docspanish", | |
| "_score" : 0.2972674, | |
| "fields" : { | |
| "lang" : "spanish", | |
| "text" : "spanish lola stuff" | |
| } | |
| }, { | |
| "_index" : "index", | |
| "_type" : "type", | |
| "_id" : "docenglish", | |
| "_score" : 0.2972674, | |
| "fields" : { | |
| "lang" : "english", | |
| "text" : "english lol stuff" | |
| }, | |
| "highlight" : { | |
| "text" : [ "english <em>lol</em> stuff" ] | |
| } | |
| } ] | |
| } | |
| } | |
| # test with _source disabled, term_vector=with_positions_offsets, fields stored | |
| curl -XDELETE 'localhost:9200/index' | |
| {"ok":true,"acknowledged":true} | |
| curl -XPUT 'localhost:9200/index' -d '{"settings":{"index":{"number_of_shards":1,"number_of_replicas":0}}}' | |
| {"ok":true,"acknowledged":true} | |
| curl -XPUT 'localhost:9200/index/type/_mapping' -d '{ | |
| "type":{ | |
| "_source":{ "enabled":false }, | |
| "_analyzer":{ "path":"lang" }, | |
| "properties":{ | |
| "text":{ | |
| "type":"string", | |
| "store":true, | |
| "index":"analyzed", | |
| "term_vector":"with_positions_offsets" | |
| }, | |
| "lang":{ | |
| "type":"string", | |
| "store":true, | |
| "index":"not_analyzed" | |
| } | |
| } | |
| } | |
| }' | |
| {"ok":true,"acknowledged":true} | |
| curl -XGET 'localhost:9200/index/_analyze?pretty=1&analyzer=spanish' -d 'lola' | |
| { | |
| "tokens" : [ { | |
| "token" : "lol", | |
| "start_offset" : 0, | |
| "end_offset" : 4, | |
| "type" : "<ALPHANUM>", | |
| "position" : 1 | |
| } ] | |
| } | |
| curl -XPUT 'localhost:9200/index/type/docspanish' -d '{ | |
| "lang":"spanish", | |
| "text":"spanish lola stuff" | |
| }' | |
| {"ok":true,"_index":"index","_type":"type","_id":"docspanish","_version":1} | |
| curl -XPUT 'localhost:9200/index/type/docenglish' -d '{ | |
| "lang":"english", | |
| "text":"english lol stuff" | |
| }' | |
| {"ok":true,"_index":"index","_type":"type","_id":"docenglish","_version":1} | |
| curl -XPOST 'localhost:9200/index/_optimize?refresh=true&flush=true&wait_for_merge=true' | |
| {"ok":true,"_shards":{"total":1,"successful":1,"failed":0}} | |
| curl -XGET 'localhost:9200/index/type/_search?pretty=1&fields=*' -d '{ | |
| "query":{ | |
| "term":{ | |
| "text": "lol" | |
| } | |
| }, | |
| "highlight":{ | |
| "fields":{ | |
| "text":{ "number_of_fragments":0 } | |
| } | |
| } | |
| }' | |
| { | |
| "took" : 1, | |
| "timed_out" : false, | |
| "_shards" : { | |
| "total" : 1, | |
| "successful" : 1, | |
| "failed" : 0 | |
| }, | |
| "hits" : { | |
| "total" : 2, | |
| "max_score" : 0.2972674, | |
| "hits" : [ { | |
| "_index" : "index", | |
| "_type" : "type", | |
| "_id" : "docspanish", | |
| "_score" : 0.2972674, | |
| "fields" : { | |
| "lang" : "spanish", | |
| "text" : "spanish lola stuff" | |
| }, | |
| "highlight" : { | |
| "text" : [ "spanish <em>lola</em> stuff " ] | |
| } | |
| }, { | |
| "_index" : "index", | |
| "_type" : "type", | |
| "_id" : "docenglish", | |
| "_score" : 0.2972674, | |
| "fields" : { | |
| "lang" : "english", | |
| "text" : "english lol stuff" | |
| }, | |
| "highlight" : { | |
| "text" : [ "english <em>lol</em> stuff " ] | |
| } | |
| } ] | |
| } | |
| } | |
| # test with _source enabled, term_vector=with_positions_offsets, fields stored | |
| curl -XDELETE 'localhost:9200/index' | |
| {"ok":true,"acknowledged":true} | |
| curl -XPUT 'localhost:9200/index' -d '{"settings":{"index":{"number_of_shards":1,"number_of_replicas":0}}}' | |
| {"ok":true,"acknowledged":true} | |
| curl -XPUT 'localhost:9200/index/type/_mapping' -d '{ | |
| "type":{ | |
| "_source":{ "enabled":true }, | |
| "_analyzer":{ "path":"lang" }, | |
| "properties":{ | |
| "text":{ | |
| "type":"string", | |
| "store":true, | |
| "index":"analyzed", | |
| "term_vector":"with_positions_offsets" | |
| }, | |
| "lang":{ | |
| "type":"string", | |
| "store":true, | |
| "index":"not_analyzed" | |
| } | |
| } | |
| } | |
| }' | |
| {"ok":true,"acknowledged":true} | |
| curl -XPUT 'localhost:9200/index/type/docspanish' -d '{ | |
| "lang":"spanish", | |
| "text":"spanish lola stuff" | |
| }' | |
| {"ok":true,"_index":"index","_type":"type","_id":"docspanish","_version":1} | |
| curl -XPUT 'localhost:9200/index/type/docenglish' -d '{ | |
| "lang":"english", | |
| "text":"english lol stuff" | |
| }' | |
| {"ok":true,"_index":"index","_type":"type","_id":"docenglish","_version":1} | |
| curl -XPOST 'localhost:9200/index/_optimize?refresh=true&flush=true&wait_for_merge=true' | |
| {"ok":true,"_shards":{"total":1,"successful":1,"failed":0}} | |
| curl -XGET 'localhost:9200/index/type/_search?pretty=1&fields=*' -d '{ | |
| "query":{ | |
| "term":{ | |
| "text": "lol" | |
| } | |
| }, | |
| "highlight":{ | |
| "fields":{ | |
| "text":{ "number_of_fragments":0 } | |
| } | |
| } | |
| }' | |
| { | |
| "took" : 1, | |
| "timed_out" : false, | |
| "_shards" : { | |
| "total" : 1, | |
| "successful" : 1, | |
| "failed" : 0 | |
| }, | |
| "hits" : { | |
| "total" : 2, | |
| "max_score" : 0.2972674, | |
| "hits" : [ { | |
| "_index" : "index", | |
| "_type" : "type", | |
| "_id" : "docspanish", | |
| "_score" : 0.2972674, | |
| "fields" : { | |
| "lang" : "spanish", | |
| "text" : "spanish lola stuff" | |
| }, | |
| "highlight" : { | |
| "text" : [ "spanish <em>lola</em> stuff " ] | |
| } | |
| }, { | |
| "_index" : "index", | |
| "_type" : "type", | |
| "_id" : "docenglish", | |
| "_score" : 0.2972674, | |
| "fields" : { | |
| "lang" : "english", | |
| "text" : "english lol stuff" | |
| }, | |
| "highlight" : { | |
| "text" : [ "english <em>lol</em> stuff " ] | |
| } | |
| } ] | |
| } | |
| } | |
| # test with _source disabled, term_vector=no, fields stored | |
| curl -XDELETE 'localhost:9200/index' | |
| {"ok":true,"acknowledged":true} | |
| curl -XPUT 'localhost:9200/index' -d '{"settings":{"index":{"number_of_shards":1,"number_of_replicas":0}}}' | |
| {"ok":true,"acknowledged":true} | |
| curl -XPUT 'localhost:9200/index/type/_mapping' -d '{ | |
| "type":{ | |
| "_source":{ "enabled":false }, | |
| "_analyzer":{ "path":"lang" }, | |
| "properties":{ | |
| "text":{ | |
| "type":"string", | |
| "store":true, | |
| "index":"analyzed", | |
| "term_vector":"no" | |
| }, | |
| "lang":{ | |
| "type":"string", | |
| "store":true, | |
| "index":"not_analyzed" | |
| } | |
| } | |
| } | |
| }' | |
| {"ok":true,"acknowledged":true} | |
| curl -XGET 'localhost:9200/index/_analyze?pretty=1&analyzer=spanish' -d 'lola' | |
| { | |
| "tokens" : [ { | |
| "token" : "lol", | |
| "start_offset" : 0, | |
| "end_offset" : 4, | |
| "type" : "<ALPHANUM>", | |
| "position" : 1 | |
| } ] | |
| } | |
| curl -XPUT 'localhost:9200/index/type/docspanish' -d '{ | |
| "lang":"spanish", | |
| "text":"spanish lola stuff" | |
| }' | |
| {"ok":true,"_index":"index","_type":"type","_id":"docspanish","_version":1} | |
| curl -XPUT 'localhost:9200/index/type/docenglish' -d '{ | |
| "lang":"english", | |
| "text":"english lol stuff" | |
| }' | |
| {"ok":true,"_index":"index","_type":"type","_id":"docenglish","_version":1} | |
| curl -XPOST 'localhost:9200/index/_optimize?refresh=true&flush=true&wait_for_merge=true' | |
| {"ok":true,"_shards":{"total":1,"successful":1,"failed":0}} | |
| curl -XGET 'localhost:9200/index/type/_search?pretty=1&fields=*' -d '{ | |
| "query":{ | |
| "term":{ | |
| "text": "lol" | |
| } | |
| }, | |
| "highlight":{ | |
| "fields":{ | |
| "text":{ "number_of_fragments":0 } | |
| } | |
| } | |
| }' | |
| { | |
| "took" : 2, | |
| "timed_out" : false, | |
| "_shards" : { | |
| "total" : 1, | |
| "successful" : 1, | |
| "failed" : 0 | |
| }, | |
| "hits" : { | |
| "total" : 2, | |
| "max_score" : 0.2972674, | |
| "hits" : [ { | |
| "_index" : "index", | |
| "_type" : "type", | |
| "_id" : "docspanish", | |
| "_score" : 0.2972674, | |
| "fields" : { | |
| "lang" : "spanish", | |
| "text" : "spanish lola stuff" | |
| } | |
| }, { | |
| "_index" : "index", | |
| "_type" : "type", | |
| "_id" : "docenglish", | |
| "_score" : 0.2972674, | |
| "fields" : { | |
| "lang" : "english", | |
| "text" : "english lol stuff" | |
| }, | |
| "highlight" : { | |
| "text" : [ "english <em>lol</em> stuff" ] | |
| } | |
| } ] | |
| } | |
| } | |
| # test with _source enabled, term_vector=no, fields stored | |
| curl -XDELETE 'localhost:9200/index' | |
| {"ok":true,"acknowledged":true} | |
| curl -XPUT 'localhost:9200/index' -d '{"settings":{"index":{"number_of_shards":1,"number_of_replicas":0}}}' | |
| {"ok":true,"acknowledged":true} | |
| curl -XPUT 'localhost:9200/index/type/_mapping' -d '{ | |
| "type":{ | |
| "_source":{ "enabled":true }, | |
| "_analyzer":{ "path":"lang" }, | |
| "properties":{ | |
| "text":{ | |
| "type":"string", | |
| "store":true, | |
| "index":"analyzed", | |
| "term_vector":"no" | |
| }, | |
| "lang":{ | |
| "type":"string", | |
| "store":true, | |
| "index":"not_analyzed" | |
| } | |
| } | |
| } | |
| }' | |
| {"ok":true,"acknowledged":true} | |
| curl -XPUT 'localhost:9200/index/type/docspanish' -d '{ | |
| "lang":"spanish", | |
| "text":"spanish lola stuff" | |
| }' | |
| {"ok":true,"_index":"index","_type":"type","_id":"docspanish","_version":1} | |
| curl -XPUT 'localhost:9200/index/type/docenglish' -d '{ | |
| "lang":"english", | |
| "text":"english lol stuff" | |
| }' | |
| {"ok":true,"_index":"index","_type":"type","_id":"docenglish","_version":1} | |
| curl -XPOST 'localhost:9200/index/_optimize?refresh=true&flush=true&wait_for_merge=true' | |
| {"ok":true,"_shards":{"total":1,"successful":1,"failed":0}} | |
| curl -XGET 'localhost:9200/index/type/_search?pretty=1&fields=*' -d '{ | |
| "query":{ | |
| "term":{ | |
| "text": "lol" | |
| } | |
| }, | |
| "highlight":{ | |
| "fields":{ | |
| "text":{ "number_of_fragments":0 } | |
| } | |
| } | |
| }' | |
| { | |
| "took" : 2, | |
| "timed_out" : false, | |
| "_shards" : { | |
| "total" : 1, | |
| "successful" : 1, | |
| "failed" : 0 | |
| }, | |
| "hits" : { | |
| "total" : 2, | |
| "max_score" : 0.2972674, | |
| "hits" : [ { | |
| "_index" : "index", | |
| "_type" : "type", | |
| "_id" : "docspanish", | |
| "_score" : 0.2972674, | |
| "fields" : { | |
| "lang" : "spanish", | |
| "text" : "spanish lola stuff" | |
| } | |
| }, { | |
| "_index" : "index", | |
| "_type" : "type", | |
| "_id" : "docenglish", | |
| "_score" : 0.2972674, | |
| "fields" : { | |
| "lang" : "english", | |
| "text" : "english lol stuff" | |
| }, | |
| "highlight" : { | |
| "text" : [ "english <em>lol</em> stuff" ] | |
| } | |
| } ] | |
| } | |
| } | |
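| # Highlighting of the stemmed word works with term_vector=with_positions_offsets, whether _source is enabled or not (the field is stored in every test); with term_vector=no the stemmed "lola" is not highlighted | |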
| set +v |
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Conclusion: