Created
May 15, 2018 01:12
-
-
Save geekpete/0f569ce6c14aaf5701cdd4f01976fed7 to your computer and use it in GitHub Desktop.
path_hierarchy tokenizer examples
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
### Path Hierarchy Examples

# delete any previous copy of the test index so the PUT below starts clean
DELETE /file-path-test/

# create a mapping with a file_path text field plus two subfields so the file
# path tree can be analyzed in two different ways using custom analyzers:
#   field <- custom analyzer <- custom tokenizer (path_hierarchy)
#
# NOTE: search_analyzer is a *field mapping* parameter, not a tokenizer
# setting — it belongs on the tree/tree_reversed fields below, not inside
# the tokenizer definitions. Using "keyword" at search time keeps the query
# string as a single path token instead of re-splitting it.
PUT /file-path-test/
{
  "settings": {
    "analysis": {
      "analyzer": {
        "custom_path_tree": {
          "tokenizer": "custom_hierarchy"
        },
        "custom_path_tree_reversed": {
          "tokenizer": "custom_hierarchy_reversed"
        }
      },
      "tokenizer": {
        "custom_hierarchy": {
          "type": "path_hierarchy",
          "delimiter": "/"
        },
        "custom_hierarchy_reversed": {
          "type": "path_hierarchy",
          "delimiter": "/",
          "reverse": true
        }
      }
    },
    "number_of_replicas": 0,
    "number_of_shards": 1
  },
  "mappings": {
    "doc": {
      "properties": {
        "file_path": {
          "type": "text",
          "fields": {
            "tree": {
              "type": "text",
              "analyzer": "custom_path_tree",
              "search_analyzer": "keyword"
            },
            "tree_reversed": {
              "type": "text",
              "analyzer": "custom_path_tree_reversed",
              "search_analyzer": "keyword"
            }
          }
        }
      }
    }
  }
}
# confirm the index and its mapping were created as expected
GET /file-path-test/_mapping

# index a handful of sample documents sharing similar file-path prefixes
POST /file-path-test/doc/
{
  "file_path": "/User/harry/photos/2017/05/16/my_photo1.jpg"
}

POST /file-path-test/doc/
{
  "file_path": "/User/harry/photos/2017/05/16/my_photo2.jpg"
}

POST /file-path-test/doc/
{
  "file_path": "/User/harry/photos/2017/05/16/my_photo3.jpg"
}

POST /file-path-test/doc/
{
  "file_path": "/User/harry/photos/2017/05/15/my_photo1.jpg"
}

POST /file-path-test/doc/
{
  "file_path": "/User/wally/photos/2017/05/16/my_photo1.jpg"
}
# match-all sanity check: confirm the sample docs are searchable
GET /file-path-test/_search

# this will match all of our example docs, but wally's will rank highest,
# since a larger number of matching terms adds to relevance
GET file-path-test/_search
{
  "query": {
    "match": {
      "file_path": "/User/wally/photos/2017/05"
    }
  }
}
# this will essentially match only docs that have this exact path prefix in
# the file_path.tree terms list (a term query is not analyzed, although
# scoring is still applied)
GET file-path-test/_search
{
  "query": {
    "term": {
      "file_path.tree": "/User/wally/photos/2017/05"
    }
  }
}
# another exact-term match against the path tree — 3 docs match this prefix
GET file-path-test/_search
{
  "query": {
    "term": {
      "file_path.tree": "/User/harry/photos/2017/05/16"
    }
  }
}
# What about the reversed tokenizer?
# Same style of term matching, but the path_hierarchy terms are built
# starting from the other end of the string — useful for filename suffixes
GET file-path-test/_search
{
  "query": {
    "term": {
      "file_path.tree_reversed": {
        "value": "my_photo1.jpg"
      }
    }
  }
}
# so you can do all the usual full-text things against the top-level text
# field, then use the tree / tree_reversed subfields to filter by path prefix
GET file-path-test/_search
{
  "query": {
    "bool" : {
      "must" : {
        "match" : { "file_path" : "16" }
      },
      "filter": {
        "term" : { "file_path.tree" : "/User/harry" }
      }
    }
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment