Last active
August 29, 2015 14:13
-
-
Save jeffsteinmetz/2ea8329c667386c80fae to your computer and use it in GitHub Desktop.
Determine list [array] size in elasticsearch issue
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# tested against elasticsearch 1.4.1
# groovy script does not accurately count the number of elements in a list
# if you don't add a mapping with "index": "not_analyzed" it is even worse
# Connection settings shared by every request in this script.
INDEX_NAME='list-count-test'
NODE='localhost'

# Start from a clean slate: drop any index left over from a previous run.
curl -XDELETE "http://${NODE}:9200/${INDEX_NAME}"

# Recreate the index with one primary shard and one replica.
curl -XPUT "http://${NODE}:9200/${INDEX_NAME}/" -d '{
  "settings" : {
    "index" : {
      "number_of_shards" : 1,
      "number_of_replicas" : 1
    }
  }
}'
# set mapping
# "titles" is stored as a not_analyzed string so each list entry is kept as a
# single term instead of being split by an analyzer.
curl -XPOST "http://${NODE}:9200/${INDEX_NAME}/post/_mapping" -d '{
  "post": {
    "properties": {
      "titles": { "type": "string", "index": "not_analyzed" }
    }
  }
}'
# insert some test data

# Index one JSON document into the "post" type of the test index.
# Globals:   NODE, INDEX_NAME (read)
# Arguments: $1 - JSON document body
# Outputs:   whatever curl prints for the indexing response
index_post() {
  curl -XPOST "http://${NODE}:9200/${INDEX_NAME}/post" -d "$1"
}

# Lists of varying length; some contain repeated entries, spaces or slashes.
index_post '{ "titles": ["one", "duplicate", "duplicate"] }'
index_post '{ "titles": ["one", "two"] }'
index_post '{ "titles": ["one", "with spaces", "more spaces","four"] }'
index_post '{ "titles": ["this/has/slashes", "more/slashes"] }'
index_post '{ "titles": ["http://bit.ly/abc", "http://bit.ly/abc", "http://bit.ly/def", "http://bit.ly/ghi"] }'
index_post '{ "titles": ["one"] }'
# show the mapping
#curl -XGET "http://${NODE}:9200/${INDEX_NAME}/post/_mapping"

# Print a separator and pause so the freshly indexed documents become
# searchable before the test queries run.
# NOTE(review): an explicit POST to the index's _refresh endpoint would make
# this deterministic instead of time-based — confirm before swapping it in.
printf '\n'
printf '%s\n' '-------------'
printf '%s\n' ' waiting 6 seconds for index to refresh '
sleep 6
# TEST 1: filter with the script  doc['titles'].values.size() == 2
# this should return only records with 2 titles in the list
# but this returns an incorrect item
#   "titles": ["one", "duplicate", "duplicate"]
# and does not return, as a doc with 2 items,
#   "titles": ["http://bit.ly/abc", "http://bit.ly/abc"]
# NOTE(review): the test data indexed by this script stores the bit.ly titles
# as a four-entry list, not the two-entry list quoted above — confirm which
# one was intended.
printf '%s\n' 'TEST 1, should only return documents with 2 items in titles list [array]'
printf '\n'

# The request body is fed on stdin from a quoted here-doc, so the single
# quotes inside the script expression need no shell escaping.
curl -XGET "http://${NODE}:9200/${INDEX_NAME}/post/_search?pretty=true" --data-binary @- <<'JSON'
{
  "query": {
    "filtered": {
      "query": {
        "bool": {
          "must": [
            {
              "match_all": {}
            }
          ]
        }
      },
      "filter": {
        "script": {
          "script": "doc['titles'].values.size() == 2"
        }
      }
    }
  }
}
JSON
# TEST 2: list up to 20 documents, returning each one's "titles" source field
# next to a script field "sizeFromScript" holding doc['titles'].values.size(),
# so the stored list and the script's count can be compared side by side.
printf '%s\n' 'TEST 2'
printf '\n'

# The request body is fed on stdin from a quoted here-doc, so the single
# quotes inside the script expression need no shell escaping.
curl -XGET "http://${NODE}:9200/${INDEX_NAME}/post/_search?pretty=true" --data-binary @- <<'JSON'
{
  "size": 20,
  "script_fields": {
    "sizeFromScript": {
      "script": "doc['titles'].values.size()"
    }
  },
  "query": {
    "bool": {
      "must": [
        {
          "match_all": {}
        }
      ]
    }
  },
  "_source": {
    "includes": [
      "titles"
    ]
  }
}
JSON
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment