Skip to content

Instantly share code, notes, and snippets.

@uzicorp
Created April 19, 2012 16:20
Show Gist options
  • Save uzicorp/2422090 to your computer and use it in GitHub Desktop.
Save uzicorp/2422090 to your computer and use it in GitHub Desktop.
Experiment with Shingle Filter and Stoplist
#!/bin/sh
# Experiment with the shingle token filter combined with a stop-word filter.
# This first step (re)creates the "shinglephrase" index on the Elasticsearch
# node listening on localhost:9200.

# Drop any index left over from a previous run so the script is repeatable.
# On the very first run the index does not exist and the server simply
# answers with an error document; nothing else depends on this call.
curl -XDELETE http://localhost:9200/shinglephrase

# Create the index.
#   - "myCustomShingle" emits word pairs (max_shingle_size 2) and, because
#     output_unigrams is true, the single words as well.
#   - "myAnalyzer" chains: standard tokenizer -> lowercase -> myCustomShingle
#     -> stop -> kstem. The stop filter runs AFTER shingling, so it is applied
#     to the two-word shingles as well as to the single words.
#   - The "messages" type analyzes "msg" with myAnalyzer and stores term
#     vectors with positions and offsets; _all is disabled.
curl -XPOST http://localhost:9200/shinglephrase/ -d '
{
"settings":{
"index":{
"number_of_shards":5,
"number_of_replicas":1
},
"analysis":{
"filter":{
"myCustomShingle":{
"type":"shingle",
"max_shingle_size":2,
"output_unigrams":true
}
},
"analyzer":{
"myAnalyzer":{
"type":"custom",
"tokenizer":"standard",
"filter":[
"lowercase",
"myCustomShingle",
"stop",
"kstem"
]
}
}
}
},
"mappings":{
"messages":{
"_source":{
"enabled":true
},
"_all":{
"enabled":false
},
"index.query.default_field":"msg",
"properties":{
"response_time":{
"format":"dateOptionalTime",
"type":"date"
},
"process_id":{
"type":"long"
},
"msg":{
"type":"string",
"analyzer":"myAnalyzer",
"store":"yes",
"term_vector":"with_positions_offsets"
}
}
}
}
}
'
# Wait until the new index is actually usable instead of sleeping for a fixed
# second: the health call blocks until the index reaches at least "yellow"
# (all primary shards allocated) or the 10s timeout expires.
curl -XGET 'http://localhost:9200/_cluster/health/shinglephrase?wait_for_status=yellow&timeout=10s'
########################################################################################################################
# Bulk-index 10 messages into the "shinglephrase" index
########################################################################################################################
# The bulk body is newline-delimited: one action line followed by one document
# line per message. --data-binary is used so curl sends the payload exactly as
# written, newlines included.
curl -XPUT 'http://localhost:9200/_bulk' --data-binary '
{"index":{"_index":"shinglephrase","_type":"messages","_id":1}}
{"_id":1,"process_id":1,"response_time":"2012-01-01T00:00:00.000Z","msg":"test message"}
{"index":{"_index":"shinglephrase","_type":"messages","_id":2}}
{"_id":2,"process_id":2,"response_time":"2012-01-01T00:00:00.000Z","msg":"no way"}
{"index":{"_index":"shinglephrase","_type":"messages","_id":3}}
{"_id":3,"process_id":3,"response_time":"2012-01-01T00:00:00.000Z","msg":"no way"}
{"index":{"_index":"shinglephrase","_type":"messages","_id":4}}
{"_id":4,"process_id":4,"response_time":"2012-01-01T00:00:00.000Z","msg":"a problem"}
{"index":{"_index":"shinglephrase","_type":"messages","_id":5}}
{"_id":5,"process_id":5,"response_time":"2012-01-01T00:00:00.000Z","msg":"a problem"}
{"index":{"_index":"shinglephrase","_type":"messages","_id":6}}
{"_id":6,"process_id":6,"response_time":"2012-01-01T00:00:00.000Z","msg":"my problem"}
{"index":{"_index":"shinglephrase","_type":"messages","_id":7}}
{"_id":7,"process_id":7,"response_time":"2012-01-01T00:00:00.000Z","msg":"and the"}
{"index":{"_index":"shinglephrase","_type":"messages","_id":8}}
{"_id":8,"process_id":8,"response_time":"2012-01-01T00:00:00.000Z","msg":"and I"}
{"index":{"_index":"shinglephrase","_type":"messages","_id":9}}
{"_id":9,"process_id":9,"response_time":"2012-01-01T00:00:00.000Z","msg":"and I"}
{"index":{"_index":"shinglephrase","_type":"messages","_id":10}}
{"_id":10,"process_id":10,"response_time":"2012-01-01T00:00:00.000Z","msg":"and I"}
'
# Newly indexed documents only become searchable after a refresh. Force one
# explicitly rather than sleeping and hoping the periodic refresh has already
# run; otherwise the search that follows may see only part of the data.
curl -XPOST 'http://localhost:9200/shinglephrase/_refresh'
########################################################################################################################
# Top 10 terms of the "msg" field (terms facet)
########################################################################################################################
# "size":"0" suppresses the search hits so that only the facet is returned.
# The facet named "blah" counts terms of the "msg" field across all documents
# (match_all), leaves out the term "i", and reports the 10 most frequent ones.
# pretty=true asks the server for indented JSON output.
curl -X POST "http://localhost:9200/shinglephrase/_search?pretty=true" -d '
{
"size":"0",
"query": {
"match_all": {}
},
"facets": {
"blah": {
"terms": {
"field": "msg",
"analyzer": "myAnalyzer",
"exclude": [
"i"
],
"size": "10"
}
}
}
}
'
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment