@kimchy
Created January 11, 2012 11:12
curl -XPUT localhost:9200/test -d '{
  "settings" : {
    "analysis" : {
      "analyzer" : {
        "name_nGram" : {
          "tokenizer" : "standard",
          "filter" : ["standard", "lowercase", "name_nGram"]
        },
        "name_edgeNGram" : {
          "tokenizer" : "standard",
          "filter" : ["standard", "lowercase", "name_edgeNGram"]
        }
      },
      "filter" : {
        "name_nGram" : {
          "type" : "nGram",
          "min_gram" : 3,
          "max_gram" : 5
        },
        "name_edgeNGram" : {
          "type" : "edgeNGram",
          "min_gram" : 3,
          "max_gram" : 20
        }
      }
    }
  },
  "mappings" : {
    "user" : {
      "_all" : { "enabled" : false },
      "properties" : {
        "uid" : {
          "type" : "multi_field",
          "fields" : {
            "uid" : {
              "type" : "string"
            },
            "nGram" : {
              "type" : "string",
              "index_analyzer" : "name_nGram",
              "search_analyzer" : "standard"
            },
            "edgeNGram" : {
              "type" : "string",
              "analyzer" : "name_edgeNGram"
            }
          }
        }
      }
    }
  }
}'
curl 'localhost:9200/test/_analyze?pretty=1&analyzer=name_nGram' -d 'maoz.sh'
curl 'localhost:9200/test/_analyze?pretty=1&analyzer=name_edgeNGram' -d 'maoz.sh'
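To see roughly what those two `_analyze` calls return, here is a small Python sketch of the `nGram` and `edgeNGram` filter behavior for the single token `maoz.sh` (the standard tokenizer keeps it as one token, as noted below). This is an approximation for illustration; the exact token order Lucene emits may differ.

```python
def ngrams(token, min_gram, max_gram):
    """All substrings of length min_gram..max_gram, like the nGram filter."""
    return [token[i:i + n]
            for i in range(len(token))
            for n in range(min_gram, max_gram + 1)
            if i + n <= len(token)]

def edge_ngrams(token, min_gram, max_gram):
    """Prefixes of length min_gram..max_gram, like the edgeNGram filter."""
    return [token[:n] for n in range(min_gram, min(max_gram, len(token)) + 1)]

print(ngrams("maoz.sh", 3, 5))       # includes "mao", "aoz", "maoz", ...
print(edge_ngrams("maoz.sh", 3, 20)) # prefixes only: "mao", "maoz", ..., "maoz.sh"
```

Note that `maoz` appears in both outputs, which is what lets the gram-based queries below match it.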
curl -XPUT localhost:9200/test/user/1 -d '{
  "uid" : "maoz.sh"
}'
# Won't find anything because maoz.sh is analyzed into a single token
curl localhost:9200/test/_search?pretty=1 -d '{
  "query" : {
    "text" : {
      "uid" : {
        "query" : "maoz"
      }
    }
  }
}'
# Will find it because it's a prefix
curl localhost:9200/test/_search?pretty=1 -d '{
  "query" : {
    "text" : {
      "uid" : {
        "query" : "maoz",
        "type" : "phrase_prefix"
      }
    }
  }
}'
# Will find it because it's a prefix (but with a limit on the number of term expansions)
curl localhost:9200/test/_search?pretty=1 -d '{
  "query" : {
    "text" : {
      "uid" : {
        "query" : "maoz",
        "type" : "phrase_prefix",
        "max_expansions" : 10
      }
    }
  }
}'
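Conceptually, `phrase_prefix` rewrites the last query term into the index terms that start with it, and `max_expansions` caps how many such terms it will collect. A minimal Python sketch of that idea (the index terms here are made up for illustration):

```python
def expand_prefix(prefix, index_terms, max_expansions):
    """Collect up to max_expansions index terms starting with prefix,
    roughly how phrase_prefix expands its final term."""
    matches = sorted(t for t in index_terms if t.startswith(prefix))
    return matches[:max_expansions]

# Hypothetical term dictionary; only "maoz.sh" exists in this gist's index.
print(expand_prefix("maoz", {"maoz.sh", "maozart", "other"}, 10))
```

With a low `max_expansions`, a very common prefix may not expand to every matching term, which trades recall for query cost.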
# Text query, but with nGram (no need for prefix) - applies nGram on the query as well (because it's analyzed)
curl localhost:9200/test/_search?pretty=1 -d '{
  "query" : {
    "text" : {
      "uid.nGram" : {
        "query" : "maoz"
      }
    }
  }
}'
# uid.nGram is indexed with nGram, but searched with the standard analyzer to create fewer tokens to search on
# another option is to define in the mapping an index_analyzer of name_nGram and a search_analyzer of standard
curl localhost:9200/test/_search?pretty=1 -d '{
  "query" : {
    "text" : {
      "uid.nGram" : {
        "query" : "maoz",
        "analyzer" : "standard"
      }
    }
  }
}'
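The point of using the standard analyzer at search time: the query string stays a single term instead of being split into every 3..5-gram, so there are far fewer terms to look up. A Python sketch of the difference (gram logic approximated as in the earlier example):

```python
def ngrams(token, min_gram, max_gram):
    """All substrings of length min_gram..max_gram, like the nGram filter."""
    return [token[i:i + n]
            for i in range(len(token))
            for n in range(min_gram, max_gram + 1)
            if i + n <= len(token)]

standard_terms = ["maoz"]           # standard analyzer: one lowercased token
ngram_terms = ngrams("maoz", 3, 5)  # nGram analyzer: one term per gram

print(standard_terms)
print(ngram_terms)
```

Either way the single term `maoz` was already indexed as a gram of `maoz.sh`, so the standard-analyzed query still matches while doing less work.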