Elasticsearch: test analyser for text like 'R&D' or 'Canal+'
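The script below deletes and recreates an index, defines a reference French analyzer (francais) plus four test analyzers (test1 to test4), and runs the same test string, "Canal+ annonce 3 ans de R&D !", through each of them with the _analyze API. The interesting case is test4: its mapping char filter rewrites "&" to "et" and "+" to "plus" before tokenization, so "R&D" and "Canal+" are not split apart and discarded by the standard tokenizer.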
#!/bin/bash
# Script and configuration to test analyzers
#
# Check arguments
hostname=$1
indexname=$2
if [ -z "$hostname" ] || [ -z "$indexname" ]
then
  echo "Usage: test_analyzer hostname indexname"
  echo " with:"
  echo " - hostname: IP or hostname (port 9200 is added)"
  echo " - indexname: name of the index (example: test)"
  exit 1
fi
# Test string; note: the HEREDOC makes it easy to test characters that are hard to escape in bash
teststring=$( cat <<EOF
Canal+ annonce 3 ans de R&D !
EOF
)
# Build URI
baseuri="http://$hostname:9200/$indexname"
# Confirm before delete
#
echo -n "Index $baseuri will be deleted. Continue? [y/n] "
read are_you_sure
if [ "$are_you_sure" != "y" ]
then
  echo "Cancelled!"
  exit 2
fi
# Delete index
echo -n "Deleting ... "
curl -XDELETE "$baseuri/"
echo ""
# Load settings
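# Analyzers defined below:
#   francais - standard tokenizer + lowercase, stop_francais, fr_stemmer, asciifolding, elision filters
#   test1    - standard tokenizer only
#   test2    - whitespace tokenizer only
#   test3    - same filters as francais, whitespace tokenizer
#   test4    - same filters as francais, standard tokenizer, plus the my_mapping char filter ("&" => "et", "+" => "plus")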
echo -n "Loading settings ... " | |
curl -XPUT "$baseuri/" -d ' | |
{ | |
"settings": { | |
"index": { | |
"analysis": { | |
"analyzer": { | |
"francais": { | |
"filter": [ | |
"lowercase", | |
"stop_francais", | |
"fr_stemmer", | |
"asciifolding", | |
"elision" | |
], | |
"tokenizer": "standard", | |
"type": "custom" | |
} , | |
"test1": { | |
"tokenizer": "standard", | |
"type": "custom" | |
}, | |
"test2": { | |
"tokenizer": "whitespace", | |
"type": "custom" | |
}, | |
"test3": { | |
"filter": [ | |
"lowercase", | |
"stop_francais", | |
"fr_stemmer", | |
"asciifolding", | |
"elision" | |
], | |
"tokenizer": "whitespace", | |
"type": "custom" | |
}, | |
"test4": { | |
"filter": [ | |
"lowercase", | |
"stop_francais", | |
"fr_stemmer", | |
"asciifolding", | |
"elision" | |
], | |
"tokenizer": "standard", | |
"char_filter" : ["my_mapping"], | |
"type": "custom" | |
} | |
}, | |
"filter": { | |
"elision": { | |
"articles": [ "l", "m", "t", "qu", "n", "s", "j", "d" ], | |
"type": "elision" | |
}, | |
"fr_stemmer": { | |
"name": "french", | |
"type": "stemmer" | |
}, | |
"stop_francais": { | |
"stopwords": [ | |
"_french_" | |
], | |
"type": "stop" | |
} | |
}, | |
"char_filter" : { | |
"my_mapping" : { | |
"type" : "mapping", | |
"mappings" : ["&=>et", "+=>plus"] | |
} | |
} | |
}, | |
"number_of_replicas": 0, | |
"number_of_shards": 1 | |
} | |
} | |
}' | |
echo "" | |
echo "" | |
echo "Test string : $teststring" | |
echo "" | |
echo "Analyzer custom français (francais)" | |
curl -XGET "$hostname:9200/$indexname/_analyze?pretty=true&analyzer=francais" -d "$teststring" | |
echo "" | |
echo "Tokenizer standard seul (test1)" | |
curl -XGET "$hostname:9200/$indexname/_analyze?pretty=true&analyzer=test1" -d "$teststring" | |
echo "" | |
echo "Tokenizer whitespace seul (test2)" | |
curl -XGET "$hostname:9200/$indexname/_analyze?pretty=true&analyzer=test2" -d "$teststring" | |
echo "" | |
echo "Analyzer custom français avec tokenizer whitespace (test3)" | |
curl -XGET "$hostname:9200/$indexname/_analyze?pretty=true&analyzer=test3" -d "$teststring" | |
echo "" | |
echo "Analyzer custom français avec tokenizer standard et mapping (test4)" | |
curl -XGET "$hostname:9200/$indexname/_analyze?pretty=true&analyzer=test4" -d "$teststring" | |
echo "" | |
# End. | |
echo "Done!" | |
exit 0 |
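Example run, assuming the script is saved as test_analyzer.sh (the filename is only illustrative) and Elasticsearch is reachable on port 9200 of localhost; "test" is the index name, which will be deleted and recreated:

    chmod +x test_analyzer.sh
    ./test_analyzer.sh localhost test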