Created
March 6, 2012 06:16
-
-
Save tc/1984052 to your computer and use it in GitHub Desktop.
solr config, debugging exact match rankings
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
solrconfig.xml:
<!-- Default search handler "ac": every request is parsed with edismax unless the
     caller overrides these defaults. -->
<requestHandler name="ac" class="solr.SearchHandler" default="true">
  <lst name="defaults">
    <str name="defType">edismax</str>
    <int name="rows">10</int>
    <!-- Return all stored fields together with the relevance score. -->
    <str name="fl">*,score</str>
    <!-- Term matching: the whole-word title field is weighted above the
         word-prefix n-gram field. -->
    <str name="qf">title^50.0 textng^40.0</str>
    <!-- Phrase boost on the left-anchored edge n-gram field. -->
    <str name="pf">textnge^50.0</str>
    <bool name="debugQuery">false</bool>
    <int name="timeAllowed">5000</int>
  </lst>
</requestHandler>
schema.xml:
<!-- Fields: the required id, the stored title, and two index-only variants of the
     title used for left-anchored and per-word prefix matching. -->
<field name="id" type="string" indexed="true" stored="true" required="true"/>
<field name="title" type="text_suggest" indexed="true" stored="true" omitNorms="true"/>
<field name="textnge" type="autocomplete_edge" indexed="true" stored="false"/>
<field name="textng" type="autocomplete_ngram" indexed="true" stored="false"
       omitNorms="true" omitTermFreqAndPositions="true"/>

<!-- Both variants are populated from the title. -->
<copyField source="title" dest="textnge"/>
<copyField source="title" dest="textng"/>
<!-- text_suggest: whole-term matching against the suggest text.
     Index side expands each token into its parts, its catenations and the original;
     query side has every split/catenate option switched off. -->
<fieldType name="text_suggest" class="solr.TextField" positionIncrementGap="100">
  <analyzer type="index">
    <charFilter class="solr.MappingCharFilterFactory" mapping="mapping-ISOLatin1Accent.txt"/>
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.WordDelimiterFilterFactory"
            generateWordParts="1"
            generateNumberParts="1"
            catenateWords="1"
            catenateNumbers="1"
            catenateAll="1"
            splitOnCaseChange="1"
            splitOnNumerics="1"
            preserveOriginal="1"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <!-- Anything other than word characters, digits, '*', the listed Nordic letters
         or a space is replaced by a space. -->
    <filter class="solr.PatternReplaceFilterFactory"
            pattern="([^\w\d\*æøåÆØÅ ])"
            replacement=" "
            replace="all"/>
  </analyzer>
  <analyzer type="query">
    <charFilter class="solr.MappingCharFilterFactory" mapping="mapping-ISOLatin1Accent.txt"/>
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.WordDelimiterFilterFactory"
            generateWordParts="0"
            generateNumberParts="0"
            catenateWords="0"
            catenateNumbers="0"
            catenateAll="0"
            splitOnCaseChange="0"
            splitOnNumerics="0"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <!-- Same character clean-up as on the index side. -->
    <filter class="solr.PatternReplaceFilterFactory"
            pattern="([^\w\d\*æøåÆØÅ ])"
            replacement=" "
            replace="all"/>
  </analyzer>
</fieldType>
<!-- autocomplete_edge : Will match from the left of the field, e.g. if the document field
     is "A brown fox" and the query is "A bro", it will match, but not "brown".

     The whole field value is kept as a single token (KeywordTokenizer), lower-cased,
     has its separator punctuation turned into spaces, and is then expanded into
     edge n-grams of 1 to 30 characters at index time. The query side applies the same
     normalisation and truncates the query to the first 30 characters so that it can
     still equal the longest indexed gram.

     Changing the index analyzer requires a reindex of the textnge field.
-->
<fieldType name="autocomplete_edge" class="solr.TextField">
  <analyzer type="index">
    <charFilter class="solr.MappingCharFilterFactory" mapping="mapping-ISOLatin1Accent.txt"/>
    <tokenizer class="solr.KeywordTokenizerFactory"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <!-- Separators become spaces. The hyphen is escaped so it is a literal character:
         written unescaped between ':' and '_' it forms a range that covers
         ; < = > ? @ A-Z [ \ ] ^ but not the hyphen itself, so hyphens were later
         deleted by the clean-up filter instead of being turned into spaces. -->
    <filter class="solr.PatternReplaceFilterFactory" pattern="([\.,;:\-_])" replacement=" " replace="all"/>
    <filter class="solr.EdgeNGramFilterFactory" maxGramSize="30" minGramSize="1"/>
    <!-- Drop every remaining character that is not a word character, digit, '*',
         listed Nordic letter or space.
         NOTE(review): this runs after the n-gram filter, whereas the query side cleans
         up before truncating to 30 characters; for values longer than 30 characters
         that contain removed characters the two sides can disagree. Confirm whether
         that matters for your data. -->
    <filter class="solr.PatternReplaceFilterFactory" pattern="([^\w\d\*æøåÆØÅ ])" replacement="" replace="all"/>
  </analyzer>
  <analyzer type="query">
    <charFilter class="solr.MappingCharFilterFactory" mapping="mapping-ISOLatin1Accent.txt"/>
    <tokenizer class="solr.KeywordTokenizerFactory"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <!-- Must stay identical to the index-side separator pattern (literal hyphen). -->
    <filter class="solr.PatternReplaceFilterFactory" pattern="([\.,;:\-_])" replacement=" " replace="all"/>
    <filter class="solr.PatternReplaceFilterFactory" pattern="([^\w\d\*æøåÆØÅ ])" replacement="" replace="all"/>
    <!-- Keep only the first 30 characters, matching maxGramSize on the index side. -->
    <filter class="solr.PatternReplaceFilterFactory" pattern="^(.{30})(.*)?" replacement="$1" replace="all"/>
  </analyzer>
</fieldType>
<!-- autocomplete_ngram : Matches any word in the input field, with implicit right truncation.
     This means that the field "A brown fox" will be matched by query "bro".
     We use this to get partial matches, but these should be boosted lower than exact and left-anchored.

     Index side: each word is split into word/number parts, lower-cased and expanded into
     edge n-grams of 1 to 20 characters. Query side: tokens are not split, only
     lower-cased, cleaned up and truncated to 20 characters to line up with maxGramSize.
-->
<fieldType name="autocomplete_ngram" class="solr.TextField">
  <analyzer type="index">
    <charFilter class="solr.MappingCharFilterFactory" mapping="mapping-ISOLatin1Accent.txt"/>
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.EdgeNGramFilterFactory" maxGramSize="20" minGramSize="1"/>
    <filter class="solr.PatternReplaceFilterFactory" pattern="([^\w\d\*æøåÆØÅ ])" replacement="" replace="all"/>
  </analyzer>
  <analyzer type="query">
    <charFilter class="solr.MappingCharFilterFactory" mapping="mapping-ISOLatin1Accent.txt"/>
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <!-- splitOnNumerics="0" is set explicitly, as in the text_suggest query analyzer:
         the filter's default is to split at letter/digit boundaries, and with every
         generate/catenate option off a mixed token such as "mp3" would then be split
         and emit nothing at all.
         NOTE(review): the index analyzer above still splits such tokens into parts and
         does not keep the original, so an unsplit query token like "mp3" only matches
         if the index side is also changed (e.g. preserveOriginal); confirm the intent. -->
    <filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="0" splitOnNumerics="0"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.PatternReplaceFilterFactory" pattern="([^\w\d\*æøåÆØÅ ])" replacement="" replace="all"/>
    <!-- Keep only the first 20 characters, matching maxGramSize on the index side. -->
    <filter class="solr.PatternReplaceFilterFactory" pattern="^(.{20})(.*)?" replacement="$1" replace="all"/>
  </analyzer>
</fieldType>
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment