Skip to content

Instantly share code, notes, and snippets.

@tc
Created March 6, 2012 06:16
Show Gist options
  • Save tc/1984052 to your computer and use it in GitHub Desktop.
Save tc/1984052 to your computer and use it in GitHub Desktop.
solr config, debugging exact match rankings
solrconfig.xml:
<requestHandler class="solr.SearchHandler" name="ac" default="true" >
<lst name="defaults">
<str name="defType">edismax</str>
<str name="rows">10</str>
<str name="fl">*,score</str>
<str name="qf">title^50.0 textng^40.0</str>
<str name="pf">textnge^50.0</str>
<str name="debugQuery">false</str>
<str name="timeAllowed">5000</str>
</lst>
</requestHandler>
schema.xml:
<field name="id" type="string" indexed="true" stored="true" required="true"/>
<field name="title" type="text_suggest" indexed="true" stored="true" omitNorms="true" />
<copyField source="title" dest="textnge"/>
<field name="textnge" type="autocomplete_edge" indexed="true" stored="false" />
<copyField source="title" dest="textng"/>
<field name="textng" type="autocomplete_ngram" indexed="true" stored="false" omitNorms="true" omitTermFreqAndPositions="true" />
<!-- text_suggest : Matches whole terms in the suggest text -->
<fieldType name="text_suggest" class="solr.TextField" positionIncrementGap="100">
<analyzer type="index">
<charFilter class="solr.MappingCharFilterFactory" mapping="mapping-ISOLatin1Accent.txt"/>
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.WordDelimiterFilterFactory"
generateWordParts="1" generateNumberParts="1"
catenateWords="1" catenateNumbers="1" catenateAll="1"
splitOnCaseChange="1" splitOnNumerics="1" preserveOriginal="1" />
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.PatternReplaceFilterFactory" pattern="([^\w\d\*æøåÆØÅ ])" replacement=" " replace="all"/>
</analyzer>
<analyzer type="query">
<charFilter class="solr.MappingCharFilterFactory" mapping="mapping-ISOLatin1Accent.txt"/>
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.WordDelimiterFilterFactory"
generateWordParts="0" generateNumberParts="0"
catenateWords="0" catenateNumbers="0" catenateAll="0"
splitOnCaseChange="0" splitOnNumerics="0" />
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.PatternReplaceFilterFactory" pattern="([^\w\d\*æøåÆØÅ ])" replacement=" " replace="all"/>
</analyzer>
</fieldType>
<!-- autocomplete_edge : Will match from the left of the field, e.g. if the document field
is "A brown fox" and the query is "A bro", it will match, but not "brown"
-->
<fieldType name="autocomplete_edge" class="solr.TextField">
<analyzer type="index">
<charFilter class="solr.MappingCharFilterFactory" mapping="mapping-ISOLatin1Accent.txt"/>
<tokenizer class="solr.KeywordTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.PatternReplaceFilterFactory" pattern="([\.,;:-_])" replacement=" " replace="all"/>
<filter class="solr.EdgeNGramFilterFactory" maxGramSize="30" minGramSize="1"/>
<filter class="solr.PatternReplaceFilterFactory" pattern="([^\w\d\*æøåÆØÅ ])" replacement="" replace="all"/>
</analyzer>
<analyzer type="query">
<charFilter class="solr.MappingCharFilterFactory" mapping="mapping-ISOLatin1Accent.txt"/>
<tokenizer class="solr.KeywordTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.PatternReplaceFilterFactory" pattern="([\.,;:-_])" replacement=" " replace="all"/>
<filter class="solr.PatternReplaceFilterFactory" pattern="([^\w\d\*æøåÆØÅ ])" replacement="" replace="all"/>
<filter class="solr.PatternReplaceFilterFactory" pattern="^(.{30})(.*)?" replacement="$1" replace="all"/>
</analyzer>
</fieldType>
<!-- autocomplete_ngram : Matches any word in the input field, with implicit right truncation.
This means that the field "A brown fox" will be matched by query "bro".
We use this to get partial matches, but these whould be boosted lower than exact and left-anchored
-->
<fieldType name="autocomplete_ngram" class="solr.TextField">
<analyzer type="index">
<charFilter class="solr.MappingCharFilterFactory" mapping="mapping-ISOLatin1Accent.txt"/>
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.EdgeNGramFilterFactory" maxGramSize="20" minGramSize="1"/>
<filter class="solr.PatternReplaceFilterFactory" pattern="([^\w\d\*æøåÆØÅ ])" replacement="" replace="all"/>
</analyzer>
<analyzer type="query">
<charFilter class="solr.MappingCharFilterFactory" mapping="mapping-ISOLatin1Accent.txt"/>
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.PatternReplaceFilterFactory" pattern="([^\w\d\*æøåÆØÅ ])" replacement="" replace="all"/>
<filter class="solr.PatternReplaceFilterFactory" pattern="^(.{20})(.*)?" replacement="$1" replace="all"/>
</analyzer>
</fieldType>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment