Created
July 2, 2015 11:52
-
-
Save fontanka16/42ce366b45a206559573 to your computer and use it in GitHub Desktop.
Stockholm University OPAC Solr Schema.xml
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?xml version="1.0" encoding="UTF-8" ?>
<schema name="example" version="1.5">
<fields>
  <!-- Field declarations for the catalogue index.
       Naming pattern visible below: *_display fields are stored but not indexed,
       *_search fields are indexed but not stored. *_facet fields vary, see each line. -->

  <field name="_version_" type="long" indexed="true" stored="true"/>
  <field name="abstract_display" type="string" indexed="false" stored="true" multiValued="false"/>
  <field name="accessible_online" type="boolean" indexed="true" stored="true"/>
  <!-- Catch-all search field; filled by the copyField directives that target "allfields". -->
  <field name="allfields" type="text" indexed="true" stored="false" multiValued="true"/>
  <field name="availability" type="string" indexed="true" stored="true" multiValued="true"/>
  <field name="class_dewey_full" type="string" indexed="true" stored="true" multiValued="true"/>
  <field name="class_kssb_full" type="string" indexed="true" stored="true" multiValued="true"/>
  <field name="date_record_created" type="date" indexed="true" stored="true"/>
  <field name="date_record_last_update" type="date" indexed="true" stored="true"/>
  <field name="date_record_last_indexed" type="date" indexed="true" stored="true"/>
  <field name="doctype_sv" type="string" indexed="true" stored="true" multiValued="true"/>
  <field name="doctype_en" type="string" indexed="true" stored="true" multiValued="true"/>
  <field name="edition_display" type="string" indexed="false" stored="true" multiValued="true"/>
  <field name="error_strings" type="string" indexed="false" stored="true" multiValued="true"/>
  <field name="fullrecord" type="string" indexed="false" stored="true"/>
  <field name="fulltext" type="text" indexed="true" stored="false"/>
  <field name="genre_marc_en" type="string" indexed="true" stored="true" multiValued="true"/>
  <field name="genre_marc_sv" type="string" indexed="true" stored="true" multiValued="true"/>
  <field name="holding_information_string" type="string" indexed="true" stored="true" multiValued="true"/>
  <field name="hosting_publication_isbn_issn" type="isn" indexed="true" stored="true" multiValued="true"/>
  <!-- Declared as the schema's uniqueKey. -->
  <field name="id" type="string" indexed="true" stored="true"/>
  <field name="isbn_issn" type="isn" indexed="true" stored="true" multiValued="true"/>
  <field name="language_code" type="string" indexed="true" stored="true" multiValued="true"/>
  <field name="language_string_sv" type="string" indexed="true" stored="true" multiValued="true"/>
  <field name="library_name" type="string" indexed="true" stored="true" multiValued="true"/>
  <field name="NoHoldingMessage" type="string" indexed="true" stored="true" multiValued="true"/>
  <field name="language_string_en" type="string" indexed="true" stored="true" multiValued="true"/>
  <field name="note_display" type="string" indexed="false" stored="true" multiValued="true"/>
  <field name="note_primary_display" type="string" indexed="false" stored="true" multiValued="true"/>
  <field name="note_search" type="textProper" indexed="true" stored="false" multiValued="true"/>
  <field name="physical_description_en" type="string" indexed="true" stored="true" multiValued="false"/>
  <field name="physical_description_sv" type="string" indexed="true" stored="true" multiValued="false"/>
  <field name="print_display" type="string" indexed="false" stored="true" multiValued="true"/>
  <field name="publisher_display" type="string" indexed="false" stored="true" multiValued="true"/>

  <!-- responsibility = "upphov" (Swedish cataloguing term; roughly authorship/originator) -->
  <field name="responsibility_facet" type="textFacet" indexed="true" stored="false" multiValued="true"/>
  <field name="responsibility_primary_display" type="string" indexed="false" stored="true" multiValued="true"/>
  <field name="responsibility_search" type="textProper" indexed="true" stored="false" multiValued="true"/>
  <field name="responsibility_secondary_display" type="string" indexed="false" stored="true" multiValued="true"/>
  <field name="responsibility_secondary_search" type="textProper" indexed="true" stored="false" multiValued="true"/>
  <!-- The next two fields are neither indexed nor stored; within this schema they
       are only used as copyField sources. -->
  <field name="responsibility_secondary_facet" type="string" indexed="false" stored="false" multiValued="true"/>
  <field name="responsibility_primary_facet" type="string" indexed="false" stored="false" multiValued="false"/>

  <field name="sigel" type="string" indexed="true" stored="false" multiValued="true"/>
  <field name="source" type="string" indexed="true" stored="true" multiValued="true"/>
  <field name="spelling" type="textSpell" indexed="true" stored="true"/>
  <field name="spellingShingle" type="textSpellShingle" indexed="true" stored="true" multiValued="true"/>
  <field name="subject_area_su_sv" type="textFacet" indexed="true" stored="true" multiValued="true"/>
  <field name="subject_area_su_en" type="textFacet" indexed="true" stored="true" multiValued="true"/>

  <!-- Subject terms: generic, LCSH and SAB variants, each as search/display/facet. -->
  <field name="subject_term_search" type="textProper" indexed="true" stored="false" multiValued="true"/>
  <field name="subject_term_display" type="string" indexed="false" stored="true" multiValued="true"/>
  <field name="subject_term_facet" type="textFacet" indexed="true" stored="false" multiValued="true"/>
  <field name="subject_term_lcsh_search" type="textProper" indexed="true" stored="false" multiValued="true"/>
  <field name="subject_term_lcsh_display" type="string" indexed="false" stored="true" multiValued="true"/>
  <field name="subject_term_lcsh_facet" type="textFacet" indexed="true" stored="false" multiValued="true"/>
  <field name="subject_term_sab_search" type="textProper" indexed="true" stored="false" multiValued="true"/>
  <field name="subject_term_sab_display" type="string" indexed="false" stored="true" multiValued="true"/>
  <field name="subject_term_sab_facet" type="textFacet" indexed="true" stored="false" multiValued="true"/>

  <field name="titles_alternative_search" type="textProper" indexed="true" stored="false" multiValued="true"/>
  <!-- NOTE(review): the only *_display field typed textProper instead of string; confirm this is intended. -->
  <field name="titles_alternative_display" type="textProper" indexed="false" stored="true" multiValued="true"/>
  <field name="title_main_search" type="textProper" indexed="true" stored="false" multiValued="false"/>
  <field name="title_main_display" type="string" indexed="false" stored="true" multiValued="false"/>
  <!-- For alternatives, see http://lucene.472066.n3.nabble.com/Faceting-and-first-letter-of-fields-td1703254.html
       or http://www.packtpub.com/article/faceting-in-solr-1.4-enterprise-search-server -->
  <!-- No "indexed" attribute is given here, so the field type's default applies. -->
  <field name="title_main__facetLetter" type="bucketFirstLetter" stored="true"/>
  <field name="title_main_sort" type="textSort" indexed="true" stored="true" multiValued="false"/>
  <field name="title_hosting_publication_display" type="string" indexed="false" stored="true" multiValued="true"/>
  <field name="title_hosting_publication_search" type="textProper" indexed="true" stored="false" multiValued="true"/>
  <field name="title_hosting_publication_facet" type="textFacet" indexed="true" stored="false" multiValued="true"/>

  <field name="type_of_resource_en" type="string" indexed="true" stored="true" multiValued="false"/>
  <field name="type_of_resource_sv" type="string" indexed="true" stored="true" multiValued="false"/>
  <field name="url_primary" type="string" indexed="false" stored="true" multiValued="true"/>
  <field name="url_primary_title" type="string" indexed="false" stored="true" multiValued="false"/>
  <field name="url_openurl" type="string" indexed="false" stored="true" multiValued="true"/>
  <field name="verde_id" type="string" indexed="false" stored="true" multiValued="false"/>
  <field name="website_category" type="string" indexed="true" stored="true"/>
  <field name="url_libris" type="string" indexed="false" stored="true" multiValued="true"/>
  <field name="year_primary" type="string" indexed="true" stored="true"/>
  <field name="year_facet" type="sint" indexed="true" stored="true"/>
  <field name="pages" type="sint" indexed="true" stored="true"/>
</fields>
<!-- Document identity: "id" is the unique key. -->
<uniqueKey>id</uniqueKey>

<!-- copyField directives. Their relative order is kept as-is because it decides
     the order of values in multiValued destinations.
     NOTE(review): Solr does not chain copyFields (a copy never feeds another copy).
     Directives whose source is itself only a copyField destination (for example
     subject_term_display, responsibility_facet, titles_alternative_search) only
     copy values the indexer sends to that source directly. Confirm against the
     indexing client. -->

<!-- Spelling, title and responsibility derivations -->
<copyField source="title_main_display" dest="spelling"/>
<copyField source="genre_marc_sv" dest="spellingShingle"/>
<copyField source="titles_alternative_search" dest="spellingShingle"/>
<copyField source="title_main_search" dest="titles_alternative_search"/>
<copyField source="titles_alternative_search" dest="titles_alternative_display"/>
<copyField source="responsibility_secondary_search" dest="spellingShingle"/>
<copyField source="responsibility_secondary_search" dest="responsibility_search"/>
<copyField source="responsibility_search" dest="spellingShingle"/>
<copyField source="title_main_display" dest="title_main_sort"/>
<copyField source="title_main_display" dest="title_main__facetLetter"/>
<copyField source="responsibility_primary_facet" dest="responsibility_facet"/>
<copyField source="responsibility_secondary_facet" dest="responsibility_facet"/>
<copyField source="responsibility_secondary_facet" dest="responsibility_secondary_display"/>

<!-- Copy the specific subject terms into the generic field -->
<copyField source="subject_term_lcsh_display" dest="subject_term_display"/>
<copyField source="subject_term_sab_display" dest="subject_term_display"/>
<copyField source="subject_term_sab_display" dest="subject_term_sab_facet"/>
<copyField source="subject_term_sab_display" dest="spellingShingle"/>
<copyField source="subject_term_sab_display" dest="subject_term_sab_search"/>
<copyField source="subject_term_lcsh_display" dest="subject_term_lcsh_facet"/>
<copyField source="subject_term_lcsh_display" dest="spellingShingle"/>
<copyField source="subject_term_lcsh_display" dest="subject_term_lcsh_search"/>
<copyField source="subject_term_display" dest="subject_term_facet"/>
<copyField source="subject_term_display" dest="subject_term_search"/>
<copyField source="subject_term_display" dest="spellingShingle"/>

<!-- Hosting publication title and primary responsibility -->
<copyField source="title_hosting_publication_display" dest="title_hosting_publication_search"/>
<copyField source="title_hosting_publication_display" dest="title_hosting_publication_facet"/>
<copyField source="responsibility_primary_facet" dest="responsibility_primary_display"/>

<!-- Everything below feeds the catch-all "allfields" search field -->
<copyField source="abstract_display" dest="allfields"/>
<copyField source="class_dewey_full" dest="allfields"/>
<copyField source="class_kssb_full" dest="allfields"/>
<copyField source="doctype_sv" dest="allfields"/>
<copyField source="doctype_en" dest="allfields"/>
<copyField source="edition_display" dest="allfields"/>
<copyField source="fulltext" dest="allfields"/>
<copyField source="genre_marc_en" dest="allfields"/>
<copyField source="genre_marc_sv" dest="allfields"/>
<copyField source="holding_information_string" dest="allfields"/>
<copyField source="hosting_publication_isbn_issn" dest="allfields"/>
<copyField source="isbn_issn" dest="allfields"/>
<copyField source="language_code" dest="allfields"/>
<copyField source="library_name" dest="allfields"/>
<copyField source="NoHoldingMessage" dest="allfields"/>
<copyField source="note_display" dest="allfields"/>
<copyField source="note_primary_display" dest="allfields"/>
<copyField source="physical_description_en" dest="allfields"/>
<copyField source="physical_description_sv" dest="allfields"/>
<copyField source="print_display" dest="allfields"/>
<copyField source="publisher_display" dest="allfields"/>
<copyField source="responsibility_facet" dest="allfields"/>
<copyField source="sigel" dest="allfields"/>
<copyField source="subject_area_su_sv" dest="allfields"/>
<copyField source="subject_area_su_en" dest="allfields"/>
<copyField source="subject_term_display" dest="allfields"/>
<copyField source="subject_term_lcsh_display" dest="allfields"/>
<copyField source="subject_term_sab_display" dest="allfields"/>
<copyField source="titles_alternative_display" dest="allfields"/>
<copyField source="title_main_display" dest="allfields"/>
<copyField source="title_hosting_publication_display" dest="allfields"/>
<copyField source="url_primary" dest="allfields"/>
<copyField source="url_primary_title" dest="allfields"/>
<copyField source="url_openurl" dest="allfields"/>
<copyField source="website_category" dest="allfields"/>
<copyField source="year_primary" dest="allfields"/>
<copyField source="pages" dest="allfields"/>
<types>
<!-- Primitive (non-analysed) field types. -->
<fieldType name="string" class="solr.StrField" sortMissingLast="true"/>
<!-- boolean type: "true" or "false" -->
<fieldType name="boolean" class="solr.BoolField" sortMissingLast="true"/>

<!-- Trie numeric/date types declared with precisionStep="0". -->
<fieldType name="int" class="solr.TrieIntField" precisionStep="0" positionIncrementGap="0"/>
<fieldType name="float" class="solr.TrieFloatField" precisionStep="0" positionIncrementGap="0"/>
<fieldType name="long" class="solr.TrieLongField" precisionStep="0" positionIncrementGap="0"/>
<fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" positionIncrementGap="0"/>

<!-- "t"-prefixed Trie variants declared with a non-zero precisionStep. -->
<fieldType name="tint" class="solr.TrieIntField" precisionStep="8" positionIncrementGap="0"/>
<fieldType name="tfloat" class="solr.TrieFloatField" precisionStep="8" positionIncrementGap="0"/>
<fieldType name="tlong" class="solr.TrieLongField" precisionStep="8" positionIncrementGap="0"/>
<fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8" positionIncrementGap="0"/>

<fieldType name="date" class="solr.TrieDateField" precisionStep="0" positionIncrementGap="0"/>
<fieldType name="tdate" class="solr.TrieDateField" precisionStep="6" positionIncrementGap="0"/>

<!-- Element name normalised to "fieldType" to match every other declaration
     (XML names are case-sensitive). -->
<fieldType name="binary" class="solr.BinaryField"/>

<!-- "p"-prefixed types backed by the plain (non-Trie) field classes. -->
<fieldType name="pint" class="solr.IntField"/>
<fieldType name="plong" class="solr.LongField"/>
<fieldType name="pfloat" class="solr.FloatField"/>
<fieldType name="pdouble" class="solr.DoubleField"/>
<fieldType name="pdate" class="solr.DateField" sortMissingLast="true"/>

<fieldType name="random" class="solr.RandomSortField" indexed="true"/>
<!-- Text split by the whitespace tokenizer only; no filters are applied. -->
<fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">
  <analyzer>
    <tokenizer class="solr.WhitespaceTokenizerFactory"/>
  </analyzer>
</fieldType>
<!-- General-purpose text: standard tokenizer, stop words, lower-casing.
     Synonyms are configured on the query analyzer only. -->
<fieldType name="text_general" class="solr.TextField" positionIncrementGap="100">
  <analyzer type="index">
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true"/>
    <filter class="solr.LowerCaseFilterFactory"/>
  </analyzer>
  <analyzer type="query">
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true"/>
    <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
    <filter class="solr.LowerCaseFilterFactory"/>
  </analyzer>
</fieldType>
<!-- English text: stop words, lower-casing, possessive filter, protected-word
     marking and Porter stemming. The query analyzer additionally applies
     synonyms right after tokenizing. -->
<fieldType name="text_en" class="solr.TextField" positionIncrementGap="100">
  <analyzer type="index">
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.EnglishPossessiveFilterFactory"/>
    <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
    <filter class="solr.PorterStemFilterFactory"/>
  </analyzer>
  <analyzer type="query">
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
    <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.EnglishPossessiveFilterFactory"/>
    <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
    <filter class="solr.PorterStemFilterFactory"/>
  </analyzer>
</fieldType>
<!-- English text with word-delimiter splitting. The index analyzer catenates
     word and number parts (catenateWords/catenateNumbers = 1); the query
     analyzer does not (both 0) and adds synonyms. -->
<fieldType name="text_en_splitting" class="solr.TextField"
           positionIncrementGap="100" autoGeneratePhraseQueries="true">
  <analyzer type="index">
    <tokenizer class="solr.WhitespaceTokenizerFactory"/>
    <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt"/>
    <filter class="solr.WordDelimiterFilterFactory"
            generateWordParts="1" generateNumberParts="1"
            catenateWords="1" catenateNumbers="1" catenateAll="0"
            splitOnCaseChange="1"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
    <filter class="solr.PorterStemFilterFactory"/>
  </analyzer>
  <analyzer type="query">
    <tokenizer class="solr.WhitespaceTokenizerFactory"/>
    <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
    <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt"/>
    <filter class="solr.WordDelimiterFilterFactory"
            generateWordParts="1" generateNumberParts="1"
            catenateWords="0" catenateNumbers="0" catenateAll="0"
            splitOnCaseChange="1"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
    <filter class="solr.PorterStemFilterFactory"/>
  </analyzer>
</fieldType>
<!-- Tighter variant of the splitting type: fewer false matches at the cost of
     less flexible matching. Probably not ideal for product names, but may suit
     SKU-like values; a dash inserted in the wrong place can still match. -->
<fieldType name="text_en_splitting_tight" class="solr.TextField"
           positionIncrementGap="100" autoGeneratePhraseQueries="true">
  <analyzer>
    <tokenizer class="solr.WhitespaceTokenizerFactory"/>
    <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
    <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt"/>
    <filter class="solr.WordDelimiterFilterFactory"
            generateWordParts="0" generateNumberParts="0"
            catenateWords="1" catenateNumbers="1" catenateAll="0"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
    <filter class="solr.EnglishMinimalStemFilterFactory"/>
    <!-- Drops duplicate tokens at the same position, which can occur when
         WordDelimiterFilter is combined with stemming. -->
    <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
  </analyzer>
</fieldType>
<!-- Same idea as text_general, but the index analyzer also emits each token
     with its characters reversed so leading-wildcard queries are cheaper. -->
<fieldType name="text_general_rev" class="solr.TextField" positionIncrementGap="100">
  <analyzer type="index">
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.ReversedWildcardFilterFactory" withOriginal="true"
            maxPosAsterisk="3" maxPosQuestion="2" maxFractionAsterisk="0.33"/>
  </analyzer>
  <analyzer type="query">
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
    <filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true"/>
    <filter class="solr.LowerCaseFilterFactory"/>
  </analyzer>
</fieldType>
<!-- Sort key built from the whole field value: lower-cased, trimmed, and with
     every character outside a-z removed. -->
<fieldType name="alphaOnlySort" class="solr.TextField" sortMissingLast="true" omitNorms="true">
  <analyzer>
    <!-- KeywordTokenizer keeps the entire input as a single token.
         A stray comment terminator that used to follow this tokenizer (left
         over from a removed comment) has been dropped; it was literal text
         content inside the analyzer element. -->
    <tokenizer class="solr.KeywordTokenizerFactory"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.TrimFilterFactory"/>
    <!-- Runs after lower-casing, so only a-z survives. -->
    <filter class="solr.PatternReplaceFilterFactory"
            pattern="([^a-z])" replacement="" replace="all"/>
  </analyzer>
</fieldType>
<!-- Whole value kept as one token (keyword tokenizer) and lower-cased. -->
<fieldType name="lowercase" class="solr.TextField" positionIncrementGap="100">
  <analyzer>
    <tokenizer class="solr.KeywordTokenizerFactory"/>
    <filter class="solr.LowerCaseFilterFactory"/>
  </analyzer>
</fieldType>
<!-- "/"-delimited paths: hierarchy tokenizer at index time, whole value as one
     token at query time. -->
<fieldType name="descendent_path" class="solr.TextField">
  <analyzer type="index">
    <tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/"/>
  </analyzer>
  <analyzer type="query">
    <tokenizer class="solr.KeywordTokenizerFactory"/>
  </analyzer>
</fieldType>
<!-- Mirror image of descendent_path: whole value as one token at index time,
     "/"-delimited hierarchy tokenizer at query time. -->
<fieldType name="ancestor_path" class="solr.TextField">
  <analyzer type="index">
    <tokenizer class="solr.KeywordTokenizerFactory"/>
  </analyzer>
  <analyzer type="query">
    <tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/"/>
  </analyzer>
</fieldType>
<!-- Type that neither stores nor indexes its values.
     Element name normalised to "fieldType" to match every other declaration
     (XML names are case-sensitive). -->
<fieldType name="ignored" class="solr.StrField" indexed="false" stored="false" multiValued="true"/>

<!-- Spatial and currency types. No field in the fields section of this schema
     references them. -->
<fieldType name="point" class="solr.PointType" dimension="2" subFieldSuffix="_d"/>
<fieldType name="location" class="solr.LatLonType" subFieldSuffix="_coordinate"/>
<fieldType name="location_rpt" class="solr.SpatialRecursivePrefixTreeFieldType"
           geo="true" distErrPct="0.025" maxDistErr="0.000009" units="degrees"/>
<fieldType name="currency" class="solr.CurrencyField" precisionStep="8"
           defaultCurrency="USD" currencyConfig="currency.xml"/>
<!-- Swedish text: lower-casing, Snowball-format stop-word list and the Swedish
     Snowball stemmer. -->
<fieldType name="text_sv" class="solr.TextField" positionIncrementGap="100">
  <analyzer>
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.StopFilterFactory" words="lang/stopwords_sv.txt" format="snowball" ignoreCase="true"/>
    <filter class="solr.SnowballPorterFilterFactory" language="Swedish"/>
    <!-- less aggressive: <filter class="solr.SwedishLightStemFilterFactory"/> -->
  </analyzer>
</fieldType>
<!-- First-letter bucket for facet navigation. At index time only capture
     group 1 of the pattern (a leading ASCII letter) is emitted as the token,
     which is then passed through the mb_letterBuckets.txt synonym file.
     NOTE(review): the pattern only accepts a-z and A-Z as the first character;
     confirm how values starting with a digit or a non-ASCII letter
     (for example Swedish letters) should be bucketed. -->
<fieldType name="bucketFirstLetter" class="solr.TextField" sortMissingLast="true" omitNorms="true">
  <analyzer type="index">
    <tokenizer class="solr.PatternTokenizerFactory" pattern="^([a-zA-Z]).*" group="1"/>
    <filter class="solr.SynonymFilterFactory" synonyms="mb_letterBuckets.txt"
            ignoreCase="true" expand="false"/>
  </analyzer>
  <analyzer type="query">
    <!-- Query value is used as a single, unmodified token. -->
    <tokenizer class="solr.KeywordTokenizerFactory"/>
  </analyzer>
</fieldType>
<!-- Sort key: the whole value as one token, lower-cased. -->
<fieldType name="textSort" class="solr.TextField" sortMissingLast="true" omitNorms="true">
  <analyzer>
    <tokenizer class="solr.KeywordTokenizerFactory"/>
    <filter class="solr.LowerCaseFilterFactory"/>
  </analyzer>
</fieldType>
<!-- Facet value: the whole field as a single token with trailing punctuation removed. -->
<fieldType name="textFacet" class="solr.TextField" sortMissingLast="true" omitNorms="true">
  <analyzer>
    <tokenizer class="solr.KeywordTokenizerFactory"/>
    <!-- Strip trailing periods and whitespace from facets. The negative
         lookbehind skips the match when the run is directly preceded by a
         single capital letter at a word boundary (such as an initial).
         The less-than sign of the lookbehind is written as an entity because a
         literal one is not allowed inside an XML attribute value; the parsed
         regex is unchanged. -->
    <filter class="solr.PatternReplaceFilterFactory"
            pattern="(?&lt;!\b[A-Z])[.\s]*$" replacement="" replace="first"/>
  </analyzer>
</fieldType>
<!-- Main full-text type (used by the allfields and fulltext fields).
     Both analyzers apply the mapping-FoldToASCII.txt char filter and split on
     whitespace. The query analyzer additionally removes "[elektronisk resurs]",
     "/", ":" and "-" from tokens, applies synonyms, and does not catenate
     word or number parts. -->
<fieldType name="text" class="solr.TextField" positionIncrementGap="100">
  <analyzer type="index">
    <charFilter class="solr.MappingCharFilterFactory" mapping="mapping-FoldToASCII.txt"/>
    <tokenizer class="solr.WhitespaceTokenizerFactory"/>
    <filter class="solr.WordDelimiterFilterFactory"
            preserveOriginal="1" generateWordParts="1" generateNumberParts="1"
            catenateWords="1" catenateNumbers="1" catenateAll="0"
            splitOnCaseChange="0" stemEnglishPossessive="1"/>
    <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"
            enablePositionIncrements="true"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <!-- Disabled: <filter class="solr.SnowballPorterFilterFactory" language="English" protected="protwords.txt"/> -->
    <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
    <!-- Disabled: <filter class="solr.ISOLatin1AccentFilterFactory"/> -->
  </analyzer>
  <analyzer type="query">
    <charFilter class="solr.MappingCharFilterFactory" mapping="mapping-FoldToASCII.txt"/>
    <tokenizer class="solr.WhitespaceTokenizerFactory"/>
    <filter class="solr.PatternReplaceFilterFactory"
            pattern="\[elektronisk resurs\]|/|:|-" replacement=""/>
    <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
    <filter class="solr.WordDelimiterFilterFactory"
            preserveOriginal="1" generateWordParts="1" generateNumberParts="1"
            catenateWords="0" catenateNumbers="0" catenateAll="0"
            splitOnCaseChange="0" stemEnglishPossessive="1"/>
    <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"
            enablePositionIncrements="true"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
    <!-- Disabled: <filter class="solr.ISOLatin1AccentFilterFactory"/> -->
  </analyzer>
</fieldType>
<!-- Text field without stemming and synonyms; used by the *_search fields.
     The query analyzer additionally removes "[elektronisk resurs]", "/", ":"
     and "-" from tokens and does not catenate word or number parts. -->
<fieldType name="textProper" class="solr.TextField" positionIncrementGap="100">
  <analyzer type="index">
    <charFilter class="solr.MappingCharFilterFactory" mapping="mapping-FoldToASCII.txt"/>
    <tokenizer class="solr.WhitespaceTokenizerFactory"/>
    <!-- Disabled: <filter class="solr.ISOLatin1AccentFilterFactory"/> -->
    <filter class="solr.WordDelimiterFilterFactory"
            preserveOriginal="1" generateWordParts="1" generateNumberParts="1"
            catenateWords="1" catenateNumbers="1" catenateAll="0"
            stemEnglishPossessive="1"/>
    <filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
  </analyzer>
  <analyzer type="query">
    <charFilter class="solr.MappingCharFilterFactory" mapping="mapping-FoldToASCII.txt"/>
    <tokenizer class="solr.WhitespaceTokenizerFactory"/>
    <filter class="solr.PatternReplaceFilterFactory"
            pattern="\[elektronisk resurs\]|/|:|-" replacement=""/>
    <!-- Disabled: <filter class="solr.ISOLatin1AccentFilterFactory"/> -->
    <filter class="solr.WordDelimiterFilterFactory"
            preserveOriginal="1" generateWordParts="1" generateNumberParts="1"
            catenateWords="0" catenateNumbers="0" catenateAll="0"
            stemEnglishPossessive="1"/>
    <filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
  </analyzer>
</fieldType>
<!-- Basic text field for spell correction (type of the "spelling" field).
     It uses its own stop-word list, stopwordsSpell.txt. -->
<fieldType name="textSpell" class="solr.TextField" positionIncrementGap="100">
  <analyzer>
    <tokenizer class="solr.WhitespaceTokenizerFactory"/>
    <!-- Disabled: <filter class="solr.ISOLatin1AccentFilterFactory"/> -->
    <filter class="solr.WordDelimiterFilterFactory"
            preserveOriginal="1" generateWordParts="1" generateNumberParts="1"
            catenateWords="0" catenateNumbers="0" catenateAll="0"
            stemEnglishPossessive="1"/>
    <filter class="solr.StopFilterFactory" words="stopwordsSpell.txt" ignoreCase="true"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
  </analyzer>
</fieldType>
<!-- More advanced spell-checking field: after pattern clean-up, lower-casing
     and stop-word removal, tokens are emitted as two-word shingles only
     (maxShingleSize="2", outputUnigrams="false").
     NOTE(review): the index analyzer reads stopwordsSpell.txt while the query
     analyzer reads stopwords.txt; confirm the difference is intentional. -->
<fieldType name="textSpellShingle" class="solr.TextField" positionIncrementGap="100">
  <analyzer type="index">
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.PatternReplaceFilterFactory"
            pattern="\[elektronisk resurs\]|/|:|-" replacement=""/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.StopFilterFactory" words="stopwordsSpell.txt" ignoreCase="true"/>
    <filter class="solr.ShingleFilterFactory" maxShingleSize="2" outputUnigrams="false"/>
    <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
  </analyzer>
  <analyzer type="query">
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.PatternReplaceFilterFactory"
            pattern="\[elektronisk resurs\]|/|:|-" replacement=""/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true"/>
    <filter class="solr.ShingleFilterFactory" maxShingleSize="2" outputUnigrams="false"/>
    <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
  </analyzer>
</fieldType>
<!-- Sortable integer type; used by the year_facet and pages fields. -->
<fieldType name="sint" class="solr.SortableIntField"
           sortMissingLast="true" omitNorms="true"/>
<!-- Normalised ISBN/ISSN numbers. Processing order:
       1. keep only the first run of non-whitespace characters (capture group 1),
       2. lower-case it,
       3. delete every character that is not a digit or "x",
       4. drop tokens that are empty after stripping (length must be 1 to 100). -->
<fieldType name="isn" class="solr.TextField" positionIncrementGap="100">
  <analyzer>
    <tokenizer class="solr.PatternTokenizerFactory" pattern="^(\S*)\s*.*$" group="1"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.PatternReplaceFilterFactory"
            pattern="[^0-9x]" replacement="" replace="all"/>
    <filter class="solr.LengthFilterFactory" min="1" max="100"/>
  </analyzer>
</fieldType>
</types>
</schema>
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment