Created
August 16, 2017 08:19
-
-
Save lobster1234/35078e4fc1df30e249b986e19fd67202 to your computer and use it in GitHub Desktop.
Fixed up schema.xml to work with Nutch 2.3.1 and Solr 6.5.1
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?xml version="1.0" encoding="UTF-8"?>
<!-- Solr managed schema - automatically generated - DO NOT EDIT -->
<schema name="nutch" version="1.5">
  <!--
    Document key and query defaults: "id" is the unique key, unqualified
    query terms are searched in "text", and terms are combined with OR.
    NOTE(review): the Solr 7.0 upgrade notes state that defaultSearchField and
    solrQueryParser defaultOperator are no longer supported in the schema
    (use the df / q.op request parameters instead). Confirm before reusing
    this file on Solr 7 or later.
  -->
  <uniqueKey>id</uniqueKey>
  <defaultSearchField>text</defaultSearchField>
  <solrQueryParser defaultOperator="OR"/>

  <!--
    Field types, listed alphabetically by name.
    The plain Trie types (date, double, float, int, long) use precisionStep="0";
    the t-prefixed variants further down use precisionStep 6 or 8.
  -->
  <fieldType name="binary" class="solr.BinaryField"/>
  <fieldType name="date" class="solr.TrieDateField" omitNorms="true" positionIncrementGap="0" precisionStep="0"/>
  <fieldType name="double" class="solr.TrieDoubleField" omitNorms="true" positionIncrementGap="0" precisionStep="0"/>
  <fieldType name="float" class="solr.TrieFloatField" omitNorms="true" positionIncrementGap="0" precisionStep="0"/>
  <!-- Neither indexed nor stored: values sent to a field of this type are dropped. -->
  <fieldType name="ignored" class="solr.StrField" indexed="false" stored="false" multiValued="true"/>
  <fieldType name="int" class="solr.TrieIntField" omitNorms="true" positionIncrementGap="0" precisionStep="0"/>
  <fieldType name="long" class="solr.TrieLongField" omitNorms="true" positionIncrementGap="0" precisionStep="0"/>

  <!-- The whole value is kept as a single token (keyword tokenizer) and lowercased. -->
  <fieldType name="lowercase" class="solr.TextField" positionIncrementGap="100">
    <analyzer>
      <tokenizer class="solr.KeywordTokenizerFactory"/>
      <filter class="solr.LowerCaseFilterFactory"/>
    </analyzer>
  </fieldType>

  <!-- Whitespace-separated tokens carrying delimited payloads, encoded as floats. -->
  <fieldType name="payloads" class="solr.TextField" indexed="true" stored="false">
    <analyzer>
      <tokenizer class="solr.WhitespaceTokenizerFactory"/>
      <filter class="solr.DelimitedPayloadTokenFilterFactory" encoder="float"/>
    </analyzer>
  </fieldType>

  <!-- Standard tokenization followed by the Double Metaphone filter (inject="false"). -->
  <fieldType name="phonetic" class="solr.TextField" indexed="true" stored="false">
    <analyzer>
      <tokenizer class="solr.StandardTokenizerFactory"/>
      <filter class="solr.DoubleMetaphoneFilterFactory" inject="false"/>
    </analyzer>
  </fieldType>

  <fieldType name="string" class="solr.StrField" omitNorms="true" sortMissingLast="true"/>
  <fieldType name="tdate" class="solr.TrieDateField" omitNorms="true" positionIncrementGap="0" precisionStep="6"/>
  <fieldType name="tdouble" class="solr.TrieDoubleField" omitNorms="true" positionIncrementGap="0" precisionStep="8"/>

  <!--
    English text: stop words, lowercasing, possessive stripping, protected-word
    marking and Porter stemming. Synonyms are applied on the query side only.
  -->
  <fieldType name="text_en" class="solr.TextField" positionIncrementGap="100">
    <analyzer type="index">
      <tokenizer class="solr.StandardTokenizerFactory"/>
      <filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.EnglishPossessiveFilterFactory"/>
      <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
      <filter class="solr.PorterStemFilterFactory"/>
    </analyzer>
    <analyzer type="query">
      <tokenizer class="solr.StandardTokenizerFactory"/>
      <filter class="solr.SynonymFilterFactory" expand="true" ignoreCase="true" synonyms="synonyms.txt"/>
      <filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.EnglishPossessiveFilterFactory"/>
      <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
      <filter class="solr.PorterStemFilterFactory"/>
    </analyzer>
  </fieldType>

  <!--
    English text split on whitespace and then by the word-delimiter filter
    (including case changes). The index side also catenates word and number
    parts (catenateWords/catenateNumbers = 1); the query side does not (= 0).
  -->
  <fieldType name="text_en_splitting" class="solr.TextField" autoGeneratePhraseQueries="true" positionIncrementGap="100">
    <analyzer type="index">
      <tokenizer class="solr.WhitespaceTokenizerFactory"/>
      <filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true"/>
      <filter class="solr.WordDelimiterFilterFactory" catenateNumbers="1" generateNumberParts="1" splitOnCaseChange="1" generateWordParts="1" catenateAll="0" catenateWords="1"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
      <filter class="solr.PorterStemFilterFactory"/>
    </analyzer>
    <analyzer type="query">
      <tokenizer class="solr.WhitespaceTokenizerFactory"/>
      <filter class="solr.SynonymFilterFactory" expand="true" ignoreCase="true" synonyms="synonyms.txt"/>
      <filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true"/>
      <filter class="solr.WordDelimiterFilterFactory" catenateNumbers="0" generateNumberParts="1" splitOnCaseChange="1" generateWordParts="1" catenateAll="0" catenateWords="0"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
      <filter class="solr.PorterStemFilterFactory"/>
    </analyzer>
  </fieldType>

  <!--
    Tighter variant with one analyzer shared by indexing and querying:
    synonyms are not expanded, the word-delimiter filter only catenates
    (no word or number parts are generated), stemming is minimal, and
    duplicate tokens are removed at the end of the chain.
  -->
  <fieldType name="text_en_splitting_tight" class="solr.TextField" autoGeneratePhraseQueries="true" positionIncrementGap="100">
    <analyzer>
      <tokenizer class="solr.WhitespaceTokenizerFactory"/>
      <filter class="solr.SynonymFilterFactory" expand="false" ignoreCase="true" synonyms="synonyms.txt"/>
      <filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true"/>
      <filter class="solr.WordDelimiterFilterFactory" catenateNumbers="1" generateNumberParts="0" generateWordParts="0" catenateAll="0" catenateWords="1"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
      <filter class="solr.EnglishMinimalStemFilterFactory"/>
      <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
    </analyzer>
  </fieldType>

  <!-- General text: stop words and lowercasing, with synonyms added on the query side. -->
  <fieldType name="text_general" class="solr.TextField" positionIncrementGap="100">
    <analyzer type="index">
      <tokenizer class="solr.StandardTokenizerFactory"/>
      <filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true"/>
      <filter class="solr.LowerCaseFilterFactory"/>
    </analyzer>
    <analyzer type="query">
      <tokenizer class="solr.StandardTokenizerFactory"/>
      <filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true"/>
      <filter class="solr.SynonymFilterFactory" expand="true" ignoreCase="true" synonyms="synonyms.txt"/>
      <filter class="solr.LowerCaseFilterFactory"/>
    </analyzer>
  </fieldType>

  <!--
    Like text_general, but the index side also runs the reversed-wildcard
    filter with withOriginal="true".
  -->
  <fieldType name="text_general_rev" class="solr.TextField" positionIncrementGap="100">
    <analyzer type="index">
      <tokenizer class="solr.StandardTokenizerFactory"/>
      <filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.ReversedWildcardFilterFactory" maxPosQuestion="2" maxFractionAsterisk="0.33" maxPosAsterisk="3" withOriginal="true"/>
    </analyzer>
    <analyzer type="query">
      <tokenizer class="solr.StandardTokenizerFactory"/>
      <filter class="solr.SynonymFilterFactory" expand="true" ignoreCase="true" synonyms="synonyms.txt"/>
      <filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true"/>
      <filter class="solr.LowerCaseFilterFactory"/>
    </analyzer>
  </fieldType>

  <!-- Path values tokenized with the path-hierarchy tokenizer. -->
  <fieldType name="text_path" class="solr.TextField" positionIncrementGap="100">
    <analyzer>
      <tokenizer class="solr.PathHierarchyTokenizerFactory"/>
    </analyzer>
  </fieldType>

  <fieldType name="tfloat" class="solr.TrieFloatField" omitNorms="true" positionIncrementGap="0" precisionStep="8"/>
  <fieldType name="tint" class="solr.TrieIntField" omitNorms="true" positionIncrementGap="0" precisionStep="8"/>
  <fieldType name="tlong" class="solr.TrieLongField" omitNorms="true" positionIncrementGap="0" precisionStep="8"/>

  <!-- URL text: standard tokenizer, lowercased, then split into word and number parts. -->
  <fieldType name="url" class="solr.TextField" positionIncrementGap="100">
    <analyzer>
      <tokenizer class="solr.StandardTokenizerFactory"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.WordDelimiterFilterFactory" generateNumberParts="1" generateWordParts="1"/>
    </analyzer>
  </fieldType>

  <!-- Document fields, listed alphabetically by name. "id" is the only required field. -->
  <field name="_version_" type="long" indexed="true" stored="true"/>
  <field name="anchor" type="text_general" multiValued="true" indexed="true" stored="true"/>
  <field name="author" type="string" indexed="true" stored="true"/>
  <field name="batchId" type="string" indexed="false" stored="true"/>
  <field name="boost" type="float" indexed="false" stored="true"/>
  <field name="cache" type="string" indexed="false" stored="true"/>
  <field name="cc" type="string" multiValued="true" indexed="true" stored="true"/>
  <field name="content" type="text_general" indexed="true" stored="true"/>
  <field name="contentLength" type="string" indexed="false" stored="true"/>
  <field name="date" type="tdate" indexed="true" stored="true"/>
  <field name="digest" type="string" indexed="false" stored="true"/>
  <field name="feed" type="string" indexed="true" stored="true"/>
  <field name="host" type="url" indexed="true" stored="false"/>
  <field name="id" type="string" indexed="true" required="true" stored="true"/>
  <field name="lang" type="string" indexed="true" stored="true"/>
  <field name="lastModified" type="date" indexed="false" stored="true"/>
  <field name="publishedDate" type="date" indexed="true" stored="true"/>
  <field name="rawcontent" type="string" indexed="false" stored="true"/>
  <field name="subcollection" type="string" multiValued="true" indexed="true" stored="true"/>
  <field name="tag" type="string" multiValued="true" indexed="true" stored="true"/>
  <!-- Catch-all search field: indexed but not stored, filled by the copyField rules below. -->
  <field name="text" type="text_general" multiValued="true" indexed="true" stored="false"/>
  <field name="title" type="text_general" multiValued="true" indexed="true" stored="true"/>
  <field name="tld" type="string" indexed="false" stored="false"/>
  <field name="tstamp" type="date" indexed="false" stored="true"/>
  <field name="type" type="string" multiValued="true" indexed="true" stored="true"/>
  <field name="updatedDate" type="date" indexed="true" stored="true"/>
  <field name="url" type="url" indexed="true" stored="true"/>

  <!-- Any field whose name starts with "meta_" is indexed and stored as a string. -->
  <dynamicField name="meta_*" type="string" indexed="true" stored="true"/>

  <!-- Copy the searchable fields into "text", the default search field. -->
  <copyField source="anchor" dest="text"/>
  <copyField source="author" dest="text"/>
  <copyField source="content" dest="text"/>
  <copyField source="title" dest="text"/>
  <copyField source="url" dest="text"/>
</schema>
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hi, can I use this schema for Solr 7.1 as well?
Thanks,
Sai