Created
November 12, 2015 03:50
-
-
Save kenprice/82ce5ae16e9a09cb81f0 to your computer and use it in GitHub Desktop.
Ken's schema.xml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Solr schema "gios-stuff".

  Declares the document fields (id, title, author details, organization,
  publish date, body), a catch-all "collector" field fed by copyField
  rules, and the field types those fields rely on.
-->
<!--
  NOTE: the "version" attribute is Solr's schema syntax version, not a
  revision number for this file. Values below 1.1 make every field
  multi-valued by default, which is incompatible with a uniqueKey field.
-->
<schema name="gios-stuff" version="1.5">

  <types>
    <!-- Untokenized, exact-match string; used for the unique key. -->
    <fieldType name="string" class="solr.StrField" sortMissingLast="true"/>

    <!-- Date type backing publish_date. -->
    <fieldType name="tdate" class="solr.TrieDateField" precisionStep="6" positionIncrementGap="0"/>

    <!--
      textgen tokenizes at whitespace, then catenates word parts and splits
      on case change, both when indexing and when querying. The intent is
      that queries for "Augustus DeMorgan" and "Augustus De-Morgan" both
      match a document containing "Augustus De Morgan".
      positionIncrementGap keeps phrase queries from matching across the
      separate values of a multi-valued field such as "collector".
    -->
    <fieldType name="textgen" class="solr.TextField" positionIncrementGap="100">
      <analyzer>
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
        <filter class="solr.WordDelimiterFilterFactory"
                generateWordParts="1"
                generateNumberParts="1"
                catenateWords="1"
                catenateNumbers="1"
                catenateAll="0"
                splitOnCaseChange="1"/>
        <filter class="solr.LowerCaseFilterFactory"/>
      </analyzer>
    </fieldType>

    <!--
      Custom type for the document body. It is a solr.TextField so that
      analyzers can be attached. Every analyzer needs a tokenizer, so a
      minimal standard-tokenizer + lowercase chain is enabled for both
      indexing and querying.
    -->
    <fieldType name="document_body" class="solr.TextField" positionIncrementGap="100">
      <analyzer type="index">
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <!--
          Optional index-time filters. Enable only once keepwords.txt and
          syns.txt exist in the core's conf directory:
          <filter class="solr.KeepWordFilterFactory" words="keepwords.txt"/>
          <filter class="solr.SynonymFilterFactory" synonyms="syns.txt"/>
        -->
      </analyzer>
      <analyzer type="query">
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
      </analyzer>
    </fieldType>
  </types>

  <fields>
    <!-- Unique key; kept as an untokenized string so lookups are exact. -->
    <field name="id" type="string" indexed="true" stored="true" required="true"/>

    <field name="title" type="textgen" indexed="true" stored="true"/>
    <field name="author" type="textgen" indexed="true" stored="true"/>
    <field name="author_last_name" type="textgen" indexed="true" stored="true"/>
    <field name="author_first_name" type="textgen" indexed="true" stored="true"/>
    <field name="organization" type="textgen" indexed="true" stored="true"/>
    <field name="publish_date" type="tdate" indexed="true" stored="true"/>

    <!-- Document body uses its own type so its analysis can be tuned separately. -->
    <field name="body" type="document_body" indexed="true" stored="true"/>

    <!-- Collection of the copied fields below; indexed for searching but not stored. -->
    <field name="collector" type="textgen" indexed="true" stored="false" multiValued="true"/>
  </fields>

  <uniqueKey>id</uniqueKey>

  <!-- Combining fields into one may simplify some queries. -->
  <copyField source="title" dest="collector"/>
  <copyField source="author" dest="collector"/>
  <copyField source="body" dest="collector"/>
  <copyField source="organization" dest="collector"/>

</schema>
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment