Last active
May 4, 2017 16:46
-
-
Save yauh/9051501 to your computer and use it in GitHub Desktop.
Making Solr import XML files
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?xml version="1.0" encoding="UTF-8"?>
<!--
  DataImportHandler configuration.

  The outer entity lists every *.xml file below baseDir (recursively); the
  inner entity opens each listed file and emits one row per
  /SVVZ/Modules/Module element, mapping CAMO_TITLEGER to the "title" column.
-->
<dataConfig>
  <!-- File-backed data source used to read the XML files listed below.
       The encoding is the one the files are expected to be written in. -->
  <dataSource name="fds" encoding="ISO-8859-1" type="FileDataSource"/>
  <document>
    <!-- Lists files only: dataSource="null" because the file listing itself
         needs no data source, and rootEntity="false" so that the rows of the
         nested entity (not the file rows) become the documents. -->
    <entity name="files"
            dataSource="null"
            rootEntity="false"
            processor="FileListEntityProcessor"
            baseDir="/tmp/provision/import"
            fileName=".*\.xml"
            onError="abort"
            recursive="true">
      <!-- Parses one file per row of the "files" entity.
           dataSource must name the dataSource declared above ("fds");
           "files" is the parent entity's name, not a data source.
           NOTE(review): RegexTransformer is enabled but no field below uses
           a regex attribute; confirm whether it is still needed. -->
      <entity name="file"
              processor="XPathEntityProcessor"
              pk="title"
              dataSource="fds"
              stream="true"
              forEach="/SVVZ/Modules/Module"
              onError="abort"
              transformer="RegexTransformer"
              url="${files.fileAbsolutePath}">
        <field column="title" xpath="/SVVZ/Modules/Module/CAMO_TITLEGER"/>
      </entity>
    </entity>
  </document>
</dataConfig>
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"responseHeader": { | |
"status": 0, | |
"QTime": 63 | |
}, | |
"initArgs": [ | |
"defaults", | |
[ | |
"config", | |
"/usr/local/src/solr_core/test/conf/data-config.xml" | |
] | |
], | |
"command": "full-import", | |
"mode": "debug", | |
"documents": [], | |
"verbose-output": [ | |
"entity:files", | |
[ | |
null, | |
"----------- row #1-------------", | |
"fileSize", | |
403, | |
"fileLastModified", | |
"2014-02-18T11:06:09Z", | |
"fileAbsolutePath", | |
"/tmp/provision/import/sample.xml", | |
"fileDir", | |
"/tmp/provision/import", | |
"file", | |
"sample.xml", | |
null, | |
"---------------------------------------------", | |
"entity:file", | |
[ | |
"query", | |
"/tmp/provision/import/sample.xml", | |
"time-taken", | |
"0:0:0.1", | |
null, | |
"----------- row #1-------------", | |
"titel", | |
[ | |
"I want this to be my field value" | |
], | |
"$forEach", | |
"/SVVZ/Modules/Module", | |
null, | |
"---------------------------------------------", | |
"transformer:RegexTransformer", | |
[ | |
null, | |
"---------------------------------------------", | |
"titel", | |
[ | |
"I want this to be my field value" | |
], | |
"$forEach", | |
"/SVVZ/Modules/Module", | |
null, | |
"---------------------------------------------" | |
], | |
null, | |
"----------- row #2-------------", | |
"titel", | |
[ | |
"A second title" | |
], | |
"$forEach", | |
"/SVVZ/Modules/Module", | |
null, | |
"---------------------------------------------", | |
"transformer:RegexTransformer", | |
[ | |
null, | |
"---------------------------------------------", | |
"titel", | |
[ | |
"A second title" | |
], | |
"$forEach", | |
"/SVVZ/Modules/Module", | |
null, | |
"---------------------------------------------" | |
] | |
] | |
] | |
], | |
"status": "idle", | |
"importResponse": "", | |
"statusMessages": { | |
"Total Requests made to DataSource": "0", | |
"Total Rows Fetched": "3", | |
"Total Documents Skipped": "0", | |
"Full Dump Started": "2014-02-18 11:15:47", | |
"": "Indexing completed. Added/Updated: 0 documents. Deleted 0 documents.", | |
"Committed": "2014-02-18 11:15:47", | |
"Total Documents Processed": "0", | |
"Time taken": "0:0:0.49" | |
}, | |
"WARNING": "This response format is experimental. It is likely to change in the future." | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?xml version="1.0" encoding="ISO-8859-1"?>
<?xml-stylesheet type="text/xsl" href="http://localhost/svvz_default.xsl" ?>
<!--
  Sample import file. Each Module element under /SVVZ/Modules carries one
  CAMO_TITLEGER element holding the title text.
-->
<SVVZ>
  <SVVZTerm>WS 13/14</SVVZTerm>
  <Modules>
    <Module>
      <CAMO_TITLEGER>I want this to be my field value</CAMO_TITLEGER>
    </Module>
    <Module>
      <CAMO_TITLEGER>A second title</CAMO_TITLEGER>
    </Module>
  </Modules>
</SVVZ>
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Minimal catch-all schema: every incoming field is accepted through the "*"
  dynamic field as a multi-valued string, and every field is additionally
  copied into "fullText", which is the default search field.
-->
<schema name="simple" version="1.1">
  <types>
    <fieldType name="string" class="solr.StrField"/>
    <fieldType name="long" class="solr.TrieLongField"/>
    <!-- Whitespace-tokenized, lower-cased text. -->
    <fieldType name="text" class="solr.TextField">
      <analyzer>
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
      </analyzer>
    </fieldType>
  </types>
  <fields>
    <!-- Catch-all: any field name not declared explicitly lands here. -->
    <dynamicField name="*" type="string" multiValued="true" indexed="true" stored="true"/>
    <field name="fullText" type="string" multiValued="true"/>
    <!-- dest must match the declared field name exactly (names are
         case-sensitive); a differently-cased dest would be absorbed by the
         "*" dynamic field and leave the default search field empty. -->
    <copyField source="*" dest="fullText"/>
  </fields>
  <defaultSearchField>fullText</defaultSearchField>
  <solrQueryParser defaultOperator="OR"/>
</schema>
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?xml version="1.0" encoding="UTF-8" ?>
<!--
  Minimal solrconfig.xml: a default search handler, the update and admin
  handlers, and the DataImportHandler together with the <lib> directive that
  loads its jar.
-->
<config>
  <luceneMatchVersion>4.6</luceneMatchVersion>

  <!-- Default handler for search requests. -->
  <requestHandler name="standard" class="solr.StandardRequestHandler" default="true"/>
  <requestHandler name="/update" class="solr.UpdateRequestHandler"/>
  <requestHandler name="/admin/" class="org.apache.solr.handler.admin.AdminHandlers"/>
  <admin>
    <defaultQuery>*:*</defaultQuery>
  </admin>

  <!-- Makes the DataImportHandler classes available to this core. -->
  <lib dir="/usr/local/src/solr-4.6.1/dist/" regex="solr-dataimporthandler-.*\.jar"/>

  <!-- NOTE(review): "config" is an absolute path; confirm it points at the
       data-config.xml of the core that is actually being imported into. -->
  <requestHandler name="/dataimport" class="org.apache.solr.handler.dataimport.DataImportHandler">
    <lst name="defaults">
      <str name="config">/usr/local/src/dbm_solr_core/dbm/conf/data-config.xml</str>
    </lst>
  </requestHandler>
</config>
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I tried to get this code working, but I keep getting the error "missing content stream". Can you please help if you have any idea? I am using Solr 6.5.0.