Skip to content

Instantly share code, notes, and snippets.

@yauh
Last active May 4, 2017 16:46
Show Gist options
  • Save yauh/9051501 to your computer and use it in GitHub Desktop.
Save yauh/9051501 to your computer and use it in GitHub Desktop.
Making Solr import XML files
<!--
  DataImportHandler configuration.
  Walks baseDir for XML files and emits one Solr document for every
  /SVVZ/Modules/Module element found in each file.
-->
<dataConfig>
  <!-- File-based data source that the inner XPath entity reads each file through. -->
  <dataSource name="fds" encoding="ISO-8859-1" type="FileDataSource"/>
  <document>
    <!--
      Outer entity: enumerates the files to import. FileListEntityProcessor
      needs no data source (dataSource="null"). rootEntity="false" makes the
      nested entity the one that produces documents; rows of this entity only
      supply ${files.fileAbsolutePath} to it.
    -->
    <entity name="files"
            dataSource="null"
            rootEntity="false"
            processor="FileListEntityProcessor"
            baseDir="/tmp/provision/import"
            fileName=".*\.xml"
            onError="abort"
            recursive="true">
      <!--
        Inner entity: parses one file and yields a row per forEach match.
        dataSource must name a <dataSource> element; "fds" is the only one
        declared in this file ("files" is the name of the outer entity, not of
        a data source).
        NOTE(review): RegexTransformer is enabled although the single field
        below carries no regex-related attribute; confirm whether it is needed.
      -->
      <entity name="file"
              processor="XPathEntityProcessor"
              pk="title"
              dataSource="fds"
              stream="true"
              forEach="/SVVZ/Modules/Module"
              onError="abort"
              transformer="RegexTransformer"
              url="${files.fileAbsolutePath}">
        <!-- Column name doubles as the Solr field name the value is indexed under. -->
        <field column="title" xpath="/SVVZ/Modules/Module/CAMO_TITLEGER" />
      </entity>
    </entity>
  </document>
</dataConfig>
{
"responseHeader": {
"status": 0,
"QTime": 63
},
"initArgs": [
"defaults",
[
"config",
"/usr/local/src/solr_core/test/conf/data-config.xml"
]
],
"command": "full-import",
"mode": "debug",
"documents": [],
"verbose-output": [
"entity:files",
[
null,
"----------- row #1-------------",
"fileSize",
403,
"fileLastModified",
"2014-02-18T11:06:09Z",
"fileAbsolutePath",
"/tmp/provision/import/sample.xml",
"fileDir",
"/tmp/provision/import",
"file",
"sample.xml",
null,
"---------------------------------------------",
"entity:file",
[
"query",
"/tmp/provision/import/sample.xml",
"time-taken",
"0:0:0.1",
null,
"----------- row #1-------------",
"titel",
[
"I want this to be my field value"
],
"$forEach",
"/SVVZ/Modules/Module",
null,
"---------------------------------------------",
"transformer:RegexTransformer",
[
null,
"---------------------------------------------",
"titel",
[
"I want this to be my field value"
],
"$forEach",
"/SVVZ/Modules/Module",
null,
"---------------------------------------------"
],
null,
"----------- row #2-------------",
"titel",
[
"A second title"
],
"$forEach",
"/SVVZ/Modules/Module",
null,
"---------------------------------------------",
"transformer:RegexTransformer",
[
null,
"---------------------------------------------",
"titel",
[
"A second title"
],
"$forEach",
"/SVVZ/Modules/Module",
null,
"---------------------------------------------"
]
]
]
],
"status": "idle",
"importResponse": "",
"statusMessages": {
"Total Requests made to DataSource": "0",
"Total Rows Fetched": "3",
"Total Documents Skipped": "0",
"Full Dump Started": "2014-02-18 11:15:47",
"": "Indexing completed. Added/Updated: 0 documents. Deleted 0 documents.",
"Committed": "2014-02-18 11:15:47",
"Total Documents Processed": "0",
"Time taken": "0:0:0.49"
},
"WARNING": "This response format is experimental. It is likely to change in the future."
}
<?xml version="1.0" encoding="ISO-8859-1"?>
<?xml-stylesheet type="text/xsl" href="http://localhost/svvz_default.xsl" ?>
<!-- Sample import file: one SVVZ term holding two Module entries, each with a CAMO_TITLEGER value. -->
<SVVZ>
  <SVVZTerm>WS 13/14</SVVZTerm>
  <Modules>
    <Module>
      <CAMO_TITLEGER>I want this to be my field value</CAMO_TITLEGER>
    </Module>
    <Module>
      <CAMO_TITLEGER>A second title</CAMO_TITLEGER>
    </Module>
  </Modules>
</SVVZ>
<?xml version="1.0" ?>
<!--
  Minimal schema: a catch-all dynamic field accepts every incoming field as a
  multi-valued string, and every field is additionally copied into fullText,
  which is the default search field.
-->
<schema name="simple" version="1.1">
  <types>
    <fieldType name="string" class="solr.StrField" />
    <fieldType name="long" class="solr.TrieLongField" />
    <!-- Whitespace-tokenized, lower-cased text type. -->
    <fieldType name="text" class="solr.TextField">
      <analyzer>
        <tokenizer class="solr.WhitespaceTokenizerFactory" />
        <filter class="solr.LowerCaseFilterFactory" />
      </analyzer>
    </fieldType>
  </types>
  <fields>
    <!-- Catch-all: any field name not declared explicitly is indexed and stored as a string. -->
    <dynamicField name="*" type="string" multiValued="true" indexed="true" stored="true" />
    <!--
      Field names are case-sensitive: dest must be spelled exactly like the
      declared field and the defaultSearchField ("fullText"). A differently
      cased name would be caught by the "*" dynamic field instead, leaving the
      default search field empty.
    -->
    <copyField source="*" dest="fullText" />
    <!-- NOTE(review): declared as "string", so the "text" type above is unused; confirm this is intended. -->
    <field name="fullText" type="string" multiValued="true" />
  </fields>
  <defaultSearchField>fullText</defaultSearchField>
  <solrQueryParser defaultOperator="OR" />
</schema>
<?xml version="1.0" encoding="UTF-8" ?>
<!-- Minimal solrconfig.xml: default search, update and admin handlers plus the DataImportHandler. -->
<config>
  <luceneMatchVersion>4.6</luceneMatchVersion>

  <!-- Request handlers: default search handler, update endpoint, admin endpoints. -->
  <requestHandler name="standard"
                  class="solr.StandardRequestHandler"
                  default="true" />
  <requestHandler name="/update"
                  class="solr.UpdateRequestHandler" />
  <requestHandler name="/admin/"
                  class="org.apache.solr.handler.admin.AdminHandlers" />

  <admin>
    <defaultQuery>*:*</defaultQuery>
  </admin>

  <!-- Adds the jar(s) matching the regex in the given directory to the classpath. -->
  <lib dir="/usr/local/src/solr-4.6.1/dist/"
       regex="solr-dataimporthandler-.*\.jar" />

  <!-- DataImportHandler endpoint; "config" names the import configuration file it loads. -->
  <requestHandler name="/dataimport"
                  class="org.apache.solr.handler.dataimport.DataImportHandler">
    <lst name="defaults">
      <!-- NOTE(review): absolute, machine-specific path; confirm it points at the core that hosts data-config.xml. -->
      <str name="config">/usr/local/src/dbm_solr_core/dbm/conf/data-config.xml</str>
    </lst>
  </requestHandler>
</config>
@yauh
Copy link
Author

yauh commented Feb 18, 2014

The updated files appear to work nicely. Thanks for all the great help!

@ShalakaGit
Copy link

I tried to get this code working, but I keep getting the error "missing content stream". Can you please help if you have any idea? I am using Solr 6.5.0.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment