-
-
Save yauh/9051501 to your computer and use it in GitHub Desktop.
<!--
  DataImportHandler configuration.
  The outer entity lists the XML files below baseDir; the inner entity reads
  each listed file and emits one row per /SVVZ/Modules/Module element.
-->
<dataConfig>
  <!-- Data source used to read the listed files; referenced by name from the "file" entity. -->
  <dataSource name="fds" encoding="ISO-8859-1" type="FileDataSource"/>
  <document>
    <!--
      rootEntity="false": rows of this entity are file listings, not documents.
      Per the discussion in this gist, documents are created from the rows of
      the root entity, which is therefore the nested "file" entity.
    -->
    <entity name="files"
            dataSource="null"
            rootEntity="false"
            processor="FileListEntityProcessor"
            baseDir="/tmp/provision/import"
            fileName=".*\.xml"
            onError="abort"
            recursive="true">
      <!--
        dataSource must name a declared <dataSource>. The only one declared
        here is "fds"; "files" is the name of the parent entity, not of a
        data source. Do not add a rootEntity attribute on this entity.
      -->
      <entity name="file"
              processor="XPathEntityProcessor"
              pk="title"
              dataSource="fds"
              stream="true"
              forEach="/SVVZ/Modules/Module"
              onError="abort"
              transformer="RegexTransformer"
              url="${files.fileAbsolutePath}">
        <field column="title" xpath="/SVVZ/Modules/Module/CAMO_TITLEGER" />
      </entity>
    </entity>
  </document>
</dataConfig>
{ | |
"responseHeader": { | |
"status": 0, | |
"QTime": 63 | |
}, | |
"initArgs": [ | |
"defaults", | |
[ | |
"config", | |
"/usr/local/src/solr_core/test/conf/data-config.xml" | |
] | |
], | |
"command": "full-import", | |
"mode": "debug", | |
"documents": [], | |
"verbose-output": [ | |
"entity:files", | |
[ | |
null, | |
"----------- row #1-------------", | |
"fileSize", | |
403, | |
"fileLastModified", | |
"2014-02-18T11:06:09Z", | |
"fileAbsolutePath", | |
"/tmp/provision/import/sample.xml", | |
"fileDir", | |
"/tmp/provision/import", | |
"file", | |
"sample.xml", | |
null, | |
"---------------------------------------------", | |
"entity:file", | |
[ | |
"query", | |
"/tmp/provision/import/sample.xml", | |
"time-taken", | |
"0:0:0.1", | |
null, | |
"----------- row #1-------------", | |
"titel", | |
[ | |
"I want this to be my field value" | |
], | |
"$forEach", | |
"/SVVZ/Modules/Module", | |
null, | |
"---------------------------------------------", | |
"transformer:RegexTransformer", | |
[ | |
null, | |
"---------------------------------------------", | |
"titel", | |
[ | |
"I want this to be my field value" | |
], | |
"$forEach", | |
"/SVVZ/Modules/Module", | |
null, | |
"---------------------------------------------" | |
], | |
null, | |
"----------- row #2-------------", | |
"titel", | |
[ | |
"A second title" | |
], | |
"$forEach", | |
"/SVVZ/Modules/Module", | |
null, | |
"---------------------------------------------", | |
"transformer:RegexTransformer", | |
[ | |
null, | |
"---------------------------------------------", | |
"titel", | |
[ | |
"A second title" | |
], | |
"$forEach", | |
"/SVVZ/Modules/Module", | |
null, | |
"---------------------------------------------" | |
] | |
] | |
] | |
], | |
"status": "idle", | |
"importResponse": "", | |
"statusMessages": { | |
"Total Requests made to DataSource": "0", | |
"Total Rows Fetched": "3", | |
"Total Documents Skipped": "0", | |
"Full Dump Started": "2014-02-18 11:15:47", | |
"": "Indexing completed. Added/Updated: 0 documents. Deleted 0 documents.", | |
"Committed": "2014-02-18 11:15:47", | |
"Total Documents Processed": "0", | |
"Time taken": "0:0:0.49" | |
}, | |
"WARNING": "This response format is experimental. It is likely to change in the future." | |
} |
<?xml version="1.0" encoding="ISO-8859-1"?>
<?xml-stylesheet type="text/xsl" href="http://localhost/svvz_default.xsl" ?>
<!-- Sample import file: each Module element below is meant to become one Solr document. -->
<SVVZ>
  <SVVZTerm>WS 13/14</SVVZTerm>
  <Modules>
    <Module>
      <CAMO_TITLEGER>I want this to be my field value</CAMO_TITLEGER>
    </Module>
    <Module>
      <CAMO_TITLEGER>A second title</CAMO_TITLEGER>
    </Module>
  </Modules>
</SVVZ>
<?xml version="1.0" ?>
<!-- Minimal schema: every incoming field is accepted as a multi-valued string and copied into fullText. -->
<schema name="simple" version="1.1">
  <types>
    <fieldType name="string" class="solr.StrField" />
    <fieldType name="long" class="solr.TrieLongField" />
    <fieldType name="text" class="solr.TextField">
      <analyzer>
        <tokenizer class="solr.WhitespaceTokenizerFactory" />
        <filter class="solr.LowerCaseFilterFactory" />
      </analyzer>
    </fieldType>
  </types>
  <fields>
    <!-- Catch-all search field; also the default search field declared below. -->
    <field name="fullText" type="string" multiValued="true" />
    <!-- Accept any field name without declaring it explicitly. -->
    <dynamicField name="*" type="string" multiValued="true" indexed="true" stored="true" />
    <!--
      Field names are case-sensitive: dest must be spelled exactly "fullText".
      A dest of "fulltext" would be absorbed by the "*" dynamicField and leave
      the default search field empty.
    -->
    <copyField source="*" dest="fullText" />
  </fields>
  <defaultSearchField>fullText</defaultSearchField>
  <solrQueryParser defaultOperator="OR" />
</schema>
<?xml version="1.0" encoding="UTF-8" ?>
<!-- Minimal solrconfig.xml: search, update and admin handlers plus the DataImportHandler. -->
<config>
  <luceneMatchVersion>4.6</luceneMatchVersion>

  <!-- Core request handlers. -->
  <requestHandler name="standard"
                  class="solr.StandardRequestHandler"
                  default="true" />
  <requestHandler name="/update"
                  class="solr.UpdateRequestHandler" />
  <requestHandler name="/admin/"
                  class="org.apache.solr.handler.admin.AdminHandlers" />

  <admin>
    <defaultQuery>*:*</defaultQuery>
  </admin>

  <!-- Load the DataImportHandler jar(s) from the Solr distribution directory. -->
  <lib dir="/usr/local/src/solr-4.6.1/dist/"
       regex="solr-dataimporthandler-.*\.jar" />

  <!--
    DataImportHandler endpoint; "config" points at the data-config.xml to use.
    NOTE(review): the debug response in this gist reports a different config
    path (/usr/local/src/solr_core/test/conf/data-config.xml). Confirm that
    this path matches the deployed core.
  -->
  <requestHandler name="/dataimport"
                  class="org.apache.solr.handler.dataimport.DataImportHandler">
    <lst name="defaults">
      <str name="config">/usr/local/src/dbm_solr_core/dbm/conf/data-config.xml</str>
    </lst>
  </requestHandler>
</config>
I think you need a rootEntity="false" on the parent "files" entity. The rootEntity="true" on the child is not required.
Also, you have column="titel" in your field definition. I guess that's a typo and it should be "title"?
Seems like the rootEntity in fact was blocking my success, thanks a lot!!
Also, since I do some stuff with German language the "titel" is indeed not a typo, although it looks like it :-)
You should not set rootEntity=false on the "file" child entity. Just leave out the rootEntity attribute altogether from 'file'. In the absence of any root entities, no documents will be indexed! The rule is that each row emitted by the root entity creates a Solr document.
The updated files appear to work nicely. Thanks for all the great help!
I tried to get this code working, but I keep getting the error "missing content stream". Can you please help if you have any idea? I am using Solr 6.5.0.
I would like to import xml in the same structure as the sample.xml file with /SVVZ/Modules/Module being a document. I can see Solr recognizes a document (Total Rows fetched) but somehow it does not create a document.
I am running the data import with a full-import through the web admin interface. Not sure about the 0 requests made to data source, though.
Any helpful words?