Skip to content

Instantly share code, notes, and snippets.

@amalinovskiy
Created November 16, 2016 12:42
Show Gist options
  • Save amalinovskiy/743ca7ef2548a28e68b085200f87bde8 to your computer and use it in GitHub Desktop.
Save amalinovskiy/743ca7ef2548a28e68b085200f87bde8 to your computer and use it in GitHub Desktop.
Export documents from Solr core to XML format
#!/usr/bin/env groovy
/**
* Usage: ./SolrExporter.groovy query sort url [url]
*
* ./SolrExporter.groovy "id:12345" "id" "http://your.solr.host:8983/solr/core/"
*
* ./SolrExporter.groovy "id:12345" "id" "http://old.solr.host:8983/solr/core/" "http://new.solr.host:8983/solr/core/"
*
*
* You can also use this exporter to reindex Solr (e.g. after an incompatible schema change):
*
* ./SolrExporter.groovy "*:*" "id" "http://localhost:8983/solr/core/" "http://localhost:8983/solr/core/"
*/
import org.apache.solr.client.solrj.SolrQuery
import org.apache.solr.client.solrj.SolrQuery.SortClause
import org.apache.solr.client.solrj.response.QueryResponse
import org.apache.solr.client.solrj.SolrClient
import org.apache.solr.client.solrj.impl.HttpSolrClient
import org.apache.solr.client.solrj.util.ClientUtils
import org.apache.solr.common.SolrDocument
import org.apache.solr.common.SolrInputDocument
@Grapes([
@Grab(group = 'org.apache.solr', module = 'solr-solrj', version = '5.5.3'),
@Grab(group = 'org.slf4j', module = 'slf4j-simple', version = '1.6.4')
])
/**
 * Pages through every document matching a query on a source Solr core and
 * either re-submits the documents to a target core or, when no target is
 * given, prints them to stdout as XML wrapped in a {@code <docs>} element.
 *
 * Paging uses Solr's cursorMark mechanism, so the sort field handed to the
 * constructor is expected to be one Solr accepts for cursor paging (Solr
 * requires the sort to include the uniqueKey field) — the script does not
 * verify this itself.
 *
 * The exporter owns the Solr clients it creates; call {@link #close()} when
 * finished with it to release their HTTP resources.
 */
class SolrDocumentExporter implements Closeable {
    /** Number of documents requested per cursor page. */
    private static final int BATCH_SIZE = 10000

    /**
     * Solr's internal optimistic-concurrency field. It is returned with the
     * stored fields of a document but must not be sent back on an update: a
     * positive value tells Solr "the document must already exist with exactly
     * this version", which fails on any core that does not hold that version.
     */
    private static final String VERSION_FIELD = "_version_"

    private SolrClient sourceServer
    private SolrClient targetServer
    private String q
    private String sort

    /**
     * Creates an exporter that prints matching documents as XML to stdout.
     *
     * @param q      Solr query string selecting the documents to export
     * @param sort   field name to sort ascending by while paging
     * @param source base URL of the core to read from
     */
    SolrDocumentExporter(q, sort, source) {
        this(q, sort, source, null)
    }

    /**
     * Creates an exporter that copies matching documents into another core.
     *
     * @param q      Solr query string selecting the documents to export
     * @param sort   field name to sort ascending by while paging
     * @param source base URL of the core to read from
     * @param target base URL of the core to write to; when null or empty the
     *               documents are printed as XML to stdout instead
     */
    SolrDocumentExporter(q, sort, source, target) {
        this.q = q
        this.sort = sort
        sourceServer = new HttpSolrClient(source)
        if (target) targetServer = new HttpSolrClient(target)
    }

    /**
     * Runs the export: fetches page after page from the source core until a
     * page comes back empty, pushing each page to the target core (with a
     * commit per page) or printing it as XML.
     */
    void exportDocuments() {
        boolean printing = (targetServer == null)
        // The XML wrapper is opened and closed in the same place so the two
        // tags always come from the same export run.
        if (printing) println("<docs>")

        String cursorMark = "*"   // "*" is the cursor value for the first page
        while (true) {
            QueryResponse response = executeQuery(sourceServer, cursorMark)
            List<SolrDocument> page = response.results
            if (page.isEmpty()) break

            List<SolrInputDocument> converted = inputDocuments(page)
            if (printing) printDocuments(converted)
            else updateDocuments(targetServer, converted)

            cursorMark = response.nextCursorMark
        }

        if (printing) println("</docs>")
    }

    /**
     * Releases the HTTP resources held by the source and (if present) target
     * clients. The target is closed even when closing the source throws.
     */
    @Override
    void close() {
        try {
            sourceServer.close()
        } finally {
            targetServer?.close()
        }
    }

    /**
     * Builds the query for one cursor page.
     *
     * @param mark cursor mark to resume from
     * @return a query for at most {@code BATCH_SIZE} documents, sorted
     *         ascending on the configured sort field
     */
    private SolrQuery solrQuery(mark) {
        SolrQuery query = new SolrQuery()
        query.set("q", this.q)
        query.set("rows", BATCH_SIZE)
        query.setSort(SortClause.asc(this.sort))
        query.set("cursorMark", mark)
        return query
    }

    /**
     * Fetches one cursor page.
     *
     * @param server     client to query
     * @param cursorMark cursor mark to resume from
     * @return the raw response; callers read {@code results} and
     *         {@code nextCursorMark} from it
     */
    private QueryResponse executeQuery(server, cursorMark) {
        return server.query(solrQuery(cursorMark))
    }

    /**
     * Converts query results into documents suitable for (re-)indexing.
     * The internal version field is dropped from each converted document so
     * it can be indexed into a core that does not already hold that exact
     * version of the document.
     *
     * @param documents result documents from a query response
     * @return one input document per result document, in the same order
     */
    private List<SolrInputDocument> inputDocuments(documents) {
        documents.collect { SolrDocument document ->
            SolrInputDocument input = ClientUtils.toSolrInputDocument(document)
            input.removeField(VERSION_FIELD)
            input
        }
    }

    /**
     * Adds the documents to the given core and commits them.
     *
     * @param server    client for the core to write to
     * @param documents non-empty batch of documents to index
     */
    private void updateDocuments(server, documents) {
        server.add(documents)
        server.commit()
    }

    /**
     * Prints each document to stdout in Solr's XML document format.
     *
     * @param documents documents to print, one XML element per line
     */
    private void printDocuments(documents) {
        documents.each { println(ClientUtils.toXML(it)) }
    }
}
// Script entry point: query, sort field and source URL are mandatory; a fourth
// argument selects copy-to-target mode instead of XML output on stdout.
if (args.length < 3) {
    // Fail with a readable message instead of an ArrayIndexOutOfBoundsException.
    System.err.println("Usage: ./SolrExporter.groovy query sort url [url]")
    System.exit(1)
}
String target = (args.length > 3) ? args[3] : null
new SolrDocumentExporter(args[0], args[1], args[2], target).exportDocuments()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment