Skip to content

Instantly share code, notes, and snippets.

@henrik242
Last active September 3, 2020 08:44
Show Gist options
  • Save henrik242/914e99886daa380b705645be3a2dfdb9 to your computer and use it in GitHub Desktop.
Save henrik242/914e99886daa380b705645be3a2dfdb9 to your computer and use it in GitHub Desktop.
Solr backup and restore (stored values only)

Back up all stored values from Solr as JSON batches

#!/usr/bin/env groovy
import groovy.json.JsonOutput
import groovy.json.JsonSlurper
import static java.lang.System.currentTimeMillis
/**
 * Dumps every document of a Solr collection to numbered JSON batch files.
 *
 * Documents are fetched page by page through Solr's cursorMark pagination,
 * sorted ascending on the given key field. Each page is written to a file named
 * "hostname-port-collection-key.N" (N = 0, 1, 2, ...) in the current directory.
 * Only the fields returned by the select handler are written, and the
 * "_version_" field is removed from every document before writing.
 */
class SolrBackup {
    static hostname
    static port
    static collection
    static key
    static batchsize = 100

    /**
     * Command line entry point.
     *
     * @param args hostname, port, collection, sort key and an optional batch size
     */
    static void main(String... args) {
        if (args.size() < 4) {
            System.err.println('\nUsage: ./solr-backup.groovy hostname port collection key [batchsize]\n' +
                    'e.g. ./solr-backup.groovy dev-solr666.example.com 10666 collection_foo key 10000\n' +
                    'NB! Will only backup stored values!\n')
            System.exit(1)
        }
        hostname = args[0]
        port = args[1]
        collection = args[2]
        key = args[3]
        if (args.size() > 4) {
            // Reject a bad batch size up front instead of failing on the first request.
            if (!args[4].isInteger() || args[4].toInteger() < 1) {
                System.err.println("batchsize must be a positive integer, got: ${args[4]}")
                System.exit(1)
            }
            batchsize = args[4]
        }
        start()
    }

    /**
     * Builds the select URL for one page of results.
     *
     * @param cursormark the cursor to continue from ("*" for the first page)
     * @return the select URL with the cursor mark and sort key URL-encoded
     */
    static URL url(String cursormark) {
        // Cursor marks can contain '+', '/' and '='; a raw '+' in a query string
        // is decoded as a space, which would corrupt the cursor.
        String encodedCursor = URLEncoder.encode(cursormark, 'UTF-8')
        String encodedKey = URLEncoder.encode(key.toString(), 'UTF-8')
        new URL("http://$hostname:$port/solr/$collection/select?q=*%3A*&cursorMark=$encodedCursor" +
                "&sort=$encodedKey%20asc&rows=$batchsize&wt=json")
    }

    /**
     * Fetches pages until an empty page is returned, writing one file per page.
     * Exits the JVM with status 1 on a non-200 response or a response that
     * lacks either the document list or the next cursor mark.
     */
    static void start() {
        String cursormark = "*"
        String basename = "$hostname-$port-$collection-$key"
        int counter = 0
        int docsWritten = 0
        // long, because the per-batch timings added to it are long values
        long totalElapsed = 0
        while (true) {
            long batchStart = currentTimeMillis()
            HttpURLConnection conn = (HttpURLConnection) url(cursormark).openConnection()
            conn.setRequestProperty('content-type', 'application/json')
            int resCode = conn.responseCode
            // errorStream is null when the server sent no error body; decode as UTF-8
            // rather than the platform default charset.
            String getResult = (resCode < 400 ? conn.inputStream : conn.errorStream)?.getText('UTF-8') ?: ''
            if (resCode != 200) {
                System.err.println("Error with http status $resCode:\n $getResult")
                System.exit(1)
            }
            def doc = new JsonSlurper().parseText(getResult)
            cursormark = doc.nextCursorMark
            def docs = doc.response?.docs
            if (docs == null || !cursormark) {
                System.err.println("No docs or no cursormark")
                System.exit(1)
            }
            // An empty page means the cursor has been exhausted.
            if (docs.size() < 1) {
                break
            }
            docs.each {
                it.remove("_version_")
            }
            def jsonDocs = JsonOutput.toJson(docs)
            def file = new File("$basename.${counter++}")
            file.write(jsonDocs, 'UTF-8')
            docsWritten += docs.size()
            long elapsed = currentTimeMillis() - batchStart
            totalElapsed += elapsed
            System.out.println("Writing ${docs.size()} docs to ${file.name} in $elapsed ms. " +
                    "Progress: $docsWritten / ${doc.response.numFound} docs in $totalElapsed ms")
        }
        System.out.println("Finished")
    }
}
#!/usr/bin/env groovy
import groovy.json.JsonSlurper
import static java.lang.System.currentTimeMillis
import static java.nio.charset.StandardCharsets.UTF_8
/**
 * Posts numbered JSON batch files to the update handler of a Solr collection.
 *
 * Files are read in order as "basefilename.0", "basefilename.1", ... until the
 * first missing file. Each file's content is sent as one POST request with
 * commit=true.
 */
class SolrRestore {
    static hostname
    static port
    static collection
    static basename

    /**
     * Command line entry point.
     *
     * @param args hostname, port, collection and the base file name of the batches
     */
    static void main(String... args) {
        if (args.size() != 4) {
            System.err.println('\nUsage: ./solr-restore.groovy hostname port collection basefilename (will read all consecutive basefilename.[0-9]+)\n' +
                    'e.g. ./solr-restore.groovy dev-solr666.example.com 10666 collection_foo dev-solr666.example.com-10666-collection_foo-key\n')
            System.exit(1)
        }
        hostname = args[0]
        port = args[1]
        collection = args[2]
        basename = args[3]
        start()
    }

    /**
     * @return the update URL of the target collection (commits on every request)
     */
    static URL url() {
        new URL("http://$hostname:$port/solr/$collection/update?commit=true&wt=json")
    }

    /**
     * Reports a fatal error on stderr and exits the JVM with status 1.
     *
     * @param error short description of what went wrong
     * @param result the raw server response related to the failure
     * @param e optional exception whose stack trace is printed as well
     */
    static void error(String error, String result, Exception e = null) {
        System.err.println("$error:\n\n$result")
        e?.printStackTrace()
        System.exit(1)
    }

    /**
     * Uploads the batch files one by one, stopping at the first missing file.
     * Exits through error() on a non-200 response, an unparsable response or a
     * non-zero status in the response header.
     */
    static void start() {
        int counter = 0
        // long, because the per-batch timings added to it are long values
        long totalElapsed = 0
        while (true) {
            long batchStart = currentTimeMillis()
            def file = new File("$basename.${counter++}")
            if (!file.exists()) {
                System.out.println("No more files found")
                break
            }
            System.out.print("Reading $file... ")
            // Read with an explicit charset to match the UTF-8 bytes sent below.
            String jsonDocs = file.getText('UTF-8')

            HttpURLConnection conn = (HttpURLConnection) url().openConnection()
            conn.requestMethod = 'POST'
            conn.doOutput = true
            conn.setRequestProperty('content-type', 'application/json')
            // withStream closes the request body stream even if the write fails.
            conn.outputStream.withStream { it.write(jsonDocs.getBytes(UTF_8)) }
            int resCode = conn.responseCode
            // errorStream is null when the server sent no error body.
            String postResult = (resCode < 400 ? conn.inputStream : conn.errorStream)?.getText('UTF-8') ?: ''
            if (resCode != 200) {
                error("http error with status $resCode", postResult)
            }

            def jsonResult
            try {
                jsonResult = new JsonSlurper().parseText(postResult)
            } catch (Exception e) {
                error("json parsing error", postResult, e)
            }
            long elapsed = currentTimeMillis() - batchStart
            totalElapsed += elapsed
            if (jsonResult?.responseHeader?.status != 0) {
                error("response status error", postResult)
            } else {
                System.out.println("restored in ${elapsed} ms. Progress: $totalElapsed ms")
            }
        }
        System.out.println("Finished")
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment