Dump Document nodes and index files
package felixconsole

import org.apache.jackrabbit.oak.commons.IOUtils as OakIOUtils
import org.apache.commons.io.FileUtils
import org.apache.commons.io.IOUtils
import org.apache.jackrabbit.oak.api.Blob
import org.apache.jackrabbit.oak.commons.PathUtils
import org.slf4j.LoggerFactory
/**
 * This script extracts and dumps NodeDocument instances via the DocumentStore
 * API to a file, and also copies the referenced binary files if enabled. Once
 * executed, the dumped data is stored under `dumpDirPath`.
 */
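//Note: meant to be run from a Groovy script console inside the OSGi
//container (it relies on the `osgi` binding for service lookup), e.g. the
//Apache Felix web console Script Console; invocation details depend on
//your console setup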
//~------------------------------< config >
//Path whose document content is to be dumped
def path = "/oak:index/lucene/:data"
//Directory path under which the dump data is written
def dumpDirPath = 'crx-quickstart/logs/dump'
def dumpFileName = "dump.txt"
//Set to true to also dump binary content
def dumpBinaries = true
//If set to true then documents for the immediate child nodes
//are also dumped
def includeChildDocs = true
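//Note: the default path targets the hidden `:data` node of the Lucene index
//definition, under which Oak stores the index files as node content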
//~-------------------------------< Global Fields >
log = LoggerFactory.getLogger("script-console")
ns = getStore() //DocumentNodeStore
ds = ns.documentStore //DocumentStore
bs = ns.blobStore
//----------------------< Create refs to private/non-exported classes >
DocCollection = loadClass('org.apache.jackrabbit.oak.plugins.document.Collection')
Revision = loadClass('org.apache.jackrabbit.oak.plugins.document.Revision')
Utils = loadClass('org.apache.jackrabbit.oak.plugins.document.util.Utils')
blobCollector = loadClass('org.apache.jackrabbit.oak.plugins.document.BlobCollector').newInstance(ns)
def dumpDir = new File(dumpDirPath)
FileUtils.forceMkdir(dumpDir)
logMsg("Files would be copied to ${dumpDir.absolutePath}")
File dump = new File(dumpDir, dumpFileName)
def blobs = []
dump.withPrintWriter { pw ->
    dumpPathAndAncestors(path, pw, blobs)
    if (includeChildDocs) {
        ns.readChildDocs(path, null, Integer.MAX_VALUE).each { doc ->
            dumpDoc(doc, pw, blobs)
        }
    }
}
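//Copy each collected blob into <dumpDir>/blobs, skipping duplicates and
//inline binaries (those are already contained in the dumped documents)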
if (dumpBinaries) {
    def processedBlobs = new HashSet()
    def binDir = new File(dumpDir, "blobs")
    FileUtils.forceMkdir(binDir)
    def blobCount = 0, blobSize = 0
    blobs.each { /*ReferencedBlob*/ refBlob ->
        //Code in Oak 1.2 and 1.0 differs, so work around that
        def blob = refBlob instanceof Blob ? refBlob : refBlob.blob
        def id = null
        if (refBlob.class.name.endsWith('BlobStoreBlob')) {
            id = refBlob.blobId
        } else if (refBlob.class.name.endsWith('ReferencedBlob')) {
            id = refBlob.id
        }
        if (!id) {
            logMsg("Ignoring blob of type ${refBlob.class}")
            return
        }
        if (processedBlobs.contains(id)) {
            return
        }
        try {
            def l = blob.length()
            //Only dump non-inline binaries; smaller ones are stored
            //inline within the document itself
            if (l >= bs.blockSizeMin) {
                File f = new File(binDir, id)
                copyBlob(blob, f)
                blobCount++
                blobSize += l
                processedBlobs << id
            }
        } catch (Exception e) {
            println "Error on ${id}"
            log.warn("Failed on blob {}", id, e)
        }
    }
    logMsg("Copied $blobCount blobs of size ${OakIOUtils.humanReadableByteCount(blobSize)} " +
            "to ${binDir.absolutePath}")
}
logMsg("Dumped docs to ${dump.absolutePath}") | |
def dumpPathAndAncestors(String path, PrintWriter pw, def blobs) {
    def paths = []
    int depth = PathUtils.getDepth(path)
    (0..depth).each { d ->
        String ancestorPath = PathUtils.getAncestorPath(path, d)
        paths << ancestorPath
    }
    paths.each { String p ->
        logMsg("Dumping $p")
        dumpPath(p, pw, blobs)
    }
}
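//Dumps the document corresponding to the given repository path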
def dumpPath(String path, PrintWriter pw, def blobs) {
    def id = Utils.getIdFromPath(path)
    def /*NodeDocument*/ doc = ds.find(DocCollection.NODES, id)
    assert doc : "No document found for [$id]"
    dumpDoc(doc, pw, blobs)
}
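//Writes the document and all its previous (split) documents to the dump
//file, collecting referenced blobs along the way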
def dumpDoc(def doc, PrintWriter pw, def blobs) {
    pw.println(doc.asString())
    doc.getAllPreviousDocs().each { prevDoc ->
        pw.println(prevDoc.asString())
        blobCollector.collect(prevDoc, blobs)
    }
    blobCollector.collect(doc, blobs)
}
def logMsg(String msg) {
    println msg
    log.info(msg)
}
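//Obtains the NodeStore backing the repository. Note: this lookup assumes an
//Oak-based SlingRepository (as in AEM) exposing the repository manager and
//its `store`; adjust for other deployments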
def getStore() {
    osgi.getService(org.apache.sling.jcr.api.SlingRepository.class).manager.store
}
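//Streams the blob content to the given file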
def copyBlob(Blob b, File outFile) {
    outFile.withOutputStream { os ->
        InputStream is = b.newStream
        try {
            IOUtils.copyLarge(is, os)
        } finally {
            is?.close()
        }
    }
}
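//Loads a class via the DocumentNodeStore's classloader so that non-exported
//Oak implementation classes are accessible to the script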
Class loadClass(String className) {
    return ns.class.classLoader.loadClass(className)
}
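//Example (hypothetical values): to dump a regular content subtree instead
//of the Lucene index data, adjust the config at the top, e.g.
//  def path = "/content/myNode"
//  def includeChildDocs = false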