Last active
March 12, 2022 15:40
-
-
Save chetanmeh/e67a2e2ff0e8e376ea7779d7b9c430b7 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package felixconsole | |
import com.google.common.base.Charsets | |
import com.google.common.hash.Hashing | |
import groovy.json.JsonOutput | |
import groovy.json.JsonSlurper | |
import org.apache.commons.io.FileUtils | |
import org.apache.jackrabbit.core.data.DataIdentifier | |
import org.apache.jackrabbit.core.data.DataRecord | |
import org.apache.jackrabbit.core.data.FileDataStore | |
import org.apache.jackrabbit.oak.api.Blob | |
import org.apache.jackrabbit.oak.api.Type | |
import org.apache.jackrabbit.oak.commons.PathUtils | |
import org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants | |
import org.apache.jackrabbit.oak.spi.blob.BlobStore | |
import org.apache.jackrabbit.oak.spi.state.ChildNodeEntry | |
import org.apache.jackrabbit.oak.spi.state.NodeBuilder | |
import org.apache.jackrabbit.oak.spi.state.NodeState | |
import org.apache.jackrabbit.oak.spi.state.NodeStateUtils | |
import org.apache.jackrabbit.oak.spi.state.NodeStore | |
import org.apache.lucene.store.Directory | |
import org.apache.lucene.store.FSDirectory | |
import org.apache.lucene.store.IOContext | |
import org.apache.lucene.store.NoLockFactory | |
import org.slf4j.LoggerFactory | |
import static org.apache.jackrabbit.oak.plugins.memory.EmptyNodeState.EMPTY_NODE | |
import static org.apache.jackrabbit.oak.plugins.nodetype.write.InitialContent.INITIAL_CONTENT | |
//~----------------------< Configuration >
// Directory under which dump data (recovered index files + datastore) is written
def dumpDirPath = 'crx-quickstart/logs/dump'
// Copy-on-write Lucene index directory maintained by Oak on the local FS
def cowDir = 'crx-quickstart/repository/index'
// JSON file (produced by an earlier script) describing index files known to the repo
def indexDataPath = 'crx-quickstart/logs/index-data.json'

//----------------------<Create ref to private classes/Non Exported>
// These Oak Lucene classes are not exported from their bundle, so load them
// reflectively through a classloader that can see them
OakDirectory = loadLuceneClass('org.apache.jackrabbit.oak.plugins.index.lucene.OakDirectory')
IndexDefinition = loadLuceneClass('org.apache.jackrabbit.oak.plugins.index.lucene.IndexDefinition')

//~-------------------------------< Global Fields >
log = LoggerFactory.getLogger('script-console')
ns = getStore()
// NOTE(review): blobStore is never used below — kept for interactive console use
BlobStore blobStore = getBlobStore()

//~---------------------< Index File Copying >
File indexDataFile = new File(indexDataPath)
assert indexDataFile.exists() : "No file found at ${indexDataFile.absolutePath}"
// parseText avoids the needless StringReader wrapper used previously
def indexDataFromRepo = new JsonSlurper().parseText(indexDataFile.text)
def dumpDir = new File(dumpDirPath)
FileUtils.forceMkdir(dumpDir)
logMsg("Files would be copied to ${dumpDir.absolutePath}")
File idxData = new File(dumpDir, "local-index-data.json")
// Standalone FileDataStore rooted under the dump dir; recovered blobs land here
def dsPath = "$dumpDirPath/datastore"
FileDataStore ds = new FileDataStore()
ds.path = dsPath
ds.init(null)
File sourceDir = new File(cowDir)
// Maps sha256(indexPath) -> indexPath, since COW dirs are named by path hash
def hashToPathMapping = getIndexPaths()
def output = [:]
def output = [:] | |
sourceDir.listFiles().each {File indexRootDir -> | |
if (!indexRootDir.isDirectory()){ | |
return | |
} | |
String indexDirName = indexRootDir.name | |
String idxPath = hashToPathMapping[indexDirName] ?: indexDirName | |
logMsg("Processing $idxPath") | |
String indexName = PathUtils.getName(idxPath) | |
def idxDataRepo = indexDataFromRepo[indexName] | |
if (!idxDataRepo){ | |
logMsg("No index data found for $idxPath in json file") | |
return | |
} | |
if(idxDataRepo.valid){ | |
logMsg("Skipping processing of $idxPath as its valid") | |
//TODO Renebale | |
return | |
} | |
def fileData = [:] | |
indexRootDir.listFiles().each { File indexDir -> | |
Directory sourceIdxDir = FSDirectory.open(indexDir, NoLockFactory.noLockFactory) | |
sourceIdxDir.listAll().each { String fileName -> | |
//Determine the uniqueKey for the index file | |
def fileDataRepo = idxDataRepo.files.get(fileName) | |
if (fileDataRepo.isEmpty()){ | |
logMsg("Skipping processing of $idxPath/$fileName as no data found in index data json") | |
return | |
} | |
String key = fileDataRepo[0].key | |
def idxDefn = IndexDefinition.newInstance(INITIAL_CONTENT, EMPTY_NODE, null) | |
NodeBuilder nb = EMPTY_NODE.builder() | |
//Seed in the key | |
nb.child(':data').child(fileName).setProperty('uniqueKey', key) | |
nb.child(':data').child(fileName).setProperty('blobSize', idxDefn.getBlobSize()) | |
Directory dir = OakDirectory.newInstance(nb, idxDefn, false) | |
sourceIdxDir.copy(dir, fileName, fileName, IOContext.READ) | |
NodeBuilder fileNode = nb.getChildNode(':data').getChildNode(fileName) | |
def binaries = fileNode.getProperty('jcr:data').getValue(Type.BINARIES) | |
logMsg("\tCopying $fileName") | |
def blobIds = [:] | |
long srcSizes = 0, destSized = 0 | |
binaries.each { Blob b -> | |
InputStream is = b.newStream | |
DataRecord dr = ds.addRecord(is) | |
is.close() | |
def blobid = dr.identifier.toString() | |
logMsg("\t\tCreated blob $blobid, Size ${dr.length}") | |
blobIds[blobid] = [size:dr.length, originalSize: b.length()] | |
srcSizes += b.length() as long | |
destSized += dr.length as long | |
} | |
//assert srcSizes == destSized | |
//assert srcSizes == sourceIdxDir.fileLength(fileName) | |
fileData[fileName] = blobIds | |
} | |
} | |
output[idxPath] = [files: fileData] | |
} | |
// Persist the index-path -> copied-blob mapping for later recovery steps.
// Write explicitly as UTF-8: JSON is UTF-8 by spec, while a bare withWriter
// would use the platform default charset.
idxData.withWriter('UTF-8') { w ->
    w.print(JsonOutput.prettyPrint(JsonOutput.toJson(output)))
}
/*
Sample (truncated) structure of index-data.json consumed above:
"cqPageLucene": {
  "files": {
    "_0.si": [
      {
        "blobId": "f76dedbc-b66b-4a3f-af95-45b787d3c917.d2bb",
        "valid": true,
        "key": "ab900a4ca931cb8cc6dd2bd5b79b6df0"
      }
    ],
    "_0.cfe": [
      {
        "blobId": "f76dedbc-b66b-4a3f-af95-45b787d3c917.d306",
        "valid": true,
        "key": "d333c042915ae96494b76836ed3f3aba"
      }
    ]
  }
}
*/
// Cross-check: for every invalid chunked blobId listed in the repo data, see
// whether the copy pass above recovered the matching record into the local datastore
int invalidBlobCount = 0
int recoveredBlobCount = 0
indexDataFromRepo.each { idxName, data ->
    data.files.each { fileName, fileData ->
        fileData.each { blobData ->
            String blobId = blobData.blobId
            // only chunked ('#'-separated) ids that were flagged invalid are of interest
            if (!blobId.contains('#') || blobData.valid) {
                return
            }
            invalidBlobCount++
            String fileId = blobId.substring(0, blobId.indexOf('#'))
            def stored = ds.getRecordIfStored(new DataIdentifier(fileId))
            logMsg("BlobId : $fileId, Recovered ${stored}")
            if (stored) {
                recoveredBlobCount++
            }
        }
    }
}
logMsg("invalidBlobCount: $invalidBlobCount, recovered: $recoveredBlobCount")
logMsg("Done copying")
/**
 * Builds a map of sha256(indexPath) -> indexPath for all lucene indexes,
 * scanning oak:index both at the root and under /content.
 */
Object getIndexPaths() {
    def mapping = [:]
    ['', '/content'].each { base -> collectIndexData(base, mapping) }
    return mapping
}
/**
 * Adds an entry to {@code map} for every lucene index under {@code basePath}/oak:index,
 * keyed by the sha256 hash of the index path (the COW directory naming scheme).
 */
def collectIndexData(String basePath, def map) {
    NodeStore nodeStore = getStore()
    NodeState oakIndex = NodeStateUtils.getNode(nodeStore.root, "$basePath/oak:index")
    for (ChildNodeEntry cne : oakIndex.childNodeEntries) {
        if (cne.nodeState.getString('type') != 'lucene') {
            continue
        }
        String idxPath = "$basePath/oak:index/${cne.name}"
        map[Hashing.sha256().hashString(idxPath, Charsets.UTF_8).toString()] = idxPath
    }
}
// Echoes a message to both the script-console output and the server log
def logMsg(String msg) {
    println(msg)
    log.info(msg)
}
// Loads a non-exported Oak Lucene class via the bundle classloader that can see it
Class loadLuceneClass(String className) {
    ClassLoader bundleLoader = LuceneIndexConstants.class.classLoader
    return bundleLoader.loadClass(className)
}
// NodeStore backing the Sling repository (reached via the repository's
// 'manager' accessor — presumably a non-public Oak hook; confirm per AEM version)
def getStore() {
    def repository = osgi.getService(org.apache.sling.jcr.api.SlingRepository.class)
    return repository.manager.store
}
// BlobStore service looked up from the OSGi registry
def getBlobStore() {
    return osgi.getService(org.apache.jackrabbit.oak.spi.blob.BlobStore.class)
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment