Created
June 23, 2016 09:41
-
-
Save chetanmeh/ccaa10e6ee8c9b0775484e6a9f6ba06e to your computer and use it in GitHub Desktop.
Script to analyze the content of property indexes: number of entries, number of indexes, etc., on a fine-grained basis.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import com.google.common.base.Function | |
import com.google.common.base.Stopwatch | |
import com.google.common.collect.FluentIterable | |
import com.google.common.collect.TreeTraverser | |
import groovy.json.JsonOutput | |
import groovy.text.SimpleTemplateEngine | |
import org.apache.jackrabbit.oak.spi.state.ChildNodeEntry | |
import org.apache.jackrabbit.oak.spi.state.NodeState | |
import static com.google.common.collect.Iterables.transform | |
/**
 * Collects size statistics for the index definitions found under the
 * {@code oak:index} child of the store root whose {@code type} is
 * {@code property} or {@code disabled}, and writes them as a plain-text table
 * and as JSON.
 *
 * Usage: {@code new IndexStatsHelper(ns: store).dumpStats()}.
 */
class IndexStatsHelper {
    /** Store to analyze; only {@code getRoot()} is called on it. */
    def ns

    /** Per-index stats maps produced by the most recent {@link #dumpStats()} run. */
    def stats = []

    /** Maximum number of indexed values kept (and listed) per non-unique index. */
    def maxIndexedEntryCount = 10

    def statsFileJson = new File("index-stats.json")
    def statsFileTxt = new File("index-stats.txt")

    /**
     * Walks every matching index definition, gathers its stats, prints progress
     * to stdout and writes the text and JSON reports.
     */
    def dumpStats() {
        Stopwatch w = Stopwatch.createStarted()

        // Start from a fresh list so a second call on the same instance does
        // not report every index twice.
        stats = []

        ns.getRoot().getChildNode("oak:index").childNodeEntries.each { ChildNodeEntry cne ->
            def type = cne.nodeState.getString("type")
            if (type == 'property' || type == 'disabled') {
                print0("Processing ${cne.name}")
                stats << collectIndexStats(cne.name, cne.nodeState, type == 'disabled')
            }
        }

        stats.sort { -it.childCount }
        writeTextReport()

        // Explicit charset: do not depend on the platform default encoding.
        statsFileJson.setText(JsonOutput.prettyPrint(JsonOutput.toJson(stats)), 'UTF-8')

        print0("Stats in json format dumped to ${statsFileJson.getAbsolutePath()}")
        print0("Stats in txt format dumped to ${statsFileTxt.getAbsolutePath()}")
        print0("Total time taken : $w")
    }

    /** Builds the stats map for a single index definition node. */
    private Map collectIndexStats(String idxName, NodeState idxState, boolean disabled) {
        boolean unique = idxState.getBoolean('unique')
        // Key order is kept stable because it determines the printed/JSON field order.
        def idxStats = [childCount: 0, entryCount: 0, name: idxName, disabled: disabled, unique: unique]
        def contentNode = idxState.getChildNode(':index')
        def indexedStats = []

        if (unique) {
            // The pre-order traversal yields the ':index' node itself first;
            // skip it so an empty index reports 0 rather than 1.
            // NOTE(review): assumes a unique index stores one node per key
            // below ':index' - confirm against the Oak version in use.
            idxStats.childCount = getTraversor(contentNode).skip(1).size()
            idxStats.entryCount = idxStats.childCount
        } else {
            contentNode.childNodeEntries.each { ChildNodeEntry indexedStateEntry ->
                def indexEntryStats = getIndexEntryStats(indexedStateEntry.nodeState)
                idxStats.entryCount += indexEntryStats.entryCount
                idxStats.childCount += indexEntryStats.childCount
                indexedStats << [
                        name      : indexedStateEntry.name,
                        childCount: indexEntryStats.childCount,
                        entryCount: indexEntryStats.entryCount
                ]
            }
        }

        indexedStats.sort { -it.childCount }
        idxStats.indexedEntryCount = indexedStats.size()

        // Progress line is printed before the per-value breakdown is attached,
        // so it stays short.
        print0(" $idxStats")

        // take() copies the top N; a subList view would keep the complete
        // per-value list reachable for as long as the stats are held.
        idxStats.indexedStats = indexedStats.take(maxIndexedEntryCount)
        return idxStats
    }

    /** Writes the overall table plus one table per non-empty, non-unique index. */
    private void writeTextReport() {
        statsFileTxt.withPrintWriter('UTF-8') { pw ->
            def emit = { String section ->
                print0(section)
                pw.println(section)
            }

            emit(dumpStats("Overall index stats", stats))

            stats.each { s ->
                if (s.childCount > 0 && !s.unique) {
                    def header = "Stats for [${s.name}]."
                    if (s.indexedEntryCount > maxIndexedEntryCount) {
                        header += " Listing top ${s.indexedStats.size()} out of total ${s.indexedEntryCount}"
                    }
                    emit(dumpStats(header, s.indexedStats))
                }
            }
        }
    }

    /** Prints a message to stdout. */
    def print0(def msg) {
        println(msg)
    }

    /**
     * Counts the nodes in the subtree rooted at the given node (root included).
     *
     * @return map with {@code entryCount} (nodes that have a {@code match}
     *         property) and {@code childCount} (all nodes visited)
     */
    def getIndexEntryStats(NodeState idxNodeState) {
        FluentIterable<NodeState> itr = getTraversor(idxNodeState)
        def stats = [entryCount: 0, childCount: 0]
        itr.each { NodeState node ->
            if (node.hasProperty('match')) {
                stats.entryCount++
            }
            stats.childCount++
        }
        return stats
    }

    /**
     * Returns a lazy pre-order traversal of the subtree rooted at {@code ns};
     * the first element is {@code ns} itself.
     */
    FluentIterable<NodeState> getTraversor(NodeState ns) {
        def traversor = new TreeTraverser<NodeState>() {
            Iterable<NodeState> children(NodeState root) {
                return transform(root.childNodeEntries, { ChildNodeEntry cne ->
                    cne.nodeState
                } as Function)
            }
        }
        return traversor.preOrderTraversal(ns)
    }

    /**
     * Renders rows as a fixed-width text table, largest {@code childCount} first.
     *
     * @param header title line printed above the table
     * @param stats  rows; each must expose {@code entryCount}, {@code childCount}
     *               and {@code name}. The list is not modified.
     * @return the rendered table
     */
    def dumpStats(def header, def stats) {
        StringWriter sw = new StringWriter()
        PrintWriter pw = new PrintWriter(sw)
        pw.println(header)
        pw.println()

        // Sort a copy so the caller's list order is left untouched.
        def rows = stats.sort(false) { -it.childCount }

        // Column widths must be at least as long as the display name,
        // otherwise center() cannot pad and the header drifts out of line
        // with the underline and data rows ("Entry Count" is 11 characters).
        def columns = [
                [name: "entryCount", displayName: "Entry Count", size: 11],
                [name: "childCount", displayName: "Child Count", size: 11],
                [name: "name", displayName: "Name", size: 45],
        ]

        def ttf = new TemplateFactory()
        ttf.columns = columns
        pw.println(new SimpleTemplateEngine().createTemplate(ttf.template).make([rows: rows]).toString())
        pw.flush()
        return sw.toString()
    }

    /**
     * Generates the SimpleTemplateEngine source for a table with the configured
     * columns. Each column is a map with {@code name} (row property to print),
     * {@code displayName} (header text) and {@code size} (fixed width; longer
     * values are truncated).
     */
    static class TemplateFactory {
        def columns = []

        def getTemplate() { """
${columns.collect{ " <%print \"$it.displayName\".center($it.size)%> " }.join()}
${columns.collect{ " <%print \"_\"*$it.size %> " }.join()}
<% rows.each {%>${columns.collect{ " \${it.${it.name}.toString().padRight($it.size).substring(0,$it.size)} " }.join()}
<% } %>"""
        }
    }
}
// Script entry point: analyze the store exposed by the `session` variable.
// NOTE(review): `session` is not defined in this file; presumably it is
// supplied by the hosting console's binding - confirm.
def indexStatsHelper = new IndexStatsHelper(ns: session.store)
indexStatsHelper.dumpStats()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment