Skip to content

Instantly share code, notes, and snippets.

@chetanmeh
Last active August 6, 2017 08:09
Show Gist options
  • Save chetanmeh/8d68219b7bc7adc1ea5f to your computer and use it in GitHub Desktop.
Save chetanmeh/8d68219b7bc7adc1ea5f to your computer and use it in GitHub Desktop.
Oak-related scripts to be executed via the Felix Script Console
import groovy.text.SimpleTemplateEngine
import org.apache.jackrabbit.commons.visitor.FilteringItemVisitor
import org.apache.jackrabbit.oak.commons.IOUtils
import org.apache.sling.jcr.api.SlingRepository
import org.slf4j.LoggerFactory
import javax.jcr.Session
import javax.jcr.Node
import javax.jcr.*
import static org.apache.jackrabbit.oak.commons.IOUtils.humanReadableByteCount
// Script entry point: logs in to the repository, gathers per-MIME-type binary
// statistics for everything below the configured root path, prints the grand
// total followed by the per-type table, and always logs out again.
SlingRepository repo = osgi.getService(SlingRepository.class)
Session session = null
// Assigned without 'def' so that it lands in the script binding and is
// visible from the helper methods declared further down (see logp).
logger = LoggerFactory.getLogger("script-console")
def root = '/content/dam'
try {
    session = repo.loginAdministrative(null)

    // Every first lookup of a MIME type creates (and stores) an empty stats record.
    def typeStatsMap = [:].withDefault { mimeType -> [count:0, name:mimeType, size:0L] }
    collectBinaryStats(session.getNode(root), typeStatsMap)

    // Grand total across all MIME types, starting from 0 when nothing was found.
    def totalSize = typeStatsMap.values().collect { it.size }.sum(0)
    logp("MimeType Stats - Total size ${humanReadableByteCount(totalSize)}")
    dumpStats(typeStatsMap)
} finally {
    if (session != null) {
        session.logout()
    }
}
/**
 * Visits {@code root} with a {@link FilteringItemVisitor} and accumulates
 * binary statistics per MIME type into {@code typeStatsMap}.
 *
 * For every node whose primary type is exactly {@code nt:file} and which has
 * both {@code jcr:content/jcr:mimeType} and {@code jcr:content/jcr:data}, the
 * binary's size is added to the entry for that MIME type and the entry's
 * count is incremented. File nodes missing either property are skipped.
 *
 * Progress is logged every 10,000 visited nodes and the intermediate table is
 * dumped every 1,000,000 visited nodes.
 *
 * @param root         node at which the traversal starts
 * @param typeStatsMap map keyed by MIME type; looking up an unknown key must
 *                     yield a mutable record with {@code size} and {@code count}
 *                     entries (the caller builds it with {@code withDefault})
 * @return always an empty list; nothing is ever added to it, it is kept only
 *         so the return value is unchanged for existing callers
 */
def collectBinaryStats(Node root, def typeStatsMap){
    def nodes = []
    long count = 0
    FilteringItemVisitor v = new FilteringItemVisitor(){
        protected void entering(Property property, int i){ }
        protected void leaving(Property property, int i){ }
        protected void leaving(Node node, int i){ }
        protected void entering(Node node, int i){
            count++
            // Require jcr:data as well as jcr:mimeType: reading a missing
            // property throws PathNotFoundException, and a single file node
            // without binary content would otherwise abort the whole traversal.
            boolean isMeasurableFile = node.primaryNodeType.name == 'nt:file' &&
                    node.hasProperty('jcr:content/jcr:mimeType') &&
                    node.hasProperty('jcr:content/jcr:data')
            if (isMeasurableFile){
                def mimeType = node.getProperty('jcr:content/jcr:mimeType').getString()
                def stats = typeStatsMap[mimeType]
                Binary b = node.getProperty('jcr:content/jcr:data').getBinary()
                try{
                    stats.size += b.size
                }finally{
                    // Release the binary even if reading its size fails.
                    b.dispose()
                }
                stats.count++
            }
            if (count % 10000L == 0L){
                logp ("Traversed $count nodes so far")
            }
            if (count % 1000000L == 0L){
                dumpStats(typeStatsMap)
            }
        }
    }
    root.accept(v)
    return nodes
}
/**
 * Renders the collected statistics as a fixed-width text table and emits it
 * through {@code logp}.
 *
 * Side effect: each stats record in {@code typeStatsMap} gains a
 * {@code sizeString} entry holding the human-readable form of its size,
 * because the list built here holds the same record objects as the map.
 *
 * @param typeStatsMap map whose values are records with {@code name},
 *                     {@code count} and {@code size} entries
 */
def dumpStats(def typeStatsMap){
    def stats = new ArrayList(typeStatsMap.values())
    stats.each {stat -> stat.sizeString = humanReadableByteCount(stat.size)}
    // Sorting on the negated size puts the largest MIME type first.
    stats.sort {-it.size}
    def columns = [
        [name:"name",displayName:"Name",size:40],
        [name:"count",displayName:"Count",size:10],
        [name:"sizeString",displayName:"Size",size:10],
        // Raw byte count. The row template cuts every cell to its column
        // width, so a width of 10 silently dropped the trailing digits of any
        // total of 10^10 bytes or more; 20 holds every possible long value.
        [name:"size",displayName:"Size",size:20],
    ]
    def ttf = new TemplateFactory2()
    ttf.columns = columns
    def table = new SimpleTemplateEngine().createTemplate(ttf.template).make([rows:stats]).toString()
    logp(table)
}
/**
 * Writes a message to the script's standard output and also records it at
 * INFO level on the script-level {@code logger}.
 *
 * @param msg value to report; it is converted with {@code toString()} for the logger
 */
def logp(def msg){
    println(msg)
    String logLine = msg.toString()
    logger.info(logLine)
}
/**
 * Builds the source text of a template that renders a list of rows as a
 * fixed-width text table. The caller in this script feeds the result to
 * SimpleTemplateEngine and binds the data under the name "rows".
 */
class TemplateFactory2 {
// Column descriptors; each one is read for its 'name' (row key to print),
// 'displayName' (header text) and 'size' (column width in characters).
def columns = []
/**
 * Generates the template source from the current column descriptors.
 *
 * The returned text consists of, in order:
 *  - a leading line break (the string literal starts right after the opening quotes),
 *  - a header line printing every displayName centered within its column width,
 *  - a separator line printing 'size' underscores per column,
 *  - a loop over 'rows' that prints, for each row and column, the row's value
 *    converted to a string, right-padded to the column width and then cut to
 *    exactly that width, so longer values are truncated.
 *
 * Two levels of evaluation are involved: unescaped dollar expressions are
 * resolved here while the string is built (column names, widths), whereas the
 * backslash-escaped dollar expression and the scriptlet tags are left in the
 * output for the template engine to evaluate per row.
 */
def getTemplate() { """
${columns.collect{ " <%print \"$it.displayName\".center($it.size)%> " }.join()}
${columns.collect{ " <%print \"_\"*$it.size %> " }.join()}
<% rows.each {%>${columns.collect{ " \${it.${it.name}.toString().padRight($it.size).substring(0,$it.size)} " }.join()}
<% } %>"""
}
}
import com.google.common.base.Stopwatch
import com.google.common.collect.Iterators
import org.apache.sling.jcr.api.SlingRepository
import javax.jcr.Session
import javax.jcr.query.QueryResult
import javax.jcr.query.Row
// Script entry point: runs a full-text query against the repository, prints
// the paths of at most the first 20 rows, and reports the elapsed time.
def queryStr = '''select [jcr:path], [jcr:score], *
from [nt:unstructured] as a
where contains(.,'login')
'''
SlingRepository repo = osgi.getService(SlingRepository.class)
// Started before login, so the reported time covers login, execution and printing.
Stopwatch watch = Stopwatch.createStarted()
Session session = null
try {
    session = repo.loginAdministrative(null)
    def queryManager = session.workspace.queryManager
    QueryResult result = queryManager.createQuery(queryStr, 'sql').execute()

    // Dump up to 20 paths
    def firstRows = Iterators.limit(result.rows, 20)
    for (Row row in firstRows) {
        println row.path
    }
} finally {
    if (session != null) {
        session.logout()
    }
}
println "Time taken to execute query ${watch}"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment