Skip to content

Instantly share code, notes, and snippets.

@bguthrie
Created November 9, 2009 01:34
Show Gist options
  • Save bguthrie/229620 to your computer and use it in GitHub Desktop.
Save bguthrie/229620 to your computer and use it in GitHub Desktop.
package proovy;
import groovy.xml.dom.DOMCategory
import org.lobobrowser.html.parser.*
import org.lobobrowser.html.test.*
import javax.xml.parsers.DocumentBuilderFactory
import javax.xml.xpath.XPathFactory
import javax.xml.xpath.XPathConstants
import css2xpath.*
import org.w3c.dom.Document
import org.w3c.dom.Node
import org.w3c.dom.NodeList
class Proovy {
/**
 * Demo entry point: loads the ThoughtWorks home page and prints the text
 * of every node matched by the CSS selector "h1, h2".
 */
def static main(args) {
    def page = new Proovy("http://www.thoughtworks.com")
    page.search("h1, h2", { heading -> println heading.text() })
}
// The DOM document that search() runs its queries against.
def document

/**
 * Loads the HTML page at the given URL and parses it into a new DOM document.
 * Scripting and external CSS are disabled on the user-agent context before parsing.
 *
 * @param urlString address of the page to fetch and parse
 */
def Proovy(String urlString) {
    def uacontext = new SimpleUserAgentContext()
    uacontext.scriptingEnabled = false
    uacontext.externalCSSEnabled = false
    this.document = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument()
    // withReader closes the URL's reader once the closure returns, even if
    // parsing throws, so the underlying connection is not leaked.
    new URL(urlString).withReader { reader ->
        new HtmlParser(uacontext, document).parse(reader)
    }
}

/**
 * Wraps an already-built DOM document.
 *
 * @param document the document to search
 */
def Proovy(Document document) {
    this.document = document
}
/**
 * Runs a search over the document's child nodes with DOMSearchCategory and
 * DOMCategory applied for the duration of the call.
 *
 * @param cssSelector the search string handed to the category's search method
 * @param closure     optional callback forwarded to the category's search method
 * @return whatever the category's search method returns
 */
def search(cssSelector, closure=null) {
    return use(DOMSearchCategory, DOMCategory) {
        def topLevelNodes = document.childNodes
        topLevelNodes.search(cssSelector, closure)
    }
}
/**
 * A NodeList backed by a Groovy list that accumulates nodes from several
 * sources and drops duplicates as they are added.
 */
class AggregatedNodeList implements NodeList {
    // Backing store; kept free of duplicates by addAll().
    def nodeSet = []

    /**
     * Appends every node from the given iterable and then removes duplicates,
     * keeping the first occurrence of each node.
     *
     * @param nodes anything Groovy can iterate with each()
     * @return the de-duplicated backing list
     */
    def addAll(nodes) {
        nodes.each { node -> nodeSet << node }
        nodeSet = nodeSet.unique()
    }

    /** @return the number of distinct nodes collected so far */
    int getLength() { nodeSet.size() }

    /**
     * @param index zero-based position
     * @return the node at that position, or null when the index is out of range
     */
    Node item(int index) {
        // The NodeList contract asks for null on an invalid index instead of
        // the IndexOutOfBoundsException that List.get would raise.
        (index >= 0 && index < nodeSet.size()) ? (Node) nodeSet.get(index) : null
    }
}
/**
 * Groovy category that adds xpath/css/search query methods to NodeList.
 */
class DOMSearchCategory {
    /**
     * Evaluates each XPath expression against the given nodes and merges the
     * matches into one de-duplicated list.
     *
     * @param nodes   the context passed to the XPath evaluator
     * @param xpaths  the XPath expressions to evaluate
     * @param closure optional callback invoked once per matched node
     * @return the aggregated list of matched nodes
     */
    def static xpath(NodeList nodes, String[] xpaths, closure=null) {
        def result = new AggregatedNodeList()
        // One evaluator serves all expressions; building a factory per
        // expression is unnecessary work.
        def evaluator = XPathFactory.newInstance().newXPath()
        xpaths.each { expression ->
            // NOTE(review): the NodeList itself is handed over as the XPath
            // context item; confirm the DOM implementation in use accepts
            // that rather than requiring a single Node.
            result.addAll evaluator.evaluate(expression, nodes, XPathConstants.NODESET)
        }
        // Groovy's && and || produce booleans, so the callback has to be
        // applied separately for the node list itself to be returned.
        if (closure) {
            result.each(closure)
        }
        return result
    }

    /**
     * Converts a CSS selector with XPathConverter and runs the result through xpath().
     *
     * @param nodes    the nodes to search
     * @param selector the CSS selector
     * @param closure  optional callback invoked once per matched node
     * @return the aggregated list of matched nodes
     */
    def static css(NodeList nodes, selector, closure=null) {
        // presumably XPathConverter.convert returns a String[] of expressions — verify
        return nodes.xpath(XPathConverter.convert(selector), closure)
    }

    /**
     * Dispatches a search string: anything starting with "/" or "./" is
     * treated as an XPath expression, everything else as a CSS selector.
     *
     * @param nodes        the nodes to search
     * @param searchString an XPath expression or CSS selector
     * @param closure      optional callback invoked once per matched node
     * @return the aggregated list of matched nodes
     */
    def static search(NodeList nodes, searchString, closure=null) {
        if (searchString =~ /^(\.\/|\/)/) {
            // xpath() declares String[]; a bare String is not coerced to an
            // array in that position, so wrap the single expression explicitly.
            return nodes.xpath([searchString] as String[], closure)
        }
        return nodes.css(searchString, closure)
    }
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment