Created
November 9, 2009 01:34
-
-
Save bguthrie/229620 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package proovy; | |
import groovy.xml.dom.DOMCategory | |
import org.lobobrowser.html.parser.* | |
import org.lobobrowser.html.test.* | |
import javax.xml.parsers.DocumentBuilderFactory | |
import javax.xml.xpath.XPathFactory | |
import javax.xml.xpath.XPathConstants | |
import css2xpath.* | |
import org.w3c.dom.Document | |
import org.w3c.dom.Node | |
import org.w3c.dom.NodeList | |
/**
 * Small search helper over an HTML DOM: loads a page (or wraps an existing
 * Document) and lets callers query it with either a CSS selector or an
 * XPath expression, optionally running a closure on every match.
 */
class Proovy {

    /** Demo entry point: prints the text of each h1/h2 found on the given page. */
    def static main(args) {
        new Proovy("http://www.thoughtworks.com").search("h1, h2") { elt ->
            println elt.text()
        }
    }

    /** The DOM document that every search runs against. */
    def document

    /**
     * Fetches and parses the HTML at the given URL into a fresh DOM document.
     * Scripting and external CSS are switched off on the user-agent context.
     *
     * @param urlString URL of the page to load
     */
    def Proovy(String urlString) {
        def uacontext = new SimpleUserAgentContext()
        uacontext.scriptingEnabled = false
        uacontext.externalCSSEnabled = false
        def dom = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument()
        this.document = dom
        // withReader closes the underlying stream even when parsing throws;
        // the previous newReader() call leaked it.
        new URL(urlString).withReader { reader ->
            new HtmlParser(uacontext, dom).parse(reader)
        }
    }

    /**
     * Wraps an already-built DOM document.
     *
     * @param document the document to search
     */
    def Proovy(Document document) {
        this.document = document
    }

    /**
     * Searches the document's child nodes.
     *
     * @param cssSelector a CSS selector, or an XPath expression starting with "/" or "./"
     * @param closure     optional callback invoked once per matching node
     * @return the list of matching nodes
     */
    def search(cssSelector, closure=null) {
        use(DOMSearchCategory, DOMCategory) {
            document.childNodes.search(cssSelector, closure)
        }
    }

    /**
     * A NodeList backed by a plain Groovy list, used to merge the results of
     * several XPath evaluations into one de-duplicated list.
     *
     * Declared static so it can be instantiated from the static category
     * methods below without an enclosing Proovy instance.
     */
    static class AggregatedNodeList implements NodeList {
        def nodeSet = []

        /** Appends every node in {@code nodes}, then drops duplicates. */
        def addAll(nodes) {
            nodes.each { this.nodeSet << it }
            this.nodeSet = nodeSet.unique()
        }

        /** @return the number of nodes currently held */
        int getLength() { nodeSet.size() }

        /**
         * @param index zero-based position
         * @return the node at {@code index}, or null when the index is out of
         *         range, as the DOM NodeList contract requires
         */
        Node item(int index) {
            (index >= 0 && index < nodeSet.size()) ? (Node) nodeSet.get(index) : null
        }
    }

    /**
     * Category adding xpath/css/search methods to NodeList.
     *
     * Declared static because categories are used via their static methods only.
     */
    static class DOMSearchCategory {

        /**
         * Evaluates one or more XPath expressions and merges the matches.
         *
         * @param nodes   the context: used directly when the list is itself a Node,
         *                otherwise each item is used as a context in turn
         * @param xpaths  a single expression, or an array/collection of expressions
         * @param closure optional callback invoked once per matching node
         * @return an AggregatedNodeList holding the distinct matches
         */
        def static xpath(NodeList nodes, xpaths, closure=null) {
            // Accept a lone String as well as String[]/List so search() can pass
            // its argument straight through.
            def expressions
            if (xpaths instanceof CharSequence) {
                expressions = [xpaths.toString()]
            } else {
                expressions = (xpaths as List) ?: []
            }

            // XPath.evaluate wants a Node as context item. Some DOM implementations
            // return the parent node itself from getChildNodes(), in which case the
            // list can be used as-is; for any other NodeList fall back to its items.
            def contexts = []
            if (nodes instanceof Node) {
                contexts << nodes
            } else {
                // Explicit method calls: DOMCategory intercepts property access on NodeList.
                for (int i = 0; i < nodes.getLength(); i++) {
                    contexts << nodes.item(i)
                }
            }

            def evaluator = XPathFactory.newInstance().newXPath()
            def result = new AggregatedNodeList()
            expressions.each { expression ->
                contexts.each { context ->
                    result.addAll evaluator.evaluate(expression, context, XPathConstants.NODESET)
                }
            }

            // Groovy's || yields a Boolean, so the callback is run as a separate
            // step and the node list itself is returned.
            if (closure) {
                result.each(closure)
            }
            return result
        }

        /**
         * Runs a CSS selector by converting it with XPathConverter and delegating
         * to {@link #xpath}.
         * NOTE(review): XPathConverter.convert presumably returns the XPath
         * expression(s) for the selector -- confirm its return type.
         *
         * @param nodes    the context node list
         * @param selector the CSS selector
         * @param closure  optional callback invoked once per matching node
         * @return the matching nodes
         */
        def static css(NodeList nodes, selector, closure=null) {
            return xpath(nodes, XPathConverter.convert(selector), closure)
        }

        /**
         * Dispatches to xpath when the search string starts with "/" or "./",
         * and to css otherwise.
         *
         * @param nodes        the context node list
         * @param searchString an XPath expression or CSS selector
         * @param closure      optional callback invoked once per matching node
         * @return the matching nodes
         */
        def static search(NodeList nodes, searchString, closure=null) {
            (searchString =~ /^(\.\/|\/)/) ? xpath(nodes, searchString, closure) : css(nodes, searchString, closure)
        }
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment