Skip to content

Instantly share code, notes, and snippets.

@jmoiron
Created November 1, 2012 18:00
Show Gist options
  • Save jmoiron/3995399 to your computer and use it in GitHub Desktop.
Save jmoiron/3995399 to your computer and use it in GitHub Desktop.
xml wrapping functions for lxml-ish css selection
// Selectable is the small interface shared by the wrappers in this file: it
// exposes the inner text of an element and lets callers run a CSS selector
// against it, yielding the list of matching nodes.
type Selectable interface {
// CssSelect runs the given CSS selector and returns the matching nodes.
CssSelect(selector string) []Node
// Text returns the text of the receiver.
Text() string
}
// Node is a wrapper around a gokogiri node, kept so that a similar interface
// can be provided in the future, possibly without gokogiri.
//
// NOTE(review): the embedded Selectable is never assigned in the visible
// code; the CssSelect and Text methods declared on *Node are the ones selected
// when calling through a Node or *Node.
type Node struct {
Selectable
// doc is the document handed to fromPtr/fromNode; CssSelect passes it on to
// every node it returns.
doc *html.HtmlDocument
// ptr is the raw node pointer (given to fromPtr, or read from
// node.NodePtr() in fromNode). CssSelect evaluates XPath against it.
ptr unsafe.Pointer
// node is the gokogiri node that backs Text and Attr.
node xml.Node
}
// Document is a wrapper around a parsed HTML document. It can be freed and
// selected on, and exposes the document root as a Node through the Root field.
//
// NOTE(review): the embedded Selectable is never assigned in the visible code.
type Document struct {
Selectable
// doc is the underlying gokogiri document; FromBytes stores it and Free
// releases it.
doc *html.HtmlDocument
// docptr is the value of doc.DocPtr() recorded by FromBytes.
docptr unsafe.Pointer
// Root wraps doc.Root(); it is filled in by FromBytes.
Root Node
}
// fromPtr populates n from a raw node pointer and the document it is
// associated with. The gokogiri node is built from the pair with xml.NewNode,
// after which the pointer, document and node are stored on n.
func (n *Node) fromPtr(ptr unsafe.Pointer, doc *html.HtmlDocument) {
	n.ptr, n.doc = ptr, doc
	wrapped := xml.NewNode(ptr, doc)
	n.node = wrapped
}
// fromNode populates n from an existing gokogiri node and the document it is
// associated with. The raw pointer is read from the node itself via NodePtr.
func (n *Node) fromNode(node xml.Node, doc *html.HtmlDocument) {
	rawPtr := node.NodePtr()
	n.ptr, n.node, n.doc = rawPtr, node, doc
}
// CssSelect converts the CSS selector into an XPath expression, evaluates it
// with n.ptr as the context node, and returns one Node per match. Every
// returned Node is associated with the same document as n.
//
// It returns nil when the selector cannot be compiled into an XPath
// expression (xpath.Compile is assumed to return nil in that case — verify
// against the gokogiri xpath documentation).
//
// NOTE(review): xpath.NewXPath receives the node pointer rather than a
// document pointer — presumably so that selection is scoped to this node;
// confirm before changing it.
func (n *Node) CssSelect(selector string) []Node {
	expr := xpath.Compile(css.Convert(selector, 0))
	if expr == nil {
		// Evaluating a nil expression would dereference it; report "no
		// matches" instead of crashing on a bad selector.
		return nil
	}
	// The compiled expression is a separate resource from the XPath context
	// and has to be released as well, otherwise every call leaks it.
	defer expr.Free()

	ctx := xpath.NewXPath(n.ptr)
	defer ctx.Free()

	matches := ctx.Evaluate(n.ptr, expr)
	out := make([]Node, len(matches))
	for i, ptr := range matches {
		out[i].fromPtr(ptr, n.doc)
	}
	return out
}
// Text returns the content of the wrapped gokogiri node.
func (n *Node) Text() string {
	content := n.node.Content()
	return content
}
// Attr looks up the attribute named attr on the wrapped gokogiri node and
// returns whatever value the node reports for it.
func (n *Node) Attr(attr string) string {
	value := n.node.Attr(attr)
	return value
}
// CssSelect runs the CSS selector against the document's Root node and
// returns the nodes it matches.
func (d *Document) CssSelect(selector string) []Node {
	root := &d.Root
	return root.CssSelect(selector)
}
// Text always returns the empty string for a Document; it does not read the
// parsed content. Use d.Root.Text() to get the content of the root node.
func (d *Document) Text() string {
	var none string
	return none
}
// FromString parses the HTML held in str into d. The string is turned into a
// byte slice and handed to FromBytes, whose error (if any) is returned as is.
func (d *Document) FromString(str string) error {
	return d.FromBytes(bytes.NewBufferString(str).Bytes())
}
// FromBytes parses the HTML in str with gokogiri.ParseHtml and, on success,
// stores the parsed document, its document pointer and its root node on d.
// When parsing fails the error is returned and d is left untouched.
//
// NOTE(review): a document already held by d is overwritten without being
// freed — confirm whether callers are expected to call Free before re-parsing.
func (d *Document) FromBytes(str []byte) error {
	parsed, err := gokogiri.ParseHtml(str)
	if err != nil {
		return err
	}
	root := parsed.Root()
	d.doc = parsed
	d.Root.fromNode(root, parsed)
	d.docptr = parsed.DocPtr()
	return nil
}
// Free releases the underlying gokogiri document. It does nothing when no
// document is loaded, so it is safe to call on a Document that was never
// parsed and safe to call more than once.
//
// After Free, d no longer refers to the released document: doc and docptr
// are cleared and Root is reset to the zero Node. Nodes previously obtained
// from d keep their own pointers and must not be used afterwards.
func (d *Document) Free() {
	if d.doc == nil {
		return
	}
	d.doc.Free()
	// Clear every handle into the freed document so a repeated Free cannot
	// release it twice and d does not retain dangling pointers.
	d.doc = nil
	d.docptr = nil
	d.Root = Node{}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment