Skip to content

Instantly share code, notes, and snippets.

@jmoiron
Last active December 11, 2015 04:59
Show Gist options
  • Save jmoiron/4549531 to your computer and use it in GitHub Desktop.
Save jmoiron/4549531 to your computer and use it in GitHub Desktop.
package main
import (
"bytes"
"fmt"
"github.com/moovweb/gokogiri"
"github.com/moovweb/gokogiri/css"
"github.com/moovweb/gokogiri/html"
"github.com/moovweb/gokogiri/xml"
"github.com/moovweb/gokogiri/xpath"
"regexp"
"strings"
"time"
"unsafe"
)
// numRegex matches the first run of digits, optionally followed by a decimal
// point and more digits. The whole match is captured as group 1.
var numRegex = regexp.MustCompile(`(\d+(?:\.\d+)?)`)

// FindNumber returns the first integer or decimal number found in str, as
// text. It returns the empty string when str contains no digits.
func FindNumber(str string) string {
	if match := numRegex.FindStringSubmatch(str); match != nil {
		return match[1]
	}
	return ""
}
// toDate formats a Unix timestamp (seconds since the epoch) using the
// time.UnixDate layout. A timestamp of exactly zero is treated as "no date"
// and rendered as "never".
func toDate(timestamp int64) string {
	if timestamp != 0 {
		return time.Unix(timestamp, 0).Format(time.UnixDate)
	}
	return "never"
}
// FileExtension returns the text after the last "." in str, ignoring any
// query string (everything from the first "?" onward).
//
// The query is removed before the dot is searched for, so a dot that appears
// inside the query (for example "a.jpg?v=1.2") does not hide the real
// extension. When str contains no dot at all, the query-stripped string is
// returned unchanged.
func FileExtension(str string) string {
	// Drop the query first; dots after "?" are not part of the file name.
	if q := strings.Index(str, "?"); q >= 0 {
		str = str[:q]
	}
	if dot := strings.LastIndex(str, "."); dot >= 0 {
		return str[dot+1:]
	}
	return str
}
// tick prints the current time, in time.Time's String form, on its own line
// to standard output.
func tick() {
	now := time.Now()
	fmt.Printf("%s\n", now.String())
}
// Selectable is the small query interface shared by the wrappers in this
// file: it can run a CSS selector and return the matching nodes, and it can
// report its text.
type Selectable interface {
// CssSelect returns the nodes matched by the given CSS selector.
CssSelect(selector string) []Node
// Text returns the receiver's text.
Text() string
}
// Node wraps a gokogiri node so callers depend on this file's interface
// rather than on gokogiri directly, leaving room to change the backend later.
type Node struct {
// NOTE(review): nothing in the visible code assigns this embedded interface;
// the methods declared on *Node are the ones actually called. Confirm it is
// needed before relying on it.
Selectable
// doc is the document handed to fromPtr/fromNode when the node was filled.
doc *html.HtmlDocument
// ptr is the raw node pointer; CssSelect passes it to the xpath package.
ptr unsafe.Pointer
// node is the gokogiri node that Text and Attr delegate to.
node xml.Node
}
// Document wraps a parsed HTML document. It can be selected against and
// freed, and exposes the document's root element through the Root field.
type Document struct {
// NOTE(review): as with Node, this embedded interface is never assigned in
// the visible code.
Selectable
// doc is the parsed document; set by FromBytes and released by Free.
doc *html.HtmlDocument
// docptr is the value of doc.DocPtr() recorded by FromBytes.
docptr unsafe.Pointer
// Root is the document's root node, filled in by FromBytes.
Root Node
}
// fromPtr populates n from a raw node pointer and the document it was passed
// with, building the gokogiri node wrapper via xml.NewNode.
func (n *Node) fromPtr(ptr unsafe.Pointer, doc *html.HtmlDocument) {
	n.ptr, n.doc = ptr, doc
	n.node = xml.NewNode(n.ptr, n.doc)
}
// fromNode populates n from an existing xml.Node and the document passed
// alongside it; the raw pointer is taken from node.NodePtr().
func (n *Node) fromNode(node xml.Node, doc *html.HtmlDocument) {
	n.ptr, n.node, n.doc = node.NodePtr(), node, doc
}
// CssSelect runs a CSS selector against this node and returns the matches
// wrapped as Node values that share this node's document.
//
// The selector is translated to an XPath expression with css.Convert and
// compiled with xpath.Compile. The compiled expression is released before
// returning, alongside the XPath context. If compilation yields a nil
// expression, CssSelect returns nil rather than passing it to Evaluate.
func (n *Node) CssSelect(selector string) []Node {
	expr := xpath.Compile(css.Convert(selector, 0))
	// NOTE(review): assumes xpath.Compile reports failure by returning nil —
	// confirm against the gokogiri version in use.
	if expr == nil {
		return nil
	}
	defer expr.Free()

	ctx := xpath.NewXPath(n.ptr)
	defer ctx.Free()

	matches := ctx.Evaluate(n.ptr, expr)
	found := make([]Node, len(matches))
	for i, ptr := range matches {
		found[i].fromPtr(ptr, n.doc)
	}
	return found
}
// Text returns the node's content, as reported by the wrapped node's
// Content method.
func (n *Node) Text() string {
return n.node.Content()
}
// Attr returns the value of the attribute named attr, as reported by the
// wrapped node's Attr method.
func (n *Node) Attr(attr string) string {
return n.node.Attr(attr)
}
// CssSelect runs the CSS selector against the document's Root node and
// returns the matches.
func (d *Document) CssSelect(selector string) []Node {
return d.Root.CssSelect(selector)
}
// Text always returns the empty string.
// NOTE(review): this looks like a placeholder so that *Document has the
// Selectable method set — confirm whether it should return the Root text.
func (d *Document) Text() string {
return ""
}
// FromString parses str as HTML into d. It converts the string to bytes and
// hands it to FromBytes, returning whatever error FromBytes reports.
func (d *Document) FromString(str string) error {
	return d.FromBytes(bytes.NewBufferString(str).Bytes())
}
// FromBytes parses str as HTML with gokogiri.ParseHtml. On success it stores
// the parsed document in d, fills d.Root from the document's root node, and
// records the document pointer. On failure d is left untouched and the parse
// error is returned.
func (d *Document) FromBytes(str []byte) error {
	parsed, err := gokogiri.ParseHtml(str)
	if err == nil {
		d.doc = parsed
		d.Root.fromNode(parsed.Root(), parsed)
		d.docptr = parsed.DocPtr()
	}
	return err
}
// Free releases the parsed document held by d.
//
// It is a no-op when d holds no document, which covers both a Document that
// was never successfully loaded and a second call to Free. After freeing, the
// stored document handle, document pointer and Root's references are cleared
// so stale pointers into the released document are not kept around.
func (d *Document) Free() {
	if d.doc == nil {
		return
	}
	d.doc.Free()

	// Drop every reference into the document that was just released.
	d.doc, d.docptr = nil, nil
	d.Root.doc, d.Root.ptr, d.Root.node = nil, nil, nil
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment