Last active
December 11, 2015 04:59
-
-
Save jmoiron/4549531 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"bytes" | |
"fmt" | |
"github.com/moovweb/gokogiri" | |
"github.com/moovweb/gokogiri/css" | |
"github.com/moovweb/gokogiri/html" | |
"github.com/moovweb/gokogiri/xml" | |
"github.com/moovweb/gokogiri/xpath" | |
"regexp" | |
"strings" | |
"time" | |
"unsafe" | |
) | |
// numRegex matches a run of digits, optionally followed by a decimal point
// and further digits (for example "42" or "3.14").
var numRegex = regexp.MustCompile("(\\d+(?:\\.\\d+)?)")

// FindNumber returns the first number found in str, or the empty string when
// str contains no digits.
func FindNumber(str string) string {
	// The pattern's only capturing group wraps the entire expression, so the
	// overall match is exactly the captured number. FindString yields "" when
	// nothing matches, and a successful match always has at least one digit.
	return numRegex.FindString(str)
}
// toDate renders a Unix timestamp (seconds since the epoch) using the
// time.UnixDate layout in the local time zone. A timestamp of zero is
// reported as "never".
func toDate(timestamp int64) string {
	switch timestamp {
	case 0:
		return "never"
	default:
		return time.Unix(timestamp, 0).Format(time.UnixDate)
	}
}
// FileExtension returns the text after the last "." in str, ignoring any
// query string (everything from the first "?" onwards).
//
// The query string is removed before looking for the dot, so a dot inside the
// query (as in "a.jpg?v=1.2") does not hide the real extension. When the
// remaining text contains no "." at all, it is returned unchanged.
func FileExtension(str string) string {
	if q := strings.Index(str, "?"); q >= 0 {
		str = str[:q]
	}
	// LastIndex returns -1 when there is no dot, which makes the slice below
	// start at 0 and return the whole string.
	return str[strings.LastIndex(str, ".")+1:]
}
// tick writes the current time to standard output, followed by a newline.
func tick() {
	now := time.Now()
	fmt.Println(now.String())
}
// Selectable implements a simple interface which allows to get the inner text | |
// of some element as well as run a CSS select on it and get a list of nodes | |
type Selectable interface { | |
CssSelect(selector string) []Node | |
Text() string | |
} | |
// A node wrapper, in order to provide a similar interface in the future | |
// possibly without gokogiri | |
type Node struct { | |
Selectable | |
doc *html.HtmlDocument | |
ptr unsafe.Pointer | |
node xml.Node | |
} | |
// A Document wrapper, which can be Freed and Selected, and exposes | |
// the root as a Node object with the Root field | |
type Document struct { | |
Selectable | |
doc *html.HtmlDocument | |
docptr unsafe.Pointer | |
Root Node | |
} | |
// Fill a Node element from a ptr | |
func (n *Node) fromPtr(ptr unsafe.Pointer, doc *html.HtmlDocument) { | |
n.ptr = ptr | |
n.doc = doc | |
n.node = xml.NewNode(ptr, doc) | |
} | |
// Fill a Node element from an xml.Node | |
func (n *Node) fromNode(node xml.Node, doc *html.HtmlDocument) { | |
n.ptr = node.NodePtr() | |
n.node = node | |
n.doc = doc | |
} | |
func (n *Node) CssSelect(selector string) []Node { | |
xpathexpr := css.Convert(selector, 0) | |
expr := xpath.Compile(xpathexpr) | |
nxp := xpath.NewXPath(n.ptr) | |
defer nxp.Free() | |
nodes := nxp.Evaluate(n.ptr, expr) | |
ret := make([]Node, len(nodes)) | |
for i, ptr := range nodes { | |
ret[i].fromPtr(ptr, n.doc) | |
} | |
return ret | |
} | |
func (n *Node) Text() string { | |
return n.node.Content() | |
} | |
func (n *Node) Attr(attr string) string { | |
return n.node.Attr(attr) | |
} | |
func (d *Document) CssSelect(selector string) []Node { | |
return d.Root.CssSelect(selector) | |
} | |
func (d *Document) Text() string { | |
return "" | |
} | |
func (d *Document) FromString(str string) error { | |
buff := bytes.NewBufferString(str) | |
bites := buff.Bytes() | |
return d.FromBytes(bites) | |
} | |
func (d *Document) FromBytes(str []byte) error { | |
doc, err := gokogiri.ParseHtml(str) | |
if err != nil { | |
return err | |
} | |
d.doc = doc | |
d.Root.fromNode(doc.Root(), doc) | |
d.docptr = doc.DocPtr() | |
return nil | |
} | |
func (d *Document) Free() { | |
d.doc.Free() | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment