|
package main |
|
|
|
import ( |
|
"bytes" |
|
"fmt" |
|
"io" |
|
|
|
"golang.org/x/net/html" |
|
) |
|
|
|
func main() { |
|
b := bytes.NewBufferString(`<html> |
|
<head></head> |
|
<body> |
|
<div class="product-card front"> |
|
<div class="product-description"> |
|
<a href=""> |
|
<p class="product-name">Test text</p> |
|
</a> |
|
<div class="color_picker"> |
|
<div class="over_wrap" data-cc="0"> |
|
<a href="#"></a> |
|
</div> |
|
</div> |
|
</div> |
|
</div> |
|
</body> |
|
</html> |
|
`) |
|
parse(b) |
|
} |
|
|
|
func parse(r io.Reader) { |
|
doc, err := html.Parse(r) |
|
if err != nil { |
|
fmt.Printf("error: %s\n", err) |
|
return |
|
} |
|
|
|
findProductCardFronts(doc) |
|
} |
|
|
|
func findProductCardFronts(node *html.Node) { |
|
var isProductCardFront bool |
|
if isDivElementNode(node) && getClass(node) == "product-card front" { |
|
isProductCardFront = true |
|
} |
|
|
|
for child := node.FirstChild; child != nil; child = child.NextSibling { |
|
if !isProductCardFront { |
|
findProductCardFronts(child) |
|
} else { |
|
searchForProductDescription(child) |
|
} |
|
} |
|
} |
|
|
|
func searchForProductDescription(node *html.Node) { |
|
if isDivElementNode(node) && getClass(node) == "product-description" { |
|
for child := node.FirstChild; child != nil; child = child.NextSibling { |
|
colorsAvailable := findColorsAvailable(child) |
|
fmt.Println(colorsAvailable) |
|
} |
|
} |
|
} |
|
|
|
func findColorsAvailable(node *html.Node) (colorsAvailable []string) { |
|
if isDivElementNode(node) && getClass(node) == "color_picker" { |
|
for child := node.FirstChild; child != nil; child = child.NextSibling { |
|
findOverWrapDiv(child) |
|
} |
|
} |
|
return |
|
} |
|
|
|
func findOverWrapDiv(node *html.Node) { |
|
if isDivElementNode(node) && getClass(node) == "over_wrap" { |
|
for child := node.FirstChild; child != nil; child = node.NextSibling { |
|
// This section here |
|
// I expect `node` to be equal to `child.Parent` but its not. |
|
fmt.Println("actual parent") |
|
fmt.Println(node) |
|
fmt.Println("child's parent") |
|
fmt.Println(child.Parent) |
|
// ^ Above section |
|
} |
|
} |
|
} |
|
|
|
func isElementNode(node *html.Node, data string) bool { |
|
return node.Type == html.ElementNode && node.Data == data |
|
} |
|
|
|
func isAnchorElementNode(node *html.Node) bool { |
|
return node.Type == html.ElementNode && node.Data == "a" |
|
} |
|
|
|
func hasHref(node *html.Node) (href string, foundHref bool) { |
|
for _, attr := range node.Attr { |
|
if attr.Key == "href" && len(attr.Val) != 0 { |
|
href = attr.Val |
|
foundHref = true |
|
return |
|
} |
|
} |
|
return |
|
} |
|
|
|
func getClass(node *html.Node) (class string) { |
|
for _, attr := range node.Attr { |
|
if attr.Key == "class" { |
|
return attr.Val |
|
} |
|
} |
|
return |
|
} |
|
|
|
func isDivElementNode(node *html.Node) bool { |
|
return node.Type == html.ElementNode && node.Data == "div" |
|
} |
|
|
|
func isParagraphElementNode(node *html.Node) bool { |
|
return node.Type == html.ElementNode && node.Data == "p" |
|
} |
|
|
|
func isTextNode(node *html.Node) bool { |
|
return node.Type == html.TextNode |
|
} |
Silly me... the loop at https://gist.github.com/shyamsalimkumar/9f62dbe94b3e6f8186cf69aa7a4faf60#file-test-go-L78 advances with `child = node.NextSibling`, so it never walks the node's children. It is supposed to be:
for child := node.FirstChild; child != nil; child = child.NextSibling {
Thanks to the nice folks at #go-nuts.