Skip to content

Instantly share code, notes, and snippets.

@gilliek
Created May 26, 2014 11:30
Show Gist options
  • Save gilliek/09df03b7af9d285ad42b to your computer and use it in GitHub Desktop.
Save gilliek/09df03b7af9d285ad42b to your computer and use it in GitHub Desktop.
HTML parser example based on the Go XML package. To keep things simple, it only deserializes the <head> node in detail; the <body> is kept as raw character data.
// "THE BEER-WARE LICENSE" (Revision 42):
// <[email protected]> wrote this file. As long as you retain
// this notice you can do whatever you want with this stuff. If we meet some
// day, and you think this stuff is worth it, you can buy me a beer in return
// Kevin Gillieron
package main
import (
"encoding/xml"
"fmt"
"io/ioutil"
"os"
)
// HTMLNode is the top-level value the document is decoded into. Its two
// fields are populated from the <head> and <body> child elements of the
// element being decoded.
type HTMLNode struct {
	Head HeadNode `xml:"head"`
	Body BodyNode `xml:"body"`
}
// HeadNode gathers the children of <head> that this example decodes: the
// text of <title>, plus one slice entry per <meta>, <link> and <script>
// element encountered.
type HeadNode struct {
	Title   string       `xml:"title"`
	Meta    []MetaNode   `xml:"meta"`
	Links   []LinkNode   `xml:"link"`
	Scripts []ScriptNode `xml:"script"`
}
// MetaNode holds the attributes decoded from a <meta> element. Each field is
// filled from the attribute named in its struct tag and stays empty when the
// attribute is absent.
//
// Schema is filled from the "scheme" attribute: that is the attribute name
// HTML 4 defines for <meta>, whereas "schema" is not a <meta> attribute, so a
// tag of "schema" would leave the field empty on valid documents. The field
// keeps its original Go name so existing users still compile.
type MetaNode struct {
	Name      string `xml:"name,attr"`
	Content   string `xml:"content,attr"`
	Charset   string `xml:"charset,attr"`
	HTTPEquiv string `xml:"http-equiv,attr"`
	Schema    string `xml:"scheme,attr"`
}
// LinkNode holds the attributes decoded from a <link> element. Each field is
// filled from the attribute named in its struct tag and stays empty when the
// attribute is absent.
type LinkNode struct {
	Charset  string `xml:"charset,attr"`
	Href     string `xml:"href,attr"`
	HrefLang string `xml:"hreflang,attr"`
	Media    string `xml:"media,attr"`
	Rel      string `xml:"rel,attr"`
	Rev      string `xml:"rev,attr"`
	Sizes    string `xml:"sizes,attr"`
	Target   string `xml:"target,attr"`
	Type     string `xml:"type,attr"`
}
// ScriptNode holds what is decoded from a <script> element: its attributes,
// each taken from the attribute named in the struct tag, and in Data the
// character data found inside the element (the inline script text, if any).
type ScriptNode struct {
	Async   string `xml:"async,attr"`
	Charset string `xml:"charset,attr"`
	Defer   string `xml:"defer,attr"`
	Src     string `xml:"src,attr"`
	Type    string `xml:"type,attr"`
	Data    string `xml:",chardata"`
}
// BodyNode keeps the <body> element undecoded. RawData accumulates the
// character data found directly inside the element; child elements are not
// mapped to any field, so their markup and text are not stored here.
type BodyNode struct {
	RawData []byte `xml:",chardata"`
}
// main reads foo.html (resolved against the working directory), decodes it
// into an HTMLNode and prints the result to standard output.
//
// Any failure to read or decode the file is reported on standard error and
// ends the program with exit status 1, instead of being discarded and
// silently printing an empty HTMLNode.
func main() {
	const htmlPath = "foo.html"

	// ReadFile opens, reads and closes the file itself, so no deferred Close
	// is left pending when os.Exit is called (os.Exit does not run defers).
	b, err := ioutil.ReadFile(htmlPath)
	if err != nil {
		fmt.Fprintln(os.Stderr, "reading HTML file:", err)
		os.Exit(1)
	}

	var node HTMLNode
	// xml.Unmarshal rejects input that is not well-formed XML, so the error
	// must be surfaced: without it a parse failure looks like an empty page.
	if err := xml.Unmarshal(b, &node); err != nil {
		fmt.Fprintln(os.Stderr, "parsing "+htmlPath+":", err)
		os.Exit(1)
	}

	fmt.Println(node)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment