Created
May 26, 2014 11:30
-
-
Save gilliek/09df03b7af9d285ad42b to your computer and use it in GitHub Desktop.
HTML parser example based on Go's encoding/xml package. To keep things simple, it only deserializes the <head> node.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// "THE BEER-WARE LICENSE" (Revision 42):
// <[email protected]> wrote this file. As long as you retain
// this notice you can do whatever you want with this stuff. If we meet some
// day, and you think this stuff is worth it, you can buy me a beer in return
// Kevin Gillieron
package main | |
import ( | |
"encoding/xml" | |
"fmt" | |
"io/ioutil" | |
"os" | |
) | |
type HTMLNode struct { | |
Head HeadNode `xml:"head"` | |
Body BodyNode `xml:"body"` | |
} | |
type HeadNode struct { | |
Title string `xml:"title"` | |
Meta []MetaNode `xml:"meta"` | |
Links []LinkNode `xml:"link"` | |
Scripts []ScriptNode `xml:"script"` | |
} | |
// MetaNode holds the attributes of a single <meta> element. An attribute that
// is absent from the element leaves the corresponding field empty.
//
// The Schema field is filled from the HTML "scheme" attribute; the Go field
// name is kept unchanged so existing users of the struct keep compiling.
type MetaNode struct {
	Name      string `xml:"name,attr"`
	Content   string `xml:"content,attr"`
	Charset   string `xml:"charset,attr"`
	HTTPEquiv string `xml:"http-equiv,attr"`
	Schema    string `xml:"scheme,attr"`
}
// LinkNode holds the attributes of a single <link> element. An attribute that
// is absent from the element leaves the corresponding field empty.
type LinkNode struct {
	Charset  string `xml:"charset,attr"`
	Href     string `xml:"href,attr"`
	HrefLang string `xml:"hreflang,attr"`
	Media    string `xml:"media,attr"`
	Rel      string `xml:"rel,attr"`
	Rev      string `xml:"rev,attr"`
	Sizes    string `xml:"sizes,attr"`
	Target   string `xml:"target,attr"`
	Type     string `xml:"type,attr"`
}
// ScriptNode holds the attributes and the inline text of a single <script>
// element.
//
// NOTE(review): Async and Defer are plain strings, so they are only filled
// when the attribute carries a value (e.g. async="async"). The bare HTML form
// (<script async>) is rejected by encoding/xml's strict decoder.
type ScriptNode struct {
	Async   string `xml:"async,attr"`
	Charset string `xml:"charset,attr"`
	Defer   string `xml:"defer,attr"`
	Src     string `xml:"src,attr"`
	Type    string `xml:"type,attr"`
	Data    string `xml:",chardata"` // inline script text, if any
}
// BodyNode keeps the <body> element undecoded. RawData receives the raw
// markup nested inside <body>, child elements included.
//
// The ",innerxml" tag is used rather than ",chardata" because chardata only
// collects text directly inside <body> and would drop every nested element.
type BodyNode struct {
	RawData []byte `xml:",innerxml"`
}
func main() { | |
htmlFile, _ := os.Open("foo.html") | |
defer htmlFile.Close() | |
b, _ := ioutil.ReadAll(htmlFile) | |
var node HTMLNode | |
xml.Unmarshal(b, &node) | |
fmt.Println(node) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment