Skip to content

Instantly share code, notes, and snippets.

@robyoung
Created April 2, 2013 19:54
Show Gist options
  • Save robyoung/5295628 to your computer and use it in GitHub Desktop.
Save robyoung/5295628 to your computer and use it in GitHub Desktop.
Go's XML parser seems to have trouble with empty elements. Tested with: 1.0.3 and 16533:7cfa82648086
package xmlparse
import (
"encoding/xml"
"fmt"
"io"
"strings"
"testing"
)
// XML_DATA_BAD is a small MediaWiki export document whose <namespace> element
// has no character data between its start and end tags. The <title> element
// holds the text "AccessibleComputing".
// NOTE(review): this appears to be the input that exposes the reported
// empty-element problem — confirm against the failing test run.
const XML_DATA_BAD = `<mediawiki xmlns="http://www.mediawiki.org/xml/export-0.8/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.mediawiki.org/xml/export-0.8/ http://www.mediawiki.org/xml/export-0.8.xsd" version="0.8" xml:lang="en">
<siteinfo>
<namespaces>
<namespace key="0" case="first-letter"></namespace>
</namespaces>
</siteinfo>
<page>
<title>AccessibleComputing</title>
</page>
</mediawiki>`
// XML_DATA_GOOD is a small MediaWiki export document whose <namespace> element
// contains the character data "something". The <title> element holds the text
// "AccessibleComputing".
const XML_DATA_GOOD = `<mediawiki xmlns="http://www.mediawiki.org/xml/export-0.8/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.mediawiki.org/xml/export-0.8/ http://www.mediawiki.org/xml/export-0.8.xsd" version="0.8" xml:lang="en">
<siteinfo>
<namespaces>
<namespace key="0" case="first-letter">something</namespace>
</namespaces>
</siteinfo>
<page>
<title>AccessibleComputing</title>
</page>
</mediawiki>`
// parseTokensFromReader decodes XML from reader in a background goroutine and
// delivers each xml.Token on the returned unbuffered channel. The channel is
// closed once the decoder reports io.EOF.
//
// Every token is duplicated with xml.CopyToken before it is sent.
// Decoder.Token documents that the bytes of a returned token refer to the
// parser's internal buffer and remain valid only until the next call to
// Token. This goroutine calls Token again as soon as a send completes, so an
// uncopied xml.CharData could be overwritten while the receiver still holds it.
//
// Any decode error other than io.EOF panics inside the background goroutine.
// Sends block until received, so callers should drain the channel until it is
// closed.
func parseTokensFromReader(reader io.Reader) chan xml.Token {
	tokens := make(chan xml.Token)
	go func() {
		decoder := xml.NewDecoder(reader)
		for {
			token, err := decoder.Token()
			if err == io.EOF {
				// End of input: signal completion to the receiver.
				close(tokens)
				return
			}
			if err != nil {
				panic(fmt.Sprintf("Failed to read token: %v", err))
			}
			// Detach the token from the decoder's reusable buffer before
			// handing it to another goroutine.
			tokens <- xml.CopyToken(token)
		}
	}()
	return tokens
}
// testXmlData walks the token stream produced for xml_data while tracking the
// currently open elements. It reports a test error unless the last character
// data seen directly inside a <title> element is "AccessibleComputing".
func testXmlData(t *testing.T, xml_data string) {
	var open []string // local names of the open elements, innermost last
	title := ""

	source := strings.NewReader(xml_data)
	for tok := range parseTokensFromReader(source) {
		switch node := tok.(type) {
		case xml.StartElement:
			open = append(open, node.Name.Local)
		case xml.EndElement:
			open = open[:len(open)-1]
		case xml.CharData:
			// Only text whose immediate parent is <title> is of interest.
			if depth := len(open); depth > 0 && open[depth-1] == "title" {
				title = string(node)
			}
		}
	}

	if title == "AccessibleComputing" {
		return
	}
	t.Errorf("The titles do not match: %v", title)
}
// Using a string reader
func TestBadXML(t *testing.T) {
testXmlData(t, XML_DATA_BAD)
}
func TestGoodXML(t *testing.T) {
testXmlData(t, XML_DATA_GOOD)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment