Created
April 2, 2013 19:54
-
-
Save robyoung/5295628 to your computer and use it in GitHub Desktop.
Go's XML parser seems to have trouble with empty elements.
Tested with: Go 1.0.3 and changeset 16533:7cfa82648086
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package xmlparse | |
import ( | |
"encoding/xml" | |
"fmt" | |
"io" | |
"strings" | |
"testing" | |
) | |
// XML_DATA_BAD is a minimal MediaWiki export document whose <namespace>
// element is empty (start tag immediately followed by its end tag). It is the
// input for TestBadXML. The name is kept as-is because other code in this
// file refers to it.
const XML_DATA_BAD = `<mediawiki xmlns="http://www.mediawiki.org/xml/export-0.8/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.mediawiki.org/xml/export-0.8/ http://www.mediawiki.org/xml/export-0.8.xsd" version="0.8" xml:lang="en">
<siteinfo>
<namespaces>
<namespace key="0" case="first-letter"></namespace>
</namespaces>
</siteinfo>
<page>
<title>AccessibleComputing</title>
</page>
</mediawiki>`
// XML_DATA_GOOD is the same minimal MediaWiki export document as
// XML_DATA_BAD except that its <namespace> element contains the text
// "something" instead of being empty. It is the input for TestGoodXML. The
// name is kept as-is because other code in this file refers to it.
const XML_DATA_GOOD = `<mediawiki xmlns="http://www.mediawiki.org/xml/export-0.8/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.mediawiki.org/xml/export-0.8/ http://www.mediawiki.org/xml/export-0.8.xsd" version="0.8" xml:lang="en">
<siteinfo>
<namespaces>
<namespace key="0" case="first-letter">something</namespace>
</namespaces>
</siteinfo>
<page>
<title>AccessibleComputing</title>
</page>
</mediawiki>`
// parseTokensFromReader decodes reader as XML on a background goroutine and
// delivers every xml.Token on the returned unbuffered channel. The channel is
// closed once the decoder reports io.EOF.
//
// Each token is copied with xml.CopyToken before it is sent. The byte slices
// inside a token returned by Decoder.Token refer to the decoder's internal
// buffer and stay valid only until the next call to Token; this goroutine
// makes that next call as soon as the receiver has taken the previous token,
// so sending the token uncopied lets the receiver observe overwritten data.
//
// Any decode error other than io.EOF panics on the background goroutine,
// because the return type offers no way to report it to the caller.
func parseTokensFromReader(reader io.Reader) chan xml.Token {
	tokens := make(chan xml.Token)
	go func() {
		decoder := xml.NewDecoder(reader)
		for {
			token, err := decoder.Token()
			if err == io.EOF {
				close(tokens)
				return
			}
			if err != nil {
				panic(fmt.Sprintf("Failed to read token: %v", err))
			}
			// Detach the token from the decoder's buffer before handing it
			// to another goroutine.
			tokens <- xml.CopyToken(token)
		}
	}()
	return tokens
}
func testXmlData(t *testing.T, xml_data string) { | |
var ( | |
stack []string | |
title string | |
) | |
for t := range parseTokensFromReader(strings.NewReader(xml_data)) { | |
switch token := t.(type) { | |
case xml.StartElement: | |
stack = append(stack, token.Name.Local) | |
case xml.EndElement: | |
stack = stack[:len(stack)-1] | |
case xml.CharData: | |
if len(stack) > 0 && stack[len(stack)-1] == "title" { | |
title = string(token) | |
} | |
} | |
} | |
if title != "AccessibleComputing" { | |
t.Errorf("The titles do not match: %v", title) | |
} | |
} | |
// Using a string reader | |
func TestBadXML(t *testing.T) { | |
testXmlData(t, XML_DATA_BAD) | |
} | |
func TestGoodXML(t *testing.T) { | |
testXmlData(t, XML_DATA_GOOD) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment