Created
August 28, 2024 17:35
-
-
Save maksverver/7ec9221f163070cbd98f4f38a3932036 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import io | |
import libxml2 | |
import xml.sax.xmlreader | |
from drv_libxml2 import LibXml2Reader | |
def parse_binary():
    """Parse a small XML document supplied through a *byte* stream.

    Builds an ``InputSource`` whose byte stream holds a fixed UTF-8 document
    and hands it to ``LibXml2Reader.parse()``.

    Returns:
        Whatever ``LibXml2Reader().parse()`` returns (the script's own notes
        say this prints ``None`` on success).
    """
    source = xml.sax.xmlreader.InputSource()
    # Byte-stream path: this is the input the driver reads via getByteStream().
    source.setByteStream(
        io.BytesIO(b'<?xml version="1.0" encoding="UTF-8"?>\n<root />\n')
    )
    return LibXml2Reader().parse(source)
def parse_text():
    """Parse a small XML document supplied through a *character* stream.

    Builds an ``InputSource`` that only has a character stream set (no byte
    stream) and hands it to ``LibXml2Reader.parse()``. This is the failing
    half of the reproduction: it is intentionally left as-is so the script
    keeps demonstrating the driver's behaviour with text input.

    Returns:
        Whatever ``LibXml2Reader().parse()`` returns.
    """
    source = xml.sax.xmlreader.InputSource()
    # Only the character stream is populated; getByteStream() stays None.
    source.setCharacterStream(io.StringIO('<?xml version="1.0"?>\n<root />\n'))
    return LibXml2Reader().parse(source)
# Succeeds (prints "None")
print(parse_binary())

# Fails, and prints:
#
#   xmlPythonFileRead: result is not a String
#   <unknown>:1:-1: Unknown IO error
#
#   None
#
print(parse_text())
# Root cause is that LibXml2Reader.parse() does [1]:
#
#   input = libxml2.inputBuffer(source.getByteStream())
#
# but since source.getByteStream() returns None, this creates an ioWrapper that
# only ever returns -1 [2], triggering the "result is not a String" error [3].
#
# 1. https://github.com/GNOME/libxml2/blob/master/python/drv_libxml2.py#L149
# 2. https://github.com/GNOME/libxml2/blob/67ff748c3eba93745a09157e11759d09f864492f/python/libxml.py#L73-L74
# 3. https://github.com/GNOME/libxml2/blob/67ff748c3eba93745a09157e11759d09f864492f/python/libxml.c#L333
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment