Created
August 31, 2022 05:44
-
-
Save Neradoc/12af11d6828cd177b1167025aeeaa89b to your computer and use it in GitHub Desktop.
CircuitPython XML example, port of micropython-lib.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# SPDX-FileCopyrightText: Copyright (c) 2022 Neradoc | |
# SPDX-License-Identifier: Unlicense | |
import sys | |
from ElementTree import parse | |
# Parse the demo document; the file is closed once the tree has been built.
with open("some-demo.xml", "r") as xml_file:
    tree = parse(xml_file)

# Show the default representation of the object returned by parse().
print(tree)
def print_sub_tree(node, depth=0):
    """Recursively print an element, its attributes and its children.

    ``depth`` is the number of leading spaces for this level; every level of
    nesting adds two more.
    """
    indent = " " * depth
    quoted = "" if node.text is None else '"' + node.text + '"'
    print(indent, "-", node.tag, quoted)
    for name in node.attrib:
        print(indent, "|", name, ":", node.attrib[name])
    for child in node:
        print_sub_tree(child, depth + 2)
# Dump the whole document, starting from its root element.
root = tree.getroot()
print_sub_tree(root)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This file is part of the standard library of Pycopy project, minimalist | |
# and lightweight Python implementation. | |
# | |
# https://github.com/pfalcon/pycopy | |
# https://github.com/pfalcon/pycopy-lib | |
# | |
# The MIT License (MIT) | |
# | |
# Copyright (c) 2018-2020 Paul Sokolovsky | |
# | |
# Permission is hereby granted, free of charge, to any person obtaining a copy | |
# of this software and associated documentation files (the "Software"), to deal | |
# in the Software without restriction, including without limitation the rights | |
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |
# copies of the Software, and to permit persons to whom the Software is | |
# furnished to do so, subject to the following conditions: | |
# | |
# The above copyright notice and this permission notice shall be included in | |
# all copies or substantial portions of the Software. | |
# | |
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | |
# THE SOFTWARE. | |
import io | |
import xmltok2 | |
class ParseError(Exception):
    """Raised when the token stream cannot be assembled into an element tree."""
class Element:
    """A single XML element: tag, attributes, text, tail and child elements.

    ``text`` is the character data directly after the start tag and ``tail``
    is the character data written after the end tag; both are None when
    absent.  Children are reachable through indexing, ``len()`` and iteration.
    Note that ``__len__`` makes an element without children falsy.
    """

    def __init__(self):
        self.tag = None
        self.attrib = {}
        self.text = None
        self.tail = None
        self._children = []

    def __getitem__(self, i):
        """Return the child at index *i* (IndexError ends iteration)."""
        return self._children[i]

    def __len__(self):
        """Return the number of direct children."""
        return len(self._children)

    def append(self, el):
        """Add *el* as the last child."""
        self._children.append(el)

    def get(self, key, default=None):
        """Return attribute *key*, or *default* if it is not set."""
        return self.attrib.get(key, default)

    def set(self, key, value):
        """Set attribute *key* to *value*."""
        self.attrib[key] = value

    def write(self, file):
        """Serialize this element, its subtree and its tail to *file*.

        *file* only needs a ``write(str)`` method.  Text and attribute values
        are written as stored; no entity escaping is applied, except that an
        attribute value is never allowed to contain its own delimiter.
        """
        assert self.tag is not None
        file.write("<%s" % self.tag)
        for k, v in self.attrib.items():
            v = "{}".format(v)
            if '"' not in v:
                quote = '"'
            elif "'" not in v:
                # Value holds double quotes only: delimit with single quotes
                # so the output stays well-formed.
                quote = "'"
            else:
                # Both quote kinds present: one of them has to be escaped.
                quote = '"'
                v = v.replace('"', "&quot;")
            file.write(" {}={}{}{}".format(k, quote, v, quote))
        file.write(">")
        if self.text is not None:
            file.write(self.text)
        for t in self._children:
            t.write(file)
        file.write("</%s>" % self.tag)
        if self.tail is not None:
            file.write(self.tail)
class ElementTree:
    """Thin wrapper that holds the root element of a document."""

    def __init__(self, root):
        self.root = root

    def getroot(self):
        """Return the root element given to the constructor."""
        return self.root

    def write(self, file):
        """Write the root element to *file*, followed by a newline."""
        top = self.root
        top.write(file)
        file.write("\n")
def parse_el(stream):
    """Build a tree of Element objects from the XML read from *stream*.

    Args:
        stream: object handed to ``xmltok2.tokenize()``.

    Returns:
        The root Element, or None if the input contains no element.
        Namespace prefixes are not stored; ``tag`` holds the local name only.

    Raises:
        ParseError: on a closing tag that does not match the open element,
            a closing tag with no open element, or text before any element.
    """
    stack = []          # currently open elements, innermost last
    root = None
    last = None         # most recently closed element; following text is its tail
    attr_target = None  # element whose start tag is being read; None inside a PI
    for ev in xmltok2.tokenize(stream):
        typ = ev[0]
        if typ == xmltok2.START_TAG:
            el = Element()
            el.tag = ev[2]
            if not stack:
                root = el
            else:
                stack[-1]._children.append(el)
            stack.append(el)
            last = None
            attr_target = el
        elif typ == xmltok2.PI:
            # Attributes of a processing instruction must not leak onto
            # whatever element happens to be open.
            attr_target = None
        elif typ == xmltok2.ATTR:
            if attr_target is not None:
                attr_target.attrib[ev[2]] = ev[3]
        elif typ == xmltok2.TEXT:
            if last is not None:
                last.tail = ev[1]
            elif stack:
                stack[-1].text = ev[1]
            else:
                raise ParseError("text outside of root element")
        elif typ == xmltok2.END_TAG:
            if not stack:
                raise ParseError("unexpected closing tag: /%s" % ev[2])
            if stack[-1].tag != ev[2]:
                # ev[1] is the namespace prefix (often ""); the tag name is ev[2].
                raise ParseError(
                    "mismatched tag: /%s (expected: /%s)" % (ev[2], stack[-1].tag)
                )
            last = stack.pop()
    return root
def parse(source):
    """Parse XML from *source* and return it wrapped in an ElementTree."""
    root = parse_el(source)
    return ElementTree(root)
def fromstring(data):
    """Parse XML from the string *data* and return the result of parse_el()."""
    return parse_el(io.StringIO(data))
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?xml version="1.0" encoding="UTF-8"?> | |
<note> | |
<to>Tove</to> | |
<from>Jani</from> | |
<heading color="red">Reminder</heading> | |
<body class="important">Don't forget me this weekend!</body> | |
</note> |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This file is part of the standard library of Pycopy project, minimalist | |
# and lightweight Python implementation. | |
# | |
# https://github.com/pfalcon/pycopy | |
# https://github.com/pfalcon/pycopy-lib | |
# | |
# The MIT License (MIT) | |
# | |
# Copyright (c) 2018-2019 Paul Sokolovsky | |
# | |
# Permission is hereby granted, free of charge, to any person obtaining a copy | |
# of this software and associated documentation files (the "Software"), to deal | |
# in the Software without restriction, including without limitation the rights | |
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |
# copies of the Software, and to permit persons to whom the Software is | |
# furnished to do so, subject to the following conditions: | |
# | |
# The above copyright notice and this permission notice shall be included in | |
# all copies or substantial portions of the Software. | |
# | |
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | |
# THE SOFTWARE. | |
# Token type tags; the first item of every token is one of these strings.
TEXT = "TEXT"            # character data between tags
START_TAG = "START_TAG"  # opening tag
END_TAG = "END_TAG"      # closing tag (also emitted for a self-closing tag)
PI = "PI"                # processing instruction, e.g. <?xml ...?>
ATTR = "ATTR"            # one attribute of a tag or processing instruction
# START_TAG_DONE, PI_DONE and ATTR_VAL are not defined by this module.
class XMLSyntaxError(Exception):
    """Raised by the tokenizer when the input is not the XML it expects."""
class XMLTokenizer:
    """Pull tokenizer that reads XML one character at a time from a stream.

    ``tokenize()`` yields 4-item lists whose first item is the token type:

    * ``[START_TAG, ns, name, None]``
    * ``[END_TAG, ns, name, None]`` (also for a self-closing tag)
    * ``[ATTR, ns, name, value]``
    * ``[PI, name, <unspecified>, None]``
    * ``[TEXT, text, None, None]``

    ``ns`` is ``""`` when the name has no prefix.  The SAME list object is
    reused for every token, so copy what you need before advancing.
    Comments are skipped; entities are not decoded.
    """

    def __init__(self, f):
        self.f = f   # source stream; only read(1) is used
        self.c = ""  # one-character lookahead; "" means end of input
        self.nextch()

    def getch(self):
        """Return the lookahead character and advance past it."""
        c = self.c
        self.nextch()
        return c

    def eof(self):
        """Return True once the input is exhausted."""
        return self.c == ""

    def nextch(self):
        """Load the next character into the lookahead."""
        self.c = self.f.read(1)

    def skip_ws(self):
        """Advance past any whitespace at the lookahead."""
        while self.c.isspace():
            self.nextch()

    def isident(self):
        """Skip whitespace and report whether a name starts here."""
        self.skip_ws()
        return self.c.isalpha()

    def getident(self):
        """Skip whitespace and read a run of letters, digits, ``_``, ``-``, ``.``."""
        self.skip_ws()
        ident = ""
        while self.c:
            c = self.c
            if not (c.isalpha() or c.isdigit() or c in "_-."):
                break
            ident += self.getch()
        return ident

    def putnsident(self, res):
        """Read ``name`` or ``ns:name`` and store it in ``res[1]``/``res[2]``."""
        ns = ""
        ident = self.getident()
        if self.c == ":":
            self.nextch()
            ns = ident
            ident = self.getident()
        res[1] = ns
        res[2] = ident

    def match(self, c):
        """Skip whitespace; consume *c* if it is next and report whether it was."""
        self.skip_ws()
        if self.c == c:
            self.nextch()
            return True
        return False

    def expect(self, c):
        """Consume *c* (after optional whitespace) or raise XMLSyntaxError."""
        if not self.match(c):
            raise XMLSyntaxError("expected %r, got %r" % (c, self.c))

    def lex_attrs_till(self, res):
        """Yield one ATTR token per ``name="value"`` pair at the current position.

        Raises:
            XMLSyntaxError: if ``=`` or the opening quote is missing, or the
                input ends inside the attribute value.
        """
        while self.isident():
            res[0] = ATTR
            self.putnsident(res)
            self.expect("=")
            # XML permits whitespace on both sides of "=".
            self.skip_ws()
            quote = self.getch()
            if quote != '"' and quote != "'":
                raise XMLSyntaxError("attribute value must be quoted")
            val = ""
            while self.c != quote:
                # getch() returns "" forever at end of input, so without this
                # check an unterminated value would never stop looping.
                if self.eof():
                    raise XMLSyntaxError("unterminated attribute value")
                val += self.getch()
            self.expect(quote)
            res[3] = val
            yield res
            res[3] = None

    def tokenize(self):
        """Generate tokens until end of input (see the class docstring).

        Raises:
            XMLSyntaxError: on malformed markup, including an unterminated
                comment and ``<!`` constructs other than comments.
        """
        res = [None, None, None, None]
        while not self.eof():
            if self.match("<"):
                if self.match("/"):
                    res[0] = END_TAG
                    self.putnsident(res)
                    yield res
                    self.expect(">")
                elif self.match("?"):
                    res[0] = PI
                    res[1] = self.getident()
                    yield res
                    yield from self.lex_attrs_till(res)
                    self.expect("?")
                    self.expect(">")
                elif self.match("!"):
                    self.expect("-")
                    self.expect("-")
                    # Slide a 3-character window until the comment terminator.
                    last3 = ""
                    while last3 != "-->":
                        if self.eof():
                            raise XMLSyntaxError("unterminated comment")
                        last3 = last3[-2:] + self.getch()
                else:
                    res[0] = START_TAG
                    self.putnsident(res)
                    # Remember the name: attribute tokens overwrite res[1:3].
                    ns = res[1]
                    tag = res[2]
                    yield res
                    yield from self.lex_attrs_till(res)
                    if self.match("/"):
                        # Self-closing tag: synthesize the matching END_TAG.
                        res[0] = END_TAG
                        res[1] = ns
                        res[2] = tag
                        yield res
                    self.expect(">")
            else:
                # match("<") already skipped leading whitespace, so text
                # tokens never start with whitespace.
                text = ""
                while self.c and self.c != "<":
                    text += self.getch()
                if text:
                    res[0] = TEXT
                    res[1] = text
                    res[2] = None
                    yield res
def gfind(gen, pred):
    """Advance *gen* to the first item satisfying *pred* and return it.

    Returns None when *gen* is exhausted without a match.  Items up to and
    including the match are consumed from *gen*.
    """
    for candidate in gen:
        if not pred(candidate):
            continue
        return candidate
    return None
def text_of(gen, tag):
    """Return the text content of a leaf tag from a tokenizer stream.

    *tag* is either a plain tag name or a ``(namespace, name)`` tuple.
    *gen* is advanced past the matching start tag and the token after it.
    """
    wants_ns = isinstance(tag, tuple)

    def is_wanted_start(tok):
        if tok[0] != START_TAG:
            return False
        if wants_ns:
            return tok[1] == tag[0] and tok[2] == tag[1]
        return tok[2] == tag

    gfind(gen, is_wanted_start)
    # Assumes no attributes: the very next token must be the tag's text.
    following = next(gen)
    assert following[0] == TEXT
    return following[1]
def tokenize(file):
    """Return a token generator for the XML read from *file*."""
    tokenizer = XMLTokenizer(file)
    return tokenizer.tokenize()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment