Last active
December 13, 2022 13:38
-
-
Save mortie/9a844a25bfb6edc49c833da0936ac120 to your computer and use it in GitHub Desktop.
Create a canonicalized representation of devicetree files, for easier diffing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import sys | |
import traceback | |
import subprocess | |
class Reader:
    """Byte-at-a-time reader with two bytes of lookahead and position tracking.

    Attributes:
        f: the underlying binary file object; only ``read(1)`` is called on it.
        next: the current byte (``b""`` once the input is exhausted).
        next2: the byte following ``next`` (``b""`` at end of input).
        line: 1-based line number of ``next``.
        column: 1-based column number of ``next``.
    """

    def __init__(self, f):
        self.f = f
        self.line = 1
        self.column = 1
        # Prime the two-byte lookahead window.
        self.next = f.read(1)
        self.next2 = f.read(1)

    def consume(self):
        """Advance by one byte, updating the line/column counters."""
        if self.next == b"\n":
            # Consuming a newline moves to the start of the next line.
            self.line, self.column = self.line + 1, 1
        else:
            self.column += 1
        # Shift the lookahead window forward by one byte.
        self.next, self.next2 = self.next2, self.f.read(1)
class Node:
    """A node in the parsed tree.

    Attributes:
        parent: the enclosing Node, or None for a root node.
        props: mapping of property name (bytes) to its value.
        children: mapping of child name (bytes) to child Node.
    """

    def __init__(self, parent):
        self.parent = parent
        self.props = {}
        self.children = {}

    def name(self):
        """Return the key this node is stored under in its parent's children.

        Root nodes (no parent) are named ``b"/"``. Returns None if the node
        is not found among its parent's children.
        """
        if not self.parent:
            return b"/"
        for key, child in self.parent.children.items():
            if child is self:
                return key
        return None

    def path(self):
        """Return the slash-separated path (bytes) from the root to this node."""
        if not self.parent:
            return b"/"
        # Compute the parent's path exactly once: calling it twice per level
        # makes the cost grow exponentially with nesting depth.
        parent_path = self.parent.path()
        if parent_path == b"/":
            return b"/" + self.name()
        return parent_path + b"/" + self.name()
class ParseError(Exception):
    """Error raised when the input cannot be parsed.

    Args:
        r: the reader; its current ``line`` and ``column`` are recorded.
        msg: human-readable description of the problem.
        node: optional node that was being parsed when the error occurred.
    """

    def __init__(self, r, msg, node=None):
        # Pass only the message to Exception so str(ex) and tracebacks show
        # it, rather than the raw (reader, msg) constructor arguments.
        super().__init__(msg)
        self.line = r.line
        self.column = r.column
        self.msg = msg
        self.node = node
def skip(r, ch):
    """Consume the current byte of `r`, which must equal `ch`.

    Raises:
        ParseError: if the current byte is anything other than `ch`.
    """
    found = r.next
    if found == ch:
        r.consume()
        return
    raise ParseError(r, "Expected " + str(ch) + ", got " + str(found))
def skip_whitespace(r):
    """Advance `r` past whitespace, comments and ``# `` lines.

    Skips spaces, tabs and newlines, ``//`` line comments, ``/* ... */``
    block comments, and whole lines starting with ``# `` (presumably the
    line markers emitted by the C preprocessor).

    Raises:
        ParseError: if the input ends inside a block comment.
    """
    while True:
        # b"" is "in" every bytes object, so EOF must be excluded explicitly.
        while r.next != b"" and r.next in b" \n\t":
            r.consume()
        if r.next == b"/" and r.next2 == b"/":
            skip_line(r)
        elif r.next == b"/" and r.next2 == b"*":
            # Consume the opening "/*".
            r.consume()
            r.consume()
            while not (r.next == b"*" and r.next2 == b"/"):
                if r.next == b"":
                    raise ParseError(r, "Unexpected EOF in comment")
                r.consume()
            # Consume the closing "*/" too; otherwise the caller would
            # resume parsing on the "*" of the comment terminator.
            r.consume()
            r.consume()
        elif r.next == b"#" and r.next2 == b" ":
            skip_line(r)
        else:
            break
def skip_line(r):
    """Consume the rest of the current line, including its newline."""
    while r.next not in (b"\n", b""):
        r.consume()
    # Step over the newline itself (at end of input this consumes b"").
    r.consume()
def read_ident(r):
    """Read bytes up to the next delimiter and return them.

    Stops (without consuming) at any of ``{}:;<>()``, a space, newline or
    tab, or at end of input. Returns ``b""`` if the current byte is already
    a delimiter.
    """
    delimiters = b"{}:;<>() \n\t"
    pieces = []
    while r.next != b"" and r.next not in delimiters:
        pieces.append(r.next)
        r.consume()
    return b"".join(pieces)
_DEC_DIGITS = b"0123456789"
_HEX_DIGITS = b"0123456789abcdefABCDEF"


def _next_in(r, chars):
    """Return True if the reader's current byte is one of `chars`.

    ``b"" in chars`` is always True for bytes, so end of input has to be
    excluded explicitly or digit loops would never terminate at EOF.
    """
    return r.next != b"" and r.next in chars


def _parse_string(r):
    """Consume a double-quoted string and return it verbatim, quotes included."""
    text = b'"'
    r.consume()
    while r.next != b'"':
        if r.next == b"":
            raise ParseError(r, "Unexpected EOF in string")
        if r.next == b"\\":
            # Copy the backslash here and the escaped byte below, so that an
            # escaped quote does not terminate the string.
            text += r.next
            r.consume()
        text += r.next
        r.consume()
    r.consume()
    return text + b'"'


def _parse_parenthesized(r):
    """Consume a parenthesized expression verbatim, honoring nested parens."""
    text = b"("
    r.consume()
    depth = 1
    while depth > 0:
        if r.next == b"":
            raise ParseError(r, "Unexpected EOF in parenthesized expression")
        if r.next == b"(":
            depth += 1
        elif r.next == b")":
            depth -= 1
        text += r.next
        r.consume()
    return text


def _parse_number(r):
    """Consume a decimal or ``0x`` hex literal and return it normalized.

    Hex literals are re-emitted through ``hex()`` and decimal literals
    through ``str(int(...))``, which drops leading zeros and lowercases
    hex digits.
    """
    leading = r.next
    r.consume()
    if leading == b"0" and r.next == b"x":
        r.consume()
        digits = b""
        while _next_in(r, _HEX_DIGITS):
            digits += r.next
            r.consume()
        if digits == b"":
            raise ParseError(r, "Expected hex digits after 0x")
        return bytes(hex(int(digits, 16)), "utf-8")
    digits = leading
    while _next_in(r, _DEC_DIGITS):
        digits += r.next
        r.consume()
    return bytes(str(int(digits)), "utf-8")


def _parse_bytestring(r):
    """Consume a ``[...]`` bytestring and return it as lowercase hex pairs.

    Each byte is two hex digits; whitespace between bytes is optional in the
    input. The result separates bytes with single spaces, e.g. ``[0a ff]``.
    """
    r.consume()
    octets = []
    while True:
        skip_whitespace(r)
        if r.next == b"]":
            break
        if r.next == b"":
            raise ParseError(r, "Unexpected EOF in bytestring")
        pair = b""
        for _ in range(2):
            if not _next_in(r, _HEX_DIGITS):
                raise ParseError(r, "Invalid hex character")
            pair += r.next
            # Both digits must be consumed, or the second digit would be
            # re-read as the start of the next byte.
            r.consume()
        octets.append(pair.lower())
    r.consume()
    return b"[" + b" ".join(octets) + b"]"


def parse_value(r):
    """Parse a property value up to (not including) the next ``;`` or ``>``.

    The value is returned as normalized bytes: components are separated by
    single spaces, numbers are re-formatted, and strings and parenthesized
    expressions are copied verbatim. ``<...>`` groups are parsed recursively.

    Raises:
        ParseError: on end of input or an unrecognized/malformed component.
    """
    value = b""
    while True:
        skip_whitespace(r)
        if r.next == b";" or r.next == b">":
            return value
        if r.next == b",":
            r.consume()
            value += b","
            skip_whitespace(r)
        if value != b"":
            value += b" "

        if r.next == b"":
            raise ParseError(r, "Unexpected EOF while parsing value")
        elif r.next == b'"':
            value += _parse_string(r)
        elif r.next == b"<":
            r.consume()
            value += b"<" + parse_value(r) + b">"
            skip(r, b">")
        elif r.next == b"(":
            value += _parse_parenthesized(r)
        elif r.next in _DEC_DIGITS:
            value += _parse_number(r)
        elif r.next == b"[":
            value += _parse_bytestring(r)
        elif r.next == b"&":
            value += b"&"
            r.consume()
            value += read_ident(r)
        elif r.next == b"{":
            value += b"{"
            # The opening brace must be consumed before reading the body;
            # read_ident() stops immediately on "{".
            r.consume()
            skip_whitespace(r)
            value += read_ident(r)
            skip_whitespace(r)
            value += b"}"
            skip(r, b"}")
        else:
            ident = read_ident(r)
            if ident == b"":
                raise ParseError(r, "Unknown lead character in value: " + str(r.next))
            value += ident
            # read_ident() stops at ":"; keep it attached to the identifier.
            if r.next == b":":
                value += b":"
                r.consume()
def parse_node(r, node, labels):
    """Parse a ``{ ... };`` node body from `r` into `node`.

    Properties are stored in ``node.props`` (``True`` for value-less
    properties), child nodes are created or merged into ``node.children``,
    and any ``label:`` prefix on a child is recorded in `labels`.

    Raises:
        ParseError: on malformed input; errors from property values carry
            `node` as their ``node`` attribute.
    """
    skip(r, b"{")
    while True:
        skip_whitespace(r)
        if r.next == b"}":
            r.consume()
            skip(r, b";")
            return

        name = read_ident(r)
        skip_whitespace(r)
        lead = r.next

        # Property with a value: name = value;
        if lead == b"=":
            r.consume()
            skip_whitespace(r)
            try:
                node.props[name] = parse_value(r)
            except ParseError as ex:
                ex.node = node
                raise ex
            skip_whitespace(r)
            skip(r, b";")
            skip_whitespace(r)
            continue

        # Value-less property: name;
        if lead == b";":
            r.consume()
            skip_whitespace(r)
            node.props[name] = True
            continue

        if lead not in (b":", b"{"):
            raise ParseError(r, "Expected value or child node, got " + str(r.next), node)

        # Child node, optionally prefixed with "label:".
        label = None
        if lead == b":":
            label = name
            r.consume()
            skip_whitespace(r)
            name = read_ident(r)
            skip_whitespace(r)

        # A repeated child name extends the existing child.
        child = node.children.get(name)
        if child is None:
            child = Node(node)
            node.children[name] = child
        if label:
            labels[label] = child
        parse_node(r, child, labels)
def parse_document(r):
    """Parse a whole document from `r`.

    Top-level ``name { ... };`` blocks become root nodes (blocks with the
    same name are merged), ``&label { ... };`` blocks extend the node
    previously registered under that label, and ``/dts-v1/;`` is skipped.

    Returns:
        A ``(roots, labels)`` tuple: root name (bytes) -> Node, and
        label (bytes) -> Node.

    Raises:
        ParseError: on malformed input, including a reference to a label
            that has not been defined.
    """
    labels = {}
    roots = {}
    while True:
        skip_whitespace(r)
        if r.next == b"":
            return roots, labels

        if r.next == b"&":
            r.consume()
            name = read_ident(r)
            # Report bad references as ParseError (with position) rather than
            # via assert, which is stripped under -O and bypasses the
            # script's error reporting.
            if name == b"":
                raise ParseError(r, "Expected label name after '&'")
            if name not in labels:
                raise ParseError(
                    r, "Reference to undefined label: " + str(name, "utf-8", "replace"))
            skip_whitespace(r)
            parse_node(r, labels[name], labels)
            continue

        name = read_ident(r)
        if name == b"/dts-v1/":
            skip_whitespace(r)
            skip(r, b";")
            continue

        if name in roots:
            node = roots[name]
        else:
            node = Node(None)
            roots[name] = node
        skip_whitespace(r)
        parse_node(r, node, labels)
def print_node(node, name, outfile):
    """Write `node` and, after it, all of its descendants to `outfile`.

    Each node is printed as a flat block headed by its full name, with
    properties in sorted order; children follow as separate blocks named
    ``name + b"/" + child_name``. An empty `name` is printed as ``/``.
    """
    heading = "/" if name == b"" else str(name, "utf-8")
    print(heading + " {", file=outfile)

    for key in sorted(node.props):
        prop = node.props[key]
        entry = " " + str(key, "utf-8")
        if prop is not True:
            # Value-less properties are stored as True and print bare.
            entry += " = " + str(prop, "utf-8")
        print(entry + ";", file=outfile)
    print("};", file=outfile)

    for key in sorted(node.children):
        print_node(node.children[key], name + b"/" + key, outfile)
def print_roots(roots, outfile):
    """Print every root node to `outfile`, in sorted name order."""
    for root_name in sorted(roots):
        # The "/" root is passed with an empty name so its children are
        # printed as "/child" rather than "//child".
        shown = b"" if root_name == b"/" else root_name
        print_node(roots[root_name], shown, outfile)
def print_labels(labels, outfile):
    """Print one ``label = /path/to/node`` line per label, sorted by label."""
    for label in sorted(labels):
        label_text = str(label, "utf-8")
        target = str(labels[label].path(), "utf-8")
        print(label_text + " = " + target, file=outfile)
# Command-line entry point: run the input through cpp, parse the result and
# print the canonical form.
#
# Options handled here: "-o FILE" / "-oFILE" selects the output file and
# "--output-labels" appends the label table. Every other argument is passed
# through to cpp.
if len(sys.argv) <= 1:
    print("Usage:", sys.argv[0], "[cpp options] file...")
    sys.exit(1)

args = ["cpp", "-nostdinc", "-undef", "-x", "assembler-with-cpp"]
outfile = sys.stdout
output_labels = False
idx = 1
while idx < len(sys.argv):
    arg = sys.argv[idx]
    if arg == "-o":
        idx += 1
        # "-o" as the last argument has no file name to consume.
        if idx >= len(sys.argv):
            print("Option -o requires an argument", file=sys.stderr)
            sys.exit(1)
        outfile = open(sys.argv[idx], "w")
    elif arg.startswith("-o"):
        outfile = open(arg[2:], "w")
    elif arg == "--output-labels":
        output_labels = True
    else:
        args += [arg]
    idx += 1

try:
    # The context manager closes the pipe and waits for cpp on exit, so the
    # child is reaped even when parsing fails part-way through its output.
    with subprocess.Popen(args, stdout=subprocess.PIPE) as proc:
        r = Reader(proc.stdout)
        roots, labels = parse_document(r)

    # A failed preprocessor run produces empty or truncated input; do not
    # present the result as a valid canonical form.
    if proc.returncode != 0:
        print(f"cpp exited with status {proc.returncode}", file=sys.stderr)
        sys.exit(1)

    print_roots(roots, outfile)
    if output_labels:
        print("", file=outfile)
        print("Labels:", file=outfile)
        print_labels(labels, outfile)
except ParseError as ex:
    print(f"Line {ex.line}, col {ex.column}: {ex.msg}", file=sys.stderr)
    if ex.node:
        print("While parsing node: " + str(ex.node.path(), "utf-8"), file=sys.stderr)
    print("", file=sys.stderr)
    traceback.print_exception(ex)
    sys.exit(1)
finally:
    # Flush and close a file opened via -o; leave sys.stdout alone.
    if outfile is not sys.stdout:
        outfile.close()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment