Skip to content

Instantly share code, notes, and snippets.

@lpenaud
Last active January 22, 2025 17:42
Show Gist options
  • Save lpenaud/a547b2844045d7fadc91b4cde5688fd9 to your computer and use it in GitHub Desktop.
Save lpenaud/a547b2844045d7fadc91b4cde5688fd9 to your computer and use it in GitHub Desktop.
XmlTransformStream
/**
 * A node of the tree assembled by `XmlNodeTreeTransformStream`.
 */
export interface TreeXmlNode {
// Tag name, copied from the start mark that opened this node.
name: string;
// Text content, taken from the end mark that closes this node.
content: string;
// Nodes opened while this node was the current one, in document order.
children: TreeXmlNode[];
}
/**
 * A single tag event emitted by `XmlNodeTransformStream`.
 */
export interface XmlMark {
// Text found between the angle brackets of the tag (without a leading "/").
name: string;
// Text that precedes an end tag; always "" on start marks.
content: string;
// Either START_XML_NODE or END_XML_NODE.
type: symbol;
}
/** `XmlMark.type` value for a mark that opens a node. */
export const START_XML_NODE = Symbol("START_NODE");
/** `XmlMark.type` value for a mark that closes a node. */
export const END_XML_NODE = Symbol("END_NODE");
/**
 * Splits a stream of XML text into start/end marks.
 *
 * Every `<tag>` produces a START mark, every `</tag>` produces an END mark
 * carrying the text that precedes it, and every `<tag/>` produces a START
 * mark immediately followed by an END mark with empty content.
 *
 * Chunks may be cut anywhere: text that follows the last complete tag
 * (including a partially received tag) is buffered and re-examined together
 * with the next chunk, so element content is never lost at a chunk boundary.
 *
 * Limitations: the tag "name" is everything between the angle brackets, so
 * attributes are part of it; comments, declarations and CDATA sections get
 * no special treatment. Data still buffered when the stream closes is
 * dropped.
 */
export class XmlNodeTransformStream extends TransformStream<string, XmlMark> {
  /** Text received after the last fully processed tag. */
  #pending: string;

  constructor() {
    super({
      transform: (chunk, controller) => this.#transform(chunk, controller),
    });
    this.#pending = "";
  }

  /**
   * Emits one mark per complete tag found in the buffered text plus `chunk`.
   *
   * @param chunk Next piece of decoded XML text.
   * @param controller Controller receiving the produced marks.
   */
  #transform(
    chunk: string,
    controller: TransformStreamDefaultController<XmlMark>,
  ): void {
    const text = this.#pending + chunk;
    // Index just past the ">" of the last tag that was fully processed.
    let consumed = 0;
    let open = text.indexOf("<");
    while (open !== -1) {
      const close = text.indexOf(">", open + 1);
      if (close === -1) {
        // The tag is split across chunks: wait for the rest of it.
        break;
      }
      const tag = text.substring(open + 1, close);
      if (tag.startsWith("/")) {
        // End of a node: its content is everything since the previous tag.
        controller.enqueue({
          name: tag.substring(1),
          content: text.substring(consumed, open),
          type: END_XML_NODE,
        });
      } else if (tag.endsWith("/")) {
        // Node without content: open and close it in one go.
        const name = tag.slice(0, -1).trimEnd();
        controller.enqueue({ name, content: "", type: START_XML_NODE });
        controller.enqueue({ name, content: "", type: END_XML_NODE });
      } else {
        controller.enqueue({ name: tag, content: "", type: START_XML_NODE });
      }
      consumed = close + 1;
      open = text.indexOf("<", consumed);
    }
    // Keep whatever has not been attributed to a tag yet: it may be the
    // beginning of a tag or of the content of the next end tag.
    this.#pending = text.substring(consumed);
  }
}
/**
 * Assembles a stream of XML marks into tree nodes.
 *
 * A START mark opens a new node as a child of the node currently being
 * built. An END mark stores its content on the current node, emits that
 * node and makes its parent current again, so inner nodes are emitted
 * before the node that contains them. Any mark whose type is not
 * `START_XML_NODE` is treated as an END mark.
 *
 * An END mark received while no node is open is ignored, so the internal
 * placeholder that collects top-level nodes is never emitted.
 */
export class XmlNodeTreeTransformStream
  extends TransformStream<XmlMark, TreeXmlNode> {
  /** Nodes that are open above the current one, outermost first. */
  #ancestors: TreeXmlNode[];
  /** Node that receives the next child or the next END mark. */
  #current: TreeXmlNode;

  constructor() {
    super({
      transform: (mark, controller) => this.#transform(mark, controller),
    });
    this.#ancestors = [];
    // Nameless placeholder acting as the parent of every top-level node.
    this.#current = { children: [], content: "", name: "" };
  }

  /**
   * Applies one mark to the tree under construction.
   *
   * @param mark Mark to apply.
   * @param controller Controller receiving every node that gets closed.
   */
  #transform(
    { content, name, type }: XmlMark,
    controller: TransformStreamDefaultController<TreeXmlNode>,
  ): void {
    if (type === START_XML_NODE) {
      const node: TreeXmlNode = { children: [], content, name };
      this.#current.children.push(node);
      this.#ancestors.push(this.#current);
      this.#current = node;
      return;
    }
    const parent = this.#ancestors.pop();
    if (parent === undefined) {
      // Unbalanced END mark: there is no open node to close.
      return;
    }
    this.#current.content = content;
    controller.enqueue(this.#current);
    this.#current = parent;
  }
}
// Command-line entry point: parse the XML read from standard input and print
// the last node emitted by the tree stream.
if (import.meta.main) {
  const decoded = Deno.stdin.readable.pipeThrough(new TextDecoderStream());
  const marks = decoded.pipeThrough(new XmlNodeTransformStream());
  const nodes = marks.pipeThrough(new XmlNodeTreeTransformStream());
  // Stays undefined when the input closes no node at all.
  let outermost: TreeXmlNode | undefined;
  for await (const node of nodes) {
    outermost = node;
  }
  console.log(outermost);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment