Skip to content

Instantly share code, notes, and snippets.

@thatrandomperson5
Created January 11, 2023 01:13
Show Gist options
  • Save thatrandomperson5/482fb4562b72e46900cf736f72759643 to your computer and use it in GitHub Desktop.
Save thatrandomperson5/482fb4562b72e46900cf736f72759643 to your computer and use it in GitHub Desktop.
Pure-Nim hand-written XML parser, using only the standard library
import parser
# Sample document: nested tags, attributes, and text interleaved with tags.
const document = """
<MyTag>
Hello
<OtherTag attr1="hello" attr2="nono"></OtherTag>
<Container>
I'm shallow
<Deep>I'm down deep!</Deep>
I'm shallow
</Container>
</MyTag>
"""

# Parse the sample, then print it back as XML followed by the indented
# debug rendering of the tree.
let tree = parse(makeParser(document))
echo $tree
echo repr(tree)
import std/[sequtils, algorithm, strutils, strformat, strtabs]
type Parser = object
  ## Parsing state: the characters of the document that are still unread.
  stack: seq[char] ## Used as a stack: `makeParser` stores the input reversed,
                   ## so `pop` yields the characters in document order.
type
  XmlTag* = ref object
    ## A node of the parsed document: either an element (tag) or a run of
    ## text. The type and its `isText` discriminator are exported so that
    ## importing modules can name the type and check which branch is active
    ## before touching the branch-specific fields.
    case isText*: bool
    of false:
      name*: string           ## Tag name.
      children*: seq[XmlTag]  ## Child nodes (tags and text) in order.
      attrs*: StringTableRef  ## Attribute name -> value.
    of true:
      text*: string           ## Raw text content.
  ParseResult = object
    ## Outcome of one attempt to read a tag from the character stack.
    case didWork: bool
    of false:
      processed: seq[char]    ## Characters consumed by the failed attempt.
    of true:
      s: string               ## Text found between the tag delimiters.
  UnPairedKind {.pure.} = enum Start, End, Text
  UnPaired = object
    ## A flat token produced before tags are matched into a tree.
    kind: UnPairedKind
    s: string                 ## Tag head, tag name, or text content.
    depth: int                ## Nesting level recorded for the token.
proc makeParser*(xml: string): Parser =
  ## Creates a parser for `xml`. Surrounding whitespace is stripped and the
  ## remaining characters are stored back to front, so popping from the end
  ## of the stack walks the document from its first character.
  let trimmed = strip(xml)
  var chars = newSeqOfCap[char](trimmed.len)
  for idx in countdown(trimmed.high, 0):
    chars.add trimmed[idx]
  result = Parser(stack: chars)
proc parseHead(head: string): (string, StringTableRef) =
  ## Splits the inside of a start tag (the text between `<` and `>`) into the
  ## tag name and a table of its `name="value"` attributes.
  ##
  ## The tag name is everything up to the first whitespace character.
  ## Attributes may be separated by any amount of whitespace. Each value must
  ## be enclosed in double quotes; a malformed attribute trips a `doAssert`.
  ## Trailing characters that contain no `=` are ignored.
  var pos = 0

  # Tag name: read until whitespace or the end of the head. Bounding the scan
  # by `head.len` keeps one-character names and empty heads from running off
  # the end of the input.
  var tag = ""
  while pos < head.len and head[pos] notin Whitespace:
    tag.add head[pos]
    inc pos

  let tbl = newStringTable()
  while true:
    # Skip the whitespace separating the tag name / previous attribute.
    while pos < head.len and head[pos] in Whitespace:
      inc pos
    if pos >= head.len:
      break

    # Attribute name: everything up to '='.
    var name = ""
    while pos < head.len and head[pos] != '=':
      name.add head[pos]
      inc pos
    if pos >= head.len:
      # No '=' before the end of the head: nothing to store.
      break
    inc pos # skip '='

    # Attribute value: must be double-quoted and terminated.
    doAssert pos < head.len and head[pos] == '"'
    inc pos
    var value = ""
    while pos < head.len and head[pos] != '"':
      value.add head[pos]
      inc pos
    doAssert pos < head.len
    inc pos # skip the closing quote
    tbl[name] = value

    # A value must be followed by whitespace or the end of the head.
    doAssert pos >= head.len or head[pos] in Whitespace
  result = (tag, tbl)
proc parseStartTag(s: var seq[char]): ParseResult =
  ## Tries to read a start tag (`<...>`) from the top of the stack `s`.
  ##
  ## On success `didWork` is true and the result's `s` field holds the text
  ## between `<` and `>`. On failure `didWork` is false and `processed` holds
  ## the characters gathered by the attempt: just the first character when it
  ## is not `<`, otherwise the characters collected before the input ran out.
  ## A tag whose `>` is the very last character of the input is also reported
  ## as a failure.
  let first = s.pop
  if first != '<':
    return ParseResult(didWork: false, processed: @[first])

  var
    current = first
    body = ""
    consumed: seq[char]
  while current != '>' and s.len != 0:
    consumed.add current
    body.add current
    current = s.pop
    if s.len == 0:
      # Input exhausted by this pop: give up on the tag.
      return ParseResult(didWork: false, processed: consumed)

  # `body` still begins with the opening '<'; drop it.
  result = ParseResult(didWork: true, s: body[1..^1])
proc parseEndTag(s: var seq[char]): ParseResult =
  ## Tries to read an end tag (`</...>`) from the top of the stack `s`.
  ##
  ## On success `didWork` is true and the result's `s` field holds the tag
  ## name. On failure `didWork` is false and `processed` holds the characters
  ## gathered by the attempt: just the first character when the input does
  ## not start with `</`, otherwise the characters collected before the input
  ## ran out without reaching `>`.
  let first = s.pop
  # Not an end tag: nothing left to look at, or the next two characters are
  # not "</".
  if s.len == 0 or first != '<' or s[^1] != '/':
    return ParseResult(didWork: false, processed: @[first])

  var
    consumed = @[s.pop] # the '/'
    current = first
    body = ""
  while current != '>' and s.len != 0:
    consumed.add current
    body.add current
    current = s.pop
    if s.len == 0 and current != '>':
      # Ran out of input before the closing '>'.
      return ParseResult(didWork: false, processed: consumed)

  # `body` still begins with the opening '<'; drop it.
  result = ParseResult(didWork: true, s: body[1..^1])
proc parseText(s: var seq[char]): string =
  ## Pops characters off the stack `s` until the next `<` or the end of the
  ## input, and returns them as text. The `<` itself is left on the stack.
  ##
  ## Returns "" when the stack is empty or already has `<` on top. Stopping
  ## at the end of the input (instead of popping past it) lets text that is
  ## not followed by any tag be read without raising.
  while s.len > 0 and s[^1] != '<':
    result.add s.pop
proc parseInternal(stack: var seq[char], depth: var int): seq[UnPaired] =
  ## Tokenizes the whole character stack into a flat list of start tags, end
  ## tags and text runs, tracking the nesting level in `depth`.
  ##
  ## Each loop iteration reads exactly one token, trying in order: end tag,
  ## start tag, text. A start tag is recorded at the depth it opens (after
  ## the increment); an end tag at the depth it closes (before the
  ## decrement); text at the current depth.
  ##
  ## Implemented as a loop rather than one recursive call per token, so the
  ## call stack does not grow with the document and an empty stack simply
  ## yields no tokens.
  while stack.len != 0:
    let et = stack.parseEndTag()
    if et.didWork:
      result.add UnPaired(s: et.s, kind: End, depth: depth)
      depth -= 1
      continue
    # Not an end tag: put back what the attempt consumed and try a start tag.
    stack.add et.processed

    let st = stack.parseStartTag()
    if st.didWork:
      depth += 1
      result.add UnPaired(s: st.s, kind: Start, depth: depth)
      continue
    # Not a start tag either: put the characters back and read text.
    stack.add st.processed
    result.add UnPaired(s: stack.parseText(), kind: Text, depth: depth)
proc resolve(pup: seq[UnPaired]): XmlTag =
  ## Builds the tag tree for a token run. `pup[0]` is taken as the opening
  ## token of the tag and the last token is dropped as its closer; everything
  ## in between is scanned for direct children.
  ##
  ## Text at the opener's depth becomes a text child. A start token one level
  ## deeper marks where a child tag begins, and the end token at that level
  ## closes it; that slice is resolved recursively.
  let opener = pup[0]
  let inner = pup[1 .. ^2]
  let (tagName, tagAttrs) = parseHead(opener.s)
  result = XmlTag(isText: false, name: tagName, attrs: tagAttrs)

  let childDepth = opener.depth + 1
  var childStart: int # index in `inner` of the latest direct-child start tag
  for idx, token in inner:
    if token.kind == UnPairedKind.Text:
      if token.depth == opener.depth:
        result.children.add XmlTag(isText: true, text: token.s)
    elif token.depth == childDepth:
      if token.kind == UnPairedKind.Start:
        childStart = idx
      else:
        # End of a direct child: resolve its whole token slice.
        result.children.add resolve(inner[childStart .. idx])
proc parse*(p: Parser): XmlTag =
  ## Parses the document held by `p` and returns its root tag.
  ##
  ## Works on a copy of the parser's stack, so `p` itself is not modified.
  ## Asserts that the whole input was consumed and that every opened tag was
  ## closed.
  var
    s = p.stack
    depth = 0
  let tokens = parseInternal(s, depth)
  doAssert s.len == 0
  doAssert depth == 0
  result = resolve(tokens)
proc `$`*(xml: XmlTag): string =
  ## Renders the node back to XML text: a text node yields its text as is, a
  ## tag yields `<name attrs>` followed by its rendered children and the
  ## closing `</name>`. No escaping is applied.
  if xml.isText:
    return xml.text

  result = "<" & xml.name
  for key, value in xml.attrs:
    result.add " " & key & "=\"" & value & "\""
  result.add ">"
  for child in xml.children:
    result.add $child
  result.add "</" & xml.name & ">"
proc reprCore(xml: XmlTag, depth: var int, i: int): string =
  ## Renders `xml` and its descendants as an indented outline, one node per
  ## line. `depth` is raised on entry and restored on exit; each level is
  ## indented by `i` spaces. Line breaks inside text are shown as `\n` / `\r`.
  inc depth
  let pad = ' '.repeat(depth * i)
  if xml.isText:
    let noLf = xml.text.replace("\n", "\\n")
    let shown = noLf.replace("\r", "\\r")
    result = pad & "Text: \"" & shown & "\"\n"
  else:
    result = pad & "Tag: " & xml.name & " " & $(xml.attrs) & "\n"
    for child in xml.children:
      result.add reprCore(child, depth, i)
  dec depth
proc repr*(xml: XmlTag, indent = 2): string =
  ## Returns an indented, line-per-node debug rendering of the tree rooted at
  ## `xml`, using `indent` spaces per nesting level.
  var level = -1 # reprCore raises the level on entry, so the root is level 0
  result = reprCore(xml, level, indent)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment