Skip to content

Instantly share code, notes, and snippets.

@magicoal-nerb
Last active July 21, 2025 03:42
Show Gist options
  • Save magicoal-nerb/4a1c06c9af789d8cf4415acaa11ebf79 to your computer and use it in GitHub Desktop.
Save magicoal-nerb/4a1c06c9af789d8cf4415acaa11ebf79 to your computer and use it in GitHub Desktop.
xml parser in luajit
-- Xml.lua
-- Parses xml files
-- poopbarrel/magicoal_nerb :^)
-- This parser makes a few assumptions about our data:
-- * element tags are not separated by whitespace
-- * no comments
local Stack = require("Stack")
local ffi = require("ffi")
-- Per-byte character-class bitmask table.
-- Indexed with (byte value + 1) because Lua arrays are 1-based; Xml.parse
-- looks entries up as XML_CLASSIFIER[ptr[read] + 1].
--
-- Bits that Xml.parse actually tests:
--   0x07 (any of the low three bits set) -> byte may be part of a tag or
--        attribute name. Besides letters and digits this includes ':', '-'
--        and '?'; '_' and '.' carry none of these bits, so a name scan
--        stops when it reaches them.
--   0x20 -> byte is whitespace (tab, LF, VT, FF, CR, space, and byte 160).
--
-- The remaining bits (0x08, 0x10, 0x40, 0x80) are never read in this file.
-- NOTE(review): they look like ctype-style control / punctuation / hex-digit /
-- blank flags for a Latin-1 code page -- confirm before relying on them.
local XML_CLASSIFIER = {
8, 8, 8, 8, 8, 8, 8, 8, 8, 40, 40, 40, 40, 40, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
160, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
66, 16, 16, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 7, 16, 16,
16, 16, 66, 16, 65, 65, 65, 65, 65, 65, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 16,
16, 16, 16, 16, 16, 66, 66, 66, 66, 66, 66, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 16,
16, 16, 16, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 160, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
16, 16, 16, 16, 16, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 16, 1, 1, 1, 1, 1, 1,
1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 16, 2, 2, 2, 2, 2, 2, 2, 2
}
-- Stack of ancestor nodes shared by every Xml.parse call: parse clears it on
-- entry, pushes the current parent when an element opens and pops it again
-- when a closing tag is seen.
-- NOTE(review): 16 is presumably an initial capacity for Stack.new -- confirm
-- against the Stack module.
local XmlStack = Stack.new(16)
-- Module table returned at the end of the file; its functions take the node
-- as an explicit first argument (Xml.get(element, ...)).
local Xml = {}
-- Makes Xml usable as a metatable whose lookups fall back to Xml itself
-- (no setmetatable call appears in this file).
Xml.__index = Xml
-- Calls `fn(child)` for every direct child of `element` whose `tag` field
-- equals `tag`. Children are visited in array order; deeper descendants are
-- not searched.
--   element: node table with a `children` array
--   tag:     value compared (with ==) against each child's `tag` field
--   fn:      callback invoked with each matching child
function Xml.forEachTag(element, tag, fn)
    local children = element.children
    for _, candidate in ipairs(children) do
        local matches = (candidate.tag == tag)
        if matches then
            fn(candidate)
        end
    end
end
-- Calls `fn(child)` once for each direct child of `element`, in array order.
--   element: node table with a `children` array
--   fn:      callback invoked with each child
function Xml.forEach(element, fn)
    local children = element.children
    for _, node in ipairs(children) do
        fn(node)
    end
end
-- Returns the first direct child of `element` whose `tag` field equals `tag`.
-- Falls off the end (returning nothing) when no child matches.
--   element: node table with a `children` array
--   tag:     value compared (with ==) against each child's `tag` field
function Xml.get(element, tag)
    local children = element.children
    for _, candidate in ipairs(children) do
        if candidate.tag == tag then
            return candidate
        end
    end
end
-- Reads the file at `path` and parses its whole contents with Xml.parse.
--   path: filesystem path of the XML document
-- Returns the root node produced by Xml.parse.
-- Raises an error carrying io.open's message when the file cannot be opened,
-- and propagates any error raised by Xml.parse.
function Xml.loadFromFile(path)
    -- assert turns io.open's (nil, message) failure into a readable error
    -- instead of an "attempt to index a nil value" on the next line.
    local file = assert(io.open(path, 'r'))
    local content = file:read('*a')
    -- Close before parsing so the handle is released even if parse raises.
    file:close()
    return Xml.parse(content)
end
-- Parses an XML document held in a string into a tree of node tables.
--
-- Every element becomes a table with:
--   tag      - element name (also stored under `class`, the field this
--              parser originally used, so existing readers keep working;
--              `tag` is what Xml.get / Xml.forEachTag compare against)
--   props    - map of attribute name -> attribute value (both strings)
--   children - array of child element nodes in document order
--   body     - optional; the text between the opening tag and the next '<'
--              (leading whitespace skipped), or, when the element contains a
--              `<![...]]>` section, the raw text from '!' through "]]>"
--
-- Input restrictions enforced or assumed by the scanner:
--   * the document must start with '<', and a CDATA section must be followed
--     directly by '<' (or the end of input)
--   * attribute values must be double-quoted and directly follow `name=`
--   * comments, `<?...?>` declarations and self-closing tags are not handled
--   * closing-tag names are not compared with the opening tag
--
--   content: string containing the whole document
-- Returns the synthetic root node (`tag = "root"`) whose children are the
-- top-level elements.
-- Raises an error (via assert) on malformed or truncated input.
function Xml.parse(content)
    XmlStack:clear()

    local band = bit.band
    local classes = XML_CLASSIFIER
    -- Masks into XML_CLASSIFIER.
    local NAME_MASK, SPACE_MASK = 0x7, 0x20
    -- Byte values of the punctuation the scanner looks for.
    local LT, GT, SLASH, BANG = 60, 62, 47, 33
    local EQUALS, QUOTE, RBRACKET = 61, 34, 93

    local node = {
        children = {},
        props = {},
        tag = "root",
    }

    local length = #content
    local ptr = ffi.cast("uint8_t*", content)
    local read = 0

    -- Valid indices are 0 .. length - 1. Index `length` is the NUL that
    -- LuaJIT keeps after string data; it is neither a name character nor
    -- whitespace, so the classifier-driven loops stop there on their own.
    -- Loops that search for one specific byte are bounded explicitly so that
    -- truncated input can never make them read beyond the buffer.
    while read < length do
        assert(ptr[read] == LT, "expected '<'")
        local current = ptr[read + 1]

        if current == SLASH then
            -- Closing tag: the parent becomes the current node again.
            node = XmlStack:pop()
            assert(node, "closing tag without a matching opening tag")

            while read < length and ptr[read] ~= GT do
                read = read + 1
            end
            assert(read < length, "unterminated closing tag")

            -- Skip whatever follows up to the next '<' (or the end of input).
            while read < length and ptr[read] ~= LT do
                read = read + 1
            end
        elseif current == BANG then
            -- "<!...": scan to the first "]]>" and store the raw text, from
            -- '!' through "]]>", as the current node's body.
            local valueStart = read + 1
            while true do
                assert(read + 2 < length, "unterminated CDATA section")
                if ptr[read] == RBRACKET
                    and ptr[read + 1] == RBRACKET
                    and ptr[read + 2] == GT then
                    break
                end
                read = read + 1
            end
            read = read + 3
            node.body = ffi.string(ptr + valueStart, read - valueStart)
        else
            -- <name [attr="value" ...]>
            read = read + 1
            local nameStart = read
            while band(classes[ptr[read] + 1], NAME_MASK) ~= 0 do
                read = read + 1
            end

            local name = ffi.string(ptr + nameStart, read - nameStart)
            local props = {}
            local newNode = {
                tag = name,
                class = name,
                props = props,
                children = {},
            }

            -- Attributes, up to the closing '>'.
            while ptr[read] ~= GT do
                if band(classes[ptr[read] + 1], SPACE_MASK) ~= 0 then
                    read = read + 1
                else
                    local fieldStart = read
                    while band(classes[ptr[read] + 1], NAME_MASK) ~= 0 do
                        read = read + 1
                    end
                    local field = ffi.string(ptr + fieldStart, read - fieldStart)

                    assert(ptr[read] == EQUALS, "expected '=' after attribute name")
                    assert(ptr[read + 1] == QUOTE, "expected '\"' to open attribute value")
                    read = read + 2

                    local valueStart = read
                    while read < length and ptr[read] ~= QUOTE do
                        read = read + 1
                    end
                    assert(read < length, "unterminated attribute value")

                    props[field] = ffi.string(ptr + valueStart, read - valueStart)
                    read = read + 1
                end
            end

            -- Attach to the parent, then descend into the new element.
            table.insert(node.children, newNode)
            XmlStack:push(node)
            read = read + 1
            node = newNode

            -- Skip whitespace after the opening tag.
            while band(classes[ptr[read] + 1], SPACE_MASK) ~= 0 do
                read = read + 1
            end

            -- Text up to the next '<' becomes the body; `read` is left on
            -- that '<' so the outer loop handles it next.
            local bodyStart = read
            while read < length and ptr[read] ~= LT do
                read = read + 1
            end
            if read > bodyStart then
                node.body = ffi.string(ptr + bodyStart, read - bodyStart)
            end
        end
    end

    -- Anything left on the stack means an element was never closed.
    assert(not XmlStack:pop(), "Nodes are invalid D:")
    return node
end
return Xml
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment