Last active
July 21, 2025 03:42
-
-
Save magicoal-nerb/4a1c06c9af789d8cf4415acaa11ebf79 to your computer and use it in GitHub Desktop.
xml parser in luajit
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Xml.lua
-- Parses XML files.
-- poopbarrel/magicoal_nerb :^)
-- This parser makes a few assumptions about its input:
-- * element tags are not separated by whitespace
-- * no comments
local Stack = require("Stack") | |
local ffi = require("ffi") | |
-- Per-byte classification flags. The table is indexed with (byte value + 1)
-- because Lua tables are 1-based. Xml.parse tests two masks against an entry:
--   0x07 -> the byte may be part of a tag or attribute name
--   0x20 -> the byte is whitespace
-- NOTE(review): the other bits (0x08, 0x10, 0x40, 0x80) are not read by the
-- code visible in this file; they look like ctype-style flags -- confirm
-- before relying on them.
local XML_CLASSIFIER = { | |
8, 8, 8, 8, 8, 8, 8, 8, 8, 40, 40, 40, 40, 40, 8, | |
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, | |
160, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, | |
66, 16, 16, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 7, 16, 16, | |
16, 16, 66, 16, 65, 65, 65, 65, 65, 65, 1, 1, 1, 1, | |
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 16, | |
16, 16, 16, 16, 16, 66, 66, 66, 66, 66, 66, 2, 2, 2, | |
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 16, | |
16, 16, 16, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
0, 160, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, | |
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, | |
16, 16, 16, 16, 16, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 16, 1, 1, 1, 1, 1, 1, | |
1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
2, 2, 2, 2, 2, 2, 2, 16, 2, 2, 2, 2, 2, 2, 2, 2 | |
} | |
-- Stack of ancestor nodes used by Xml.parse. It is cleared at the start of
-- every parse call, so this single instance is shared by all of them.
-- NOTE(review): the 16 passed to Stack.new is presumably an initial
-- capacity -- confirm against the Stack module.
local XmlStack = Stack.new(16) | |
-- Module table; __index points back at the table itself.
local Xml = {} | |
Xml.__index = Xml | |
--- Invokes a callback for each direct child whose `tag` equals the given one.
-- Only the immediate children are visited; the walk does not recurse.
-- @param element  node table with a `children` array
-- @param tag      value compared (with ==) against each child's `tag` field
-- @param fn       function called with every matching child
function Xml.forEachTag(element, tag, fn)
    local children = element.children
    for _, candidate in ipairs(children) do
        local matches = candidate.tag == tag
        if matches then
            fn(candidate)
        end
    end
end
--- Invokes a callback for every direct child of a node, in array order.
-- @param element  node table with a `children` array
-- @param fn       function called with each child
function Xml.forEach(element, fn)
    local children = element.children
    for _, node in ipairs(children) do
        fn(node)
    end
end
--- Finds the first direct child whose `tag` equals the given one.
-- @param element  node table with a `children` array
-- @param tag      value compared (with ==) against each child's `tag` field
-- @return the first matching child; nothing is returned when none matches
function Xml.get(element, tag)
    local children = element.children
    for _, candidate in ipairs(children) do
        if candidate.tag == tag then
            return candidate
        end
    end
    -- No match: fall off the end without returning a value.
end
--- Reads a whole file and parses its contents with Xml.parse.
-- @param path  path handed to io.open (opened in 'r' mode)
-- @return the value returned by Xml.parse for the file's contents
-- Raises an error carrying io.open's message when the file cannot be
-- opened, and propagates any error raised by Xml.parse.
function Xml.loadFromFile(path)
    -- assert turns io.open's (nil, message) failure into a readable error
    -- instead of a nil-index error on the next line.
    local file = assert(io.open(path, 'r'))

    -- Read and close before parsing, so the handle is released even when
    -- Xml.parse raises on malformed input.
    local content = file:read('*a')
    file:close()

    local data = Xml.parse(content)
    return data
end
--- Parses an XML document held in a Lua string into a tree of node tables.
--
-- Every element becomes a table with these fields:
--   tag, class : the element name. `class` is kept for existing callers;
--                `tag` is the field Xml.get and Xml.forEachTag compare.
--   props      : attribute name -> attribute value (both strings)
--   children   : array of child element nodes, in document order
--   body       : text between the start tag and the next '<' (leading
--                whitespace skipped), or the text of a '<!...]]>' section
--                from the '!' through the closing ']]>'. Absent when the
--                element has neither.
--
-- Limitations of the scanner: attribute values must be double-quoted,
-- self-closing tags are rejected by an assertion, any '<!' section is
-- scanned up to the next ']]>', and the name inside a closing tag is not
-- compared with the element being closed.
--
-- @param content  string containing the XML text
-- @return the synthetic node (tag "root") whose children are the top-level
--         elements
-- Raises an error when an assertion about the input fails, including when
-- elements are left unclosed at the end of the input.
function Xml.parse(content)
    -- Byte values of the characters the scanner looks for.
    local LT, GT = 60, 62          -- '<' '>'
    local SLASH, BANG = 47, 33     -- '/' '!'
    local EQUALS, QUOTE = 61, 34   -- '=' '"'
    local RBRACKET = 93            -- ']'
    -- Masks applied to XML_CLASSIFIER entries.
    local NAME_MASK, SPACE_MASK = 0x7, 0x20
    local band = bit.band

    XmlStack:clear()

    local node = {
        children = {},
        props = {},
        tag = "root",
    }

    local length = #content
    local ptr = ffi.cast("uint8_t*", content)
    local read = 0

    while read < length do
        assert(ptr[read] == LT, "expected '<'")

        -- Look at the byte after '<' without reading past the input.
        local current = 0
        if read + 1 < length then
            current = ptr[read + 1]
        end

        if current == SLASH then
            -- '</name>': go back to the parent element.
            node = XmlStack:pop()
            while read < length and ptr[read] ~= GT do
                read = read + 1
            end
            -- Skip to the next tag. The bound stops the scan at the end of
            -- the input; without it the last closing tag of a document
            -- sends the scan past the string buffer.
            while read < length and ptr[read] ~= LT do
                read = read + 1
            end
        elseif current == BANG then
            -- '<!...]]>' section (used for CDATA strings). The stored text
            -- starts at the '!' and includes the terminating ']]>'.
            local valueStart = read + 1
            while read + 2 < length
                and not (ptr[read] == RBRACKET
                    and ptr[read + 1] == RBRACKET
                    and ptr[read + 2] == GT) do
                read = read + 1
            end
            assert(read + 2 < length, "unterminated '<!' section: missing ']]>'")
            read = read + 3
            node.body = ffi.string(ptr + valueStart, read - valueStart)
        else
            -- <tag [fields..?]>
            read = read + 1
            local nameStart = read
            while read < length
                and band(XML_CLASSIFIER[ptr[read] + 1], NAME_MASK) ~= 0 do
                read = read + 1
            end

            local name = ffi.string(ptr + nameStart, read - nameStart)
            local props = {}
            local newNode = {
                -- Xml.get / Xml.forEachTag match on `tag`; `class` is the
                -- field this parser has always written.
                tag = name,
                class = name,
                props = props,
                children = {},
            }

            -- Read fields until the closing '>'.
            while read < length and ptr[read] ~= GT do
                if band(XML_CLASSIFIER[ptr[read] + 1], SPACE_MASK) ~= 0 then
                    -- Whitespace between fields; just ignore.
                    read = read + 1
                else
                    -- Field name.
                    local fieldStart = read
                    while read < length
                        and band(XML_CLASSIFIER[ptr[read] + 1], NAME_MASK) ~= 0 do
                        read = read + 1
                    end
                    local field = ffi.string(ptr + fieldStart, read - fieldStart)

                    -- The name must be followed by ="
                    assert(read < length and ptr[read] == EQUALS,
                        "expected '=' after attribute name")
                    assert(read + 1 < length and ptr[read + 1] == QUOTE,
                        "expected '\"' to open attribute value")
                    read = read + 2

                    -- Value runs up to the closing quote.
                    local valueStart = read
                    while read < length and ptr[read] ~= QUOTE do
                        read = read + 1
                    end
                    assert(read < length, "unterminated attribute value")
                    props[field] = ffi.string(ptr + valueStart, read - valueStart)
                    read = read + 1
                end
            end
            assert(read < length, "unterminated start tag: missing '>'")

            -- Attach the element and descend into it.
            table.insert(node.children, newNode)
            XmlStack:push(node)
            read = read + 1
            node = newNode

            -- Skip whitespace in front of the body.
            while read < length
                and band(XML_CLASSIFIER[ptr[read] + 1], SPACE_MASK) ~= 0 do
                read = read + 1
            end

            -- The body runs up to the next '<' (or the end of the input).
            -- `read` is left on that '<' so the outer loop picks it up.
            local bodyStart = read
            while read < length and ptr[read] ~= LT do
                read = read + 1
            end
            if read > bodyStart then
                node.body = ffi.string(ptr + bodyStart, read - bodyStart)
            end
        end
    end

    -- Anything still on the stack means an element was never closed.
    assert(not XmlStack:pop(), "Nodes are invalid D:")
    return node
end
return Xml |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment