Created
October 15, 2019 15:49
-
-
Save X39/7bc5275d7b957b9fa8deeac038989414 to your computer and use it in GitHub Desktop.
XML Parser
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
////////////////////////////////////////////////////////////////////////////
// Copyright (c) 2019 Marco "X39" Silipo                                  //
//                                                                        //
// Permission is hereby granted, free of charge, to any person obtaining  //
// a copy of this software and associated documentation files (the        //
// "Software"), to deal in the Software without restriction, including    //
// without limitation the rights to use, copy, modify, merge, publish,    //
// distribute, sublicense, and/or sell copies of the Software, and to     //
// permit persons to whom the Software is furnished to do so, subject to  //
// the following conditions:                                              //
//                                                                        //
// The above copyright notice and this permission notice shall be         //
// included in all copies or substantial portions of the Software.        //
//                                                                        //
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,        //
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF     //
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND                  //
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE //
// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION //
// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION  //
// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.        //
////////////////////////////////////////////////////////////////////////////
/*
 * xml_fnc_parse
 *
 * Minimal recursive-descent parser for a subset of XML.
 *
 * Parameter:
 *   _text - STRING, the XML document.
 *
 * Returns: [PROLOG, NODES]
 *   PROLOG    - ARRAY of ATTRIBUTE found inside "<?xml ... ?>", [] when the
 *               document has no prolog.
 *   NODES     - ARRAY holding the single root NODE.
 *   NODE      - [NAME, ATTRIBUTES, CONTENT]; CONTENT is an ARRAY of child
 *               NODEs, or a STRING when the element contains text.
 *   NAME      - [namespace prefix or nil, local name].
 *   ATTRIBUTE - [NAME, value STRING].
 *
 * Throws: [_line, _col, _text_offset, message] on malformed input.
 *   Line/column tracking is not implemented; both are always -1 and only
 *   _text_offset (a 0-based character index) locates the error.
 *
 * Not supported: mixed content (text next to child elements), CDATA,
 * DOCTYPE, entity references. Inside attribute values a backslash escapes
 * the following character (see grammar below).
 */
xml_fnc_parse = {
    // ToDo: Take care of namespaces
    params [
        ["_text", nil, [""]]
    ];

    // All scanning is done on the code-point array so that every lookahead,
    // name and value uses the same index space. String `select`/`count` may
    // index differently from `toArray` for non-ASCII text (see forceUnicode).
    private _text_array = toArray _text;
    private _text_length = count _text_array;
    private _text_offset = 0;
    private _line = -1;
    private _col = -1;
    private _output = [];

    private _whitespaces = [" ", toString [9], toString [13], toString [10]];
    private _cp_space = 32;
    private _cp_dquote = 34;
    private _cp_squote = 39;
    private _cp_backslash = 92;
    // Characters that end a NAME: XML punctuation (including "/" so that
    // "<b/>" yields the name "b") plus space, tab, CR and LF.
    private _name_terminators = (toArray ":=""'<>&;/") + [32, 9, 13, 10];

    // "message" call _fail  -> throws the parser's error tuple.
    private _fail = { throw [_line, _col, _text_offset, _this]; };

    // [n] call _peek -> up to n characters starting at the cursor, "" at EOF.
    private _peek = {
        params ["_length"];
        if (_text_offset >= _text_length) exitWith { "" };
        toString (_text_array select [_text_offset, _length])
    };

    // Skips whitespace; returns true when at least one character was skipped.
    private _skip_chars = {
        private _flag = false;
        while { ([1] call _peek) in _whitespaces } do {
            _text_offset = _text_offset + 1;
            _flag = true;
        };
        _flag
    };

    // Skips consecutive "<!-- ... -->" comments; returns true when at least
    // one comment was skipped.
    private _skip_comments = {
        private _in_comment = false;
        while { ([4] call _peek) == "<!--" } do {
            _in_comment = true;
            _text_offset = _text_offset + 4;
            while { _text_offset < _text_length && { ([3] call _peek) != "-->" } } do {
                _text_offset = _text_offset + 1;
            };
            if (_text_offset >= _text_length) then { "Unterminated comment." call _fail; };
            _text_offset = _text_offset + 3;
        };
        _in_comment
    };

    // Skips any mix of whitespace and comments; returns true when anything
    // was skipped (the text loop in _node relies on this).
    private _skip = {
        private _skipped = false;
        while { ([] call _skip_chars) || { [] call _skip_comments } } do {
            _skipped = true;
        };
        _skipped
    };

    /*
        prolog_start    => "<?";
        prolog_end      => "?>";
        xml             => xml;
        node_start      => "<";
        node_start_fin  => "</"
        node_end        => ">";
        node_end_fin    => "/>";
        any             => .+;
        eq              => "=";
        quote           => " | ';
        bs              => \;
        DOCUMENT = [ PROLOG ] NODE;
        PROLOG = prolog_start xml ATTRIBUTE { ATTRIBUTE } prolog_end;
        NAME = any [ colon any ]
        NODE = node_start NAME { ATTRIBUTE } ( node_end_fin | node_end ( { NODE } | { TEXT } ) node_start_fin NAME node_end);
        ATTRIBUTE = NAME eq quote { any | bs any } quote;
    */
    private _t_prolog_start   = { ([2] call _peek) == "<?" };
    private _t_prolog_end     = { ([2] call _peek) == "?>" };
    private _t_xml            = { ([3] call _peek) == "xml" };
    private _t_node_start     = { ([1] call _peek) == "<" };
    private _t_node_start_fin = { ([2] call _peek) == "</" };
    private _t_node_end       = { ([1] call _peek) == ">" };
    private _t_node_end_fin   = { ([2] call _peek) == "/>" };
    private _t_eq             = { ([1] call _peek) == "=" };
    private _t_colon          = { ([1] call _peek) == ":" };

    //NAME = any [ colon any ]
    // A name can start wherever the cursor is on a non-terminator character.
    private _name_start = {
        _text_offset < _text_length && { !((_text_array select _text_offset) in _name_terminators) }
    };
    // Consumes characters up to the next terminator and returns them.
    private _read_name_part = {
        private _start = _text_offset;
        while { [] call _name_start } do {
            _text_offset = _text_offset + 1;
        };
        if (_text_offset == _start) exitWith { "" };
        toString (_text_array select [_start, _text_offset - _start])
    };
    private _name = {
        if !([] call _name_start) then { "Expected _name." call _fail; };
        // Built as a literal so the absent prefix stays nil without ever
        // reading an undefined variable.
        private _result = [nil, [] call _read_name_part];
        if ([] call _t_colon) then {
            _text_offset = _text_offset + 1; // consume ":"
            if !([] call _name_start) then { "Expected _name." call _fail; };
            _result set [0, _result select 1];
            _result set [1, [] call _read_name_part];
        };
        _result
    };

    //ATTRIBUTE = NAME eq quote { any | bs any } quote;
    private _attribute_start = _name_start;
    private _attribute = {
        if !([] call _name_start) then { "Expected _name_start." call _fail; };
        private _attributeName = [] call _name;
        [] call _skip;
        if ([] call _t_eq) then {
            _text_offset = _text_offset + 1;
        } else { "Expected _t_eq." call _fail; };
        [] call _skip;

        // The value may be delimited by either quote character; the closing
        // delimiter must match the opening one.
        private _quote = if (_text_offset < _text_length) then { _text_array select _text_offset } else { -1 };
        if !(_quote in [_cp_dquote, _cp_squote]) then { "Expected _t_quote." call _fail; };
        _text_offset = _text_offset + 1;

        private _attributeValue = [];
        private _escaped = false;
        private _closed = false;
        while { !_closed && { _text_offset < _text_length } } do {
            private _c = _text_array select _text_offset;
            _text_offset = _text_offset + 1;
            if (_escaped) then {
                // Character after a backslash is taken literally.
                _attributeValue pushBack _c;
                _escaped = false;
            } else {
                if (_c == _cp_backslash) then {
                    _escaped = true;
                } else {
                    if (_c == _quote) then {
                        _closed = true;
                    } else {
                        _attributeValue pushBack _c;
                    };
                };
            };
        };
        if (!_closed) then { "Expected _t_quote." call _fail; };
        [_attributeName, toString _attributeValue]
    };

    //PROLOG = prolog_start xml ATTRIBUTE { ATTRIBUTE } prolog_end;
    private _prolog_start = _t_prolog_start;
    private _prolog = {
        private _prolog_attributes = [];
        if ([] call _prolog_start) then {
            _text_offset = _text_offset + 2;
        } else { "Expected _t_prolog_start." call _fail; };
        if ([] call _t_xml) then {
            _text_offset = _text_offset + 3;
        } else { "Expected _t_xml." call _fail; };
        [] call _skip;
        while { _text_offset < _text_length && { !([] call _t_prolog_end) } } do {
            _prolog_attributes pushBack ([] call _attribute);
            [] call _skip;
        };
        if (_prolog_attributes isEqualTo []) then { "Expected _attribute." call _fail; };
        if ([] call _t_prolog_end) then {
            _text_offset = _text_offset + 2;
        } else { "Expected _t_prolog_end." call _fail; };
        _prolog_attributes
    };

    //NODE = node_start NAME { ATTRIBUTE } ( node_end_fin | node_end ( { NODE } | { TEXT } ) node_start_fin NAME node_end);
    private _node_start = _t_node_start;
    private _node = {
        private _attributes = [];
        private _nodes = [];
        private _isText = false;
        private _output = [nil, _attributes, _nodes];
        if ([] call _t_node_start) then {
            _text_offset = _text_offset + 1;
        } else { "Expected _t_node_start." call _fail; };
        [] call _skip;
        if ([] call _name_start) then {
            _output set [0, [] call _name];
        } else { "Expected _name_start." call _fail; };
        [] call _skip;
        // _attribute throws when it cannot make progress (including at end
        // of input), so this loop always terminates.
        while { !([] call _t_node_end) && !([] call _t_node_end_fin) } do {
            _attributes pushBack ([] call _attribute);
            [] call _skip;
        };
        if ([] call _t_node_end) then {
            _text_offset = _text_offset + 1;
            [] call _skip;
            if ([] call _node_start) then {
                // Child elements (also reached for an immediate end tag,
                // in which case the loop body never runs).
                while { !([] call _t_node_start_fin) && ([] call _node_start) } do {
                    _nodes pushBack ([] call _node);
                    [] call _skip;
                };
            } else { // text
                _isText = true;
                while { _text_offset < _text_length && { !([] call _t_node_start_fin) } } do {
                    _nodes pushBack (_text_array select _text_offset);
                    _text_offset = _text_offset + 1;
                    // Collapse any run of whitespace/comments to one space.
                    if ([] call _skip) then { _nodes pushBack _cp_space; };
                };
            };
            if ([] call _t_node_start_fin) then {
                _text_offset = _text_offset + 2;
            } else { "Expected _t_node_start_fin." call _fail; };
            [] call _skip;
            if ([] call _name_start) then {
                private _opening = _output select 0;
                private _closing = [] call _name;
                // Compare prefix and local name separately; `param` maps a
                // missing (nil) prefix to "" so nil never reaches isEqualTo.
                private _matches = ((_opening param [0, ""]) isEqualTo (_closing param [0, ""]))
                    && { (_opening select 1) isEqualTo (_closing select 1) };
                if (!_matches) then {
                    "Endnode not matching startnode." call _fail;
                };
            } else { "Expected _name_start." call _fail; };
            [] call _skip;
            if ([] call _t_node_end) then {
                _text_offset = _text_offset + 1;
            } else { "Expected _t_node_end." call _fail; };
        } else {
            if ([] call _t_node_end_fin) then {
                _text_offset = _text_offset + 2;
            } else { "Expected _t_node_end or _t_node_end_fin." call _fail; };
        };
        if (_isText) then {
            _output set [2, toString _nodes];
        };
        _output
    };

    //DOCUMENT = [ PROLOG ] NODE;
    private _prolog_res = [];
    if ([] call _prolog_start) then {
        _prolog_res = [] call _prolog;
    };
    [] call _skip;
    if ([] call _node_start) then {
        _output pushBack ([] call _node);
    } else { "Expected _node." call _fail; };
    [_prolog_res, _output]
};
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Example Output:
for