Created
March 6, 2023 21:05
-
-
Save clshortfuse/697925e85a5c3d86e7691c11cd6ac823 to your computer and use it in GitHub Desktop.
JS XML Parser and Builder
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** | |
* @template T | |
* @typedef {import('./index.js').TupleTree<T>} TupleTree<T> | |
*/ | |
/** | |
* @template T | |
* @typedef {import('./index.js').TupleTreeEntry<T>} TupleTreeEntry<T> | |
*/ | |
import { | |
ATTRIBUTE_NODE_KEY, CDATA_NODE_KEY, CHARCODE_QUESTION, COMMENT_NODE_KEY, CONTENT_NODE_KEY, | |
} from './constants.js'; | |
/**
 * Escapes a value so it can be emitted as XML character data (element text).
 *
 * `&` and `<` are replaced with entity references, carriage returns are written
 * as a numeric character reference, and the sequence `]]>` has its `>` escaped.
 * `&` is handled first so the entities produced by later steps are not re-escaped.
 *
 * @param {string|boolean|number} value
 * @return {string}
 */
function escapeContentValue(value) {
  return value.toString()
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/\r/g, '&#13;')
    .replace(/]]>/g, ']]&gt;');
}
/**
 * Escapes a value so it can be emitted inside a quoted XML attribute value.
 *
 * Both quote characters are escaped, so the result is safe regardless of which
 * delimiter surrounds it (the builders in this file use double quotes).
 * `&` is handled first so the entities produced by later steps are not re-escaped.
 *
 * @param {string|boolean|number} value
 * @return {string}
 */
function escapeAttributeValue(value) {
  return value.toString()
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/\r/g, '&#13;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&apos;');
}
/**
 * Makes a value safe to place between `<!--` and `-->`.
 *
 * XML comments may not contain `--` and may not end with `-`. Entity references
 * are not recognised inside comments, so the offending hyphen is swapped for a
 * look-alike Unicode hyphen character instead of being escaped.
 *
 * @param {string|boolean|number} value
 * @return {string}
 */
function escapeCommentValue(value) {
  return value.toString()
    // Second hyphen of every pair becomes the look-alike hyphen.
    .replace(/--/g, '-‐')
    // A trailing hyphen would otherwise fuse with the closing `-->`.
    .replace(/-$/, '‐');
}
/**
 * Serializes a plain JavaScript value into XML markup under the given tag name.
 *
 * - Arrays emit one node per item, every item reusing `key`.
 * - `Date` values are written with `toISOString()`; `null` and `undefined`
 *   become empty text (an open/close tag pair, not a self-closing tag).
 * - For any other object, the `CONTENT_NODE_KEY` property supplies the text
 *   content, the `ATTRIBUTE_NODE_KEY` property supplies attributes (a `Map`, an
 *   array of `[name, value]` pairs, or a plain object), and every remaining
 *   property becomes a child node.
 * - Symbols and functions produce no output.
 * - `COMMENT_NODE_KEY` / `CDATA_NODE_KEY` emit a comment / CDATA section.
 * - A falsy `key` denotes the document root: only child nodes are emitted.
 *
 * @param {string} key Tag name, or a falsy value for the root.
 * @param {any} value
 * @return {string}
 */
export function buildXMLFromObject(key, value) {
  /** @type {[string, string][]} */
  const attributes = [];
  /** @type {[string, any][]} */
  const childNodes = [];
  /** @type {any} */
  let textValue = null;

  switch (typeof value) {
    case 'symbol':
    case 'function':
      return '';
    case 'undefined':
      textValue = '';
      break;
    case 'object':
      if (value === null) {
        textValue = '';
        break;
      }
      if (Array.isArray(value)) {
        return value.map((arrayValue) => buildXMLFromObject(key, arrayValue)).join('');
      }
      if (value instanceof Date) {
        textValue = value.toISOString();
        break;
      }
      for (const [entryKey, entryValue] of Object.entries(value)) {
        switch (entryKey) {
          case CONTENT_NODE_KEY:
            textValue = entryValue;
            break;
          case ATTRIBUTE_NODE_KEY:
            if (entryValue instanceof Map) {
              attributes.push(...entryValue.entries());
            } else if (Array.isArray(entryValue)) {
              attributes.push(...entryValue);
            } else if (entryValue != null) {
              // A null/undefined attribute bag simply means "no attributes".
              attributes.push(...Object.entries(entryValue));
            }
            break;
          default:
            childNodes.push([entryKey, entryValue]);
        }
      }
      break;
    case 'string':
      textValue = value;
      break;
    default:
      // boolean, number, bigint
      textValue = value.toString();
  }

  const childMarkup = () => childNodes
    .map(([childKey, childValue]) => buildXMLFromObject(childKey, childValue))
    .join('');

  if (!key) {
    // Root: there is no enclosing tag, so only the children are written.
    return childMarkup();
  }

  if (key === COMMENT_NODE_KEY) {
    return `<!--${escapeCommentValue(value ?? '')}-->`;
  }
  if (key === CDATA_NODE_KEY) {
    // `]]>` cannot appear inside a CDATA section; split it across two sections.
    const cdata = String(value ?? '').replace(/]]>/g, ']]]]><![CDATA[>');
    return `<![CDATA[${cdata}]]>`;
  }

  const output = [
    '<', key,
    attributes.length ? ' ' : '',
    attributes.map(([attrName, attrValue]) => `${attrName}="${escapeAttributeValue(attrValue)}"`).join(' '),
  ];

  if (!childNodes.length && textValue == null) {
    // Processing instructions (`?name`) close with `?>`, elements with `/>`.
    // eslint-disable-next-line unicorn/prefer-code-point
    output.push(key.charCodeAt(0) === CHARCODE_QUESTION ? '?' : '/', '>');
  } else {
    output.push(
      '>',
      childMarkup(),
      // Compare against null rather than truthiness so `0` and `false` are kept.
      textValue == null ? '' : escapeContentValue(textValue),
      '</',
      key,
      '>',
    );
  }
  return output.join('');
}
/**
 * Serializes a single `[key, value]` tuple-tree entry into XML markup.
 *
 * Comment, CDATA and content entries must carry a string value and are emitted
 * directly. Any other key is treated as a tag whose value is a list of child
 * entries; an `ATTRIBUTE_NODE_KEY` child contributes the tag's attributes and
 * must come before any other child. A tag with no children, or with attributes
 * only, is written in self-closing form (`?>` when the key starts with `?`).
 *
 * @param {TupleTreeEntry<string>} entry
 * @return {string}
 * @throws {Error} When the entry shape is invalid.
 */
function buildXMLFromEntry([key, value]) {
  switch (key) {
    case ATTRIBUTE_NODE_KEY:
      // Attributes are only meaningful as the child of a tag entry.
      throw new Error('Invalid entry');
    case COMMENT_NODE_KEY:
      if (typeof value !== 'string') throw new Error('Content nodes must be strings.');
      return `<!--${escapeCommentValue(value)}-->`;
    case CDATA_NODE_KEY:
      if (typeof value !== 'string') throw new Error('Content nodes must be strings.');
      // `]]>` cannot appear inside a CDATA section; split it across two sections.
      return `<![CDATA[${value.replace(/]]>/g, ']]]]><![CDATA[>')}]]>`;
    case CONTENT_NODE_KEY:
      if (typeof value !== 'string') throw new Error('Content nodes must be strings.');
      return escapeContentValue(value);
    default:
  }
  if (typeof value === 'string') throw new Error('Child nodes must be tuples.');

  /** @type {string[]} */
  const output = ['<', key];
  let closed = false;
  let selfClosed = false;

  /**
   * Terminates the start tag once; later calls are no-ops.
   * @param {boolean} selfClose
   * @return {void}
   */
  const checkClose = (selfClose = false) => {
    if (closed) return;
    if (selfClose) {
      selfClosed = true;
      // eslint-disable-next-line unicorn/prefer-code-point
      output.push(key.charCodeAt(0) === CHARCODE_QUESTION ? '?' : '/');
    }
    output.push('>');
    closed = true;
  };

  for (const [index, [childKey, childValue]] of value.entries()) {
    if (childKey === ATTRIBUTE_NODE_KEY) {
      if (typeof childValue === 'string') throw new Error('Attributes must be tuples.');
      // Once `>` has been written, attributes would land outside the start tag.
      if (closed) throw new Error('Attributes must precede child nodes.');
      for (const [attrName, attrValue] of childValue) {
        output.push(' ', attrName, '="', escapeAttributeValue(/** @type {string} */ (attrValue)), '"');
      }
      // Attributes with nothing after them: the tag has no body.
      checkClose(index === value.length - 1);
    } else {
      checkClose();
      output.push(buildXMLFromEntry([childKey, childValue]));
    }
  }

  // No children at all also means no body.
  checkClose(value.length === 0);
  if (!selfClosed) {
    output.push('</', key, '>');
  }
  return output.join('');
}
/**
 * Serializes every top-level entry of a tuple tree and concatenates the
 * resulting markup in order.
 *
 * @param {TupleTree<string>} entries
 * @return {string}
 */
export function buildXMLFromEntries(entries) {
  return entries.reduce((markup, entry) => markup + buildXMLFromEntry(entry), '');
}
/**
 * Builds an XML string from either a tuple tree (array input) or a plain
 * object (anything else).
 *
 * @param {Object|TupleTree<string>} input
 * @param {Object} [options]
 * @param {boolean} [options.header] When true, prepend an XML declaration
 *   unless the generated markup already starts with one.
 * @return {string}
 */
export function buildXML(input, options = {}) {
  const result = Array.isArray(input)
    ? buildXMLFromEntries(input)
    : buildXMLFromObject(null, input);

  // Require a delimiter after `<?xml` so that other processing instructions
  // whose target merely begins with "xml" (e.g. `<?xml-stylesheet`) are not
  // mistaken for an existing declaration.
  const hasDeclaration = /^<\?xml[\s?]/.test(result);
  if (options?.header && !hasDeclaration) {
    return `<?xml version="1.0" encoding="utf-8"?>${result}`;
  }
  return result;
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* eslint-disable unicorn/prefer-code-point */

// Reserved keys used inside object / tuple representations of XML.
export const CONTENT_NODE_KEY = '$';
export const ATTRIBUTE_NODE_KEY = '$$';
export const AS_ARRAY_KEY = '$A';
export const AS_OBJECT_KEY = '$O';
export const AS_STRING_KEY = '$S';

// Pseudo tag names standing for comments and CDATA sections.
export const COMMENT_NODE_KEY = '!--';
export const CDATA_NODE_KEY = '![CDATA[';

// UTF-16 code unit of `?`, the first character of processing-instruction keys.
export const CHARCODE_QUESTION = '?'.charCodeAt(0);

// Node kinds.
export const NODE_TYPE_NONE = Symbol('NONE');
export const NODE_TYPE_XML_DECL = Symbol('XML_DECL');
export const NODE_TYPE_NOTATION_DECL = Symbol('NOTATION_DECL');
export const NODE_TYPE_ROOT = Symbol('ROOT');
export const NODE_TYPE_CHILD = Symbol('CHILD');
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* eslint-disable no-use-before-define */

/** One node of a tuple tree: a key paired with either a leaf value or a list of child entries. */
export type TupleTreeEntry<T> = [T, T|TupleTreeEntry<T>[]];

/** An ordered list of `[key, value]` pairs whose value is either a leaf or a nested tuple tree. */
export type TupleTree<T> = [T, T|TupleTree<T>][];
/**
 * Accessor views shared by every `XMLObject<T>` node.
 *
 * - `$A`: each string key of `T` viewed as an array of nodes.
 * - `$O`: each string key of `T` viewed as a single object node, which may
 *   carry `$$` attributes; scalar-like members expose their text under `$`.
 * - `$S`: each string key of `T` viewed as a string.
 * - `$$`: the node's own attributes, when present.
 */
export type XMLObjectBase<T> = {
  $A: {
    [P in Extract<keyof T, string>]?: (
      T[P] extends T[P][0][] ? XMLObject<T[P][0]>[] :
      (XMLObjectBase<T[P]> & XMLObject<T[P]>)[]
    )
  },
  $O: {
    [P in Extract<keyof T, string>]?: {
      $$?: Record<string, string>
    } & (
      T[P] extends T[P][0][] ? (XMLObjectBase<T[P][0]> & XMLObject<T[P][0]>) :
      T[P] extends string ? { $:T[P] } :
      T[P] extends number ? { $:string } :
      T[P] extends Date ? { $:string } :
      XMLObjectBase<T[P]> & XMLObject<T[P]>)
  },
  $S: {
    [P in Extract<keyof T, string>]?: (
      T[P] extends string ? T[P] : string
    )
  },
  $$?: Record<string, string>,
};
/**
 * Object representation of XML shaped after `T`.
 *
 * On top of the `XMLObjectBase<T>` accessors, every string key of `T` is
 * optionally present: array members map to the node type of their element,
 * string members to either the string itself or a `{ $ }` node, number and
 * `Date` members to a string or a `{ $: string }` node, and anything else to a
 * nested node.
 */
export type XMLObject<T> = XMLObjectBase<T> & {
  [P in Extract<keyof T, string>]?: (
    T[P] extends T[P][0][] ? (XMLObjectBase<T[P][0]> & XMLObject<T[P][0]>) :
    T[P] extends string ? T[P] | XMLObject<{ $:T[P] }> :
    T[P] extends number ? string | XMLObject<{ $:string }> :
    T[P] extends Date ? string | XMLObject<{ $:string }> :
    XMLObjectBase<T[P]> & XMLObject<T[P]>
  )
};
/**
 * Accessor views shared by every `XMLObjectFlat<T>` node.
 *
 * Mirrors `XMLObjectBase<T>` but declares no `$$` attribute members:
 * - `$A`: each string key of `T` viewed as an array of nodes.
 * - `$O`: each string key of `T` viewed as a single object node; scalar-like
 *   members expose their text under `$`.
 * - `$S`: each string key of `T` viewed as a string.
 */
export type XMLObjectFlatBase<T> = {
  $A: {
    [P in Extract<keyof T, string>]?: (
      T[P] extends T[P][0][] ? XMLObjectFlat<T[P][0]>[] :
      (XMLObjectFlatBase<T[P]> & XMLObjectFlat<T[P]>)[]
    )
  },
  $O: {
    [P in Extract<keyof T, string>]?: (
      T[P] extends T[P][0][] ? (XMLObjectFlatBase<T[P][0]> & XMLObjectFlat<T[P][0]>) :
      T[P] extends string ? { $:T[P] } :
      T[P] extends number ? { $:string } :
      T[P] extends Date ? { $:string } :
      XMLObjectFlatBase<T[P]> & XMLObjectFlat<T[P]>)
  },
  $S: {
    [P in Extract<keyof T, string>]?: (
      T[P] extends string ? T[P] : string
    )
  },
};
/**
 * Flattened object representation of XML shaped after `T`.
 *
 * On top of the `XMLObjectFlatBase<T>` accessors, every string key of `T` is
 * optionally present: array members map to the node type of their element,
 * string members stay as-is, number and `Date` members become strings, and
 * anything else becomes a nested flat node.
 */
export type XMLObjectFlat<T> = XMLObjectFlatBase<T> & {
  [P in Extract<keyof T, string>]?: (
    T[P] extends T[P][0][] ? (XMLObjectFlatBase<T[P][0]> & XMLObjectFlat<T[P][0]>) :
    T[P] extends string ? T[P] :
    T[P] extends number ? string :
    T[P] extends Date ? string :
    XMLObjectFlatBase<T[P]> & XMLObjectFlat<T[P]>
  )
};
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* eslint-disable unicorn/prefer-code-point */ | |
/** @see https://www.w3.org/TR/xml/ */ | |
import { | |
AS_ARRAY_KEY, | |
AS_OBJECT_KEY, | |
AS_STRING_KEY, | |
ATTRIBUTE_NODE_KEY, | |
CDATA_NODE_KEY, | |
CHARCODE_QUESTION, | |
COMMENT_NODE_KEY, | |
CONTENT_NODE_KEY, | |
} from './constants.js'; | |
/** | |
* @template T | |
* @typedef {import('./index.js').TupleTree<T>} TupleTree<T> | |
*/ | |
/** | |
* @template T | |
* @typedef {import('./index.js').TupleTreeEntry<T>} TupleTreeEntry<T> | |
*/ | |
/** | |
* @template T | |
* @typedef {import('./index.js').XMLObject<T>} XMLObject<T> | |
*/ | |
// Hidden property key under which a parsed node tuple records where it ended.
const END_POSITION_SYMBOL = Symbol('EndPositionSymbol');

// --- Parser states: document prolog -----------------------------------------
const STATE_BEGIN = Symbol('BEGIN');
const STATE_PROLOG_OR_ROOT_OPEN = Symbol('PROLOG_OR_ROOT_OPEN');
const STATE_MISC_WHITESPACE = Symbol('MISC_WHITESPACE');
const STATE_DOCTYPE_OR_MISC_OR_ROOT_OPEN = Symbol('DOCTYPE_OR_MISC_OR_ROOT_OPEN');
const STATE_XML_DECL_OPEN = Symbol('XML_DECL_OPEN');
const STATE_XML_DECL_CLOSER = Symbol('XML_DECL_CLOSER');
const STATE_NOTATION_OPEN = Symbol('NOTATION_OPEN');

// --- Parser states: comments -------------------------------------------------
const STATE_COMMENT_OPEN = Symbol('COMMENT_OPEN');
const STATE_COMMENT = Symbol('COMMENT');
const STATE_COMMENT_CARRIAGE_RETURN = Symbol('COMMENT_CARRIAGE_RETURN');
const STATE_COMMENT_HYPHEN = Symbol('COMMENT_HYPHEN');
const STATE_COMMENT_CLOSE = Symbol('COMMENT_CLOSE');

// --- Parser states: start tag and attributes ---------------------------------
const STATE_START_TAG_OPEN = Symbol('START_TAG_OPEN');
const STATE_START_TAG_NAME = Symbol('START_TAG_NAME');
const STATE_START_TAG_WHITESPACE = Symbol('START_TAG_WHITESPACE');
const STATE_ATTRIBUTE_NAME = Symbol('ATTRIBUTE_NAME');
const STATE_ATTRIBUTE_EQUAL = Symbol('ATTRIBUTE_EQUAL');
const STATE_ATTRIBUTE_VALUE = Symbol('ATTRIBUTE_VALUE');
const STATE_ATTRIBUTE_VALUE_CLOSE = Symbol('ATTRIBUTE_VALUE_CLOSE');
const STATE_ATTRIBUTE_REFERENCE = Symbol('ATTRIBUTE_REFERENCE');
const STATE_ATTRIBUTE_CHAR_REFERENCE = Symbol('ATTRIBUTE_CHAR_REFERENCE');
const STATE_ATTRIBUTE_CHAR_REFERENCE_HEX = Symbol('ATTRIBUTE_CHAR_REFERENCE_HEX');
const STATE_ATTRIBUTE_CHAR_REFERENCE_DEC = Symbol('ATTRIBUTE_CHAR_REFERENCE_DEC');
const STATE_ATTRIBUTE_ENTITY_REFERENCE = Symbol('ATTRIBUTE_ENTITY_REFERENCE');
const STATE_SELF_CLOSING_TAG_CLOSER = Symbol('SELF_CLOSING_TAG_CLOSER');

// --- Parser states: element content ------------------------------------------
const STATE_CONTENT = Symbol('CONTENT');
const STATE_CONTENT_CARRIAGE_RETURN = Symbol('CONTENT_CARRIAGE_RETURN');
const STATE_CONTENT_CDATA_SELECTION_CLOSE_1 = Symbol('CONTENT_CDATA_SELECTION_CLOSE_1');
const STATE_CONTENT_CDATA_SELECTION_CLOSE_2 = Symbol('CONTENT_CDATA_SELECTION_CLOSE_2');
const STATE_CONTENT_REFERENCE = Symbol('CONTENT_REFERENCE');
const STATE_CONTENT_CHAR_REFERENCE = Symbol('CONTENT_CHAR_REFERENCE');
const STATE_CONTENT_CHAR_REFERENCE_HEX = Symbol('CONTENT_CHAR_REFERENCE_HEX');
const STATE_CONTENT_CHAR_REFERENCE_DEC = Symbol('CONTENT_CHAR_REFERENCE_DEC');
const STATE_CONTENT_ENTITY_REFERENCE = Symbol('CONTENT_ENTITY_REFERENCE');

// --- Parser states: CDATA sections -------------------------------------------
const STATE_CDATA_OPEN = Symbol('STATE_CDATA_OPEN');
const STATE_CDATA_C = Symbol('STATE_CDATA_C');
const STATE_CDATA_CD = Symbol('STATE_CDATA_CD');
const STATE_CDATA_CDA = Symbol('STATE_CDATA_CDA');
const STATE_CDATA_CDAT = Symbol('STATE_CDATA_CDAT');
const STATE_CDATA_CARRIAGE_RETURN = Symbol('STATE_CDATA_CARRIAGE_RETURN');
const STATE_CDATA_DATA_START = Symbol('STATE_CDATA_DATA_START');
const STATE_CDATA_DATA = Symbol('STATE_CDATA_DATA');
const STATE_CDATA_DATA_END = Symbol('STATE_CDATA_DATA_END');
const STATE_CDATA_CLOSE = Symbol('STATE_CDATA_CLOSE');

// --- Parser states: child nodes and end tag ----------------------------------
const STATE_UNKNOWN_TAG_OPEN = Symbol('UNKNOWN_TAG_OPEN');
const STATE_CHILD_NODE = Symbol('CHILD_NODE');
const STATE_END_TAG_OPEN = Symbol('END_TAG_OPEN');
const STATE_END_TAG_NAME = Symbol('END_TAG_NAME');
const STATE_END_TAG_WHITESPACE = Symbol('END_TAG_WHITESPACE');
const STATE_END_TAG_CLOSE = Symbol('END_TAG_CLOSE');
// Node kinds; the parser picks its initial state from the kind it is asked to parse.
const NODE_TYPE_NONE = Symbol('NONE');
const NODE_TYPE_XML_DECL = Symbol('XML_DECL');
const NODE_TYPE_NOTATION_DECL = Symbol('NOTATION_DECL');
const NODE_TYPE_ROOT = Symbol('ROOT');
const NODE_TYPE_CHILD = Symbol('CHILD');
// UTF-16 code units of the characters the parser dispatches on.

// Whitespace
const CHARCODE_SPACE = ' '.charCodeAt(0);
const CHARCODE_HTAB = '\t'.charCodeAt(0);
const CHARCODE_CR = '\r'.charCodeAt(0);
const CHARCODE_LF = '\n'.charCodeAt(0);

// Name punctuation
const CHARCODE_COLON = ':'.charCodeAt(0);
const CHARCODE_UNDERSCORE = '_'.charCodeAt(0);
const CHARCODE_HYPHEN = '-'.charCodeAt(0);
const CHARCODE_PERIOD = '.'.charCodeAt(0);
const CHARCODE_MIDDLE_DOT = '·'.charCodeAt(0);

// Markup delimiters
const CHARCODE_LESS_THAN = '<'.charCodeAt(0);
const CHARCODE_SLASH = '/'.charCodeAt(0);
const CHARCODE_BANG = '!'.charCodeAt(0);
const CHARCODE_GREATER_THAN = '>'.charCodeAt(0);
const CHARCODE_EQUALS = '='.charCodeAt(0);
const CHARCODE_DOUBLE_QUOTE = '"'.charCodeAt(0);
const CHARCODE_SINGLE_QUOTE = "'".charCodeAt(0);

// References
const CHARCODE_AMP = '&'.charCodeAt(0);
const CHARCODE_HASH = '#'.charCodeAt(0);
const CHARCODE_SEMICOLON = ';'.charCodeAt(0);
const CHARCODE_LOWERCASE_X = 'x'.charCodeAt(0);

// CDATA section markers
const CHARCODE_OPEN_BRACKET = '['.charCodeAt(0);
const CHARCODE_C = 'C'.charCodeAt(0);
const CHARCODE_D = 'D'.charCodeAt(0);
const CHARCODE_A = 'A'.charCodeAt(0);
const CHARCODE_T = 'T'.charCodeAt(0);
const CHARCODE_CLOSE_BRACKET = ']'.charCodeAt(0);
// Inclusive [min, max] ranges for the first character of an XML name.
// `:` and `_` are not listed here; callers special-case them.
const NAME_START_RANGES = [
  ['A'.charCodeAt(0), 'Z'.charCodeAt(0)],
  ['a'.charCodeAt(0), 'z'.charCodeAt(0)],
  [0xC0, 0xD6],
  // 0xD7 ×
  [0xD8, 0xF6],
  // 0xF7 ÷
  [0xF8, 0x02_FF],
  [0x03_70, 0x03_7D],
  // 0x37E ;
  [0x03_7F, 0x1F_FF],
  [0x20_0C, 0x20_0D],
  [0x20_70, 0x21_8F],
  [0x2C_00, 0x2F_EF],
  [0x30_01, 0xD7_FF],
  [0xF9_00, 0xFD_CF],
  [0xFD_F0, 0xFF_FD],
  [0x01_00_00, 0x0E_FF_FF],
];

// Inclusive ranges for the remaining characters of an XML name.
// `-`, `.` and the middle dot are special-cased by callers.
const NAME_RANGES = [
  ...NAME_START_RANGES,
  ['0'.charCodeAt(0), '9'.charCodeAt(0)],
  [0x03_00, 0x03_6F],
  [0x20_3F, 0x20_40],
];

// Inclusive ranges for characters allowed in comments and CDATA.
// Tab, LF and CR are special-cased by callers.
const CHARACTER_RANGES = [
  [0x20, 0xD7_FF],
  // UTF-16 surrogate halves. The parser compares `charCodeAt` code units, so a
  // supplementary-plane character arrives as two values in this range and
  // would never match the [0x10000, 0x10FFFF] entry below.
  // NOTE(review): this also admits unpaired surrogates — confirm that is acceptable.
  [0xD8_00, 0xDF_FF],
  [0xE0_00, 0xFF_FD],
  [0x01_00_00, 0x10_FF_FF],
];

// Digits allowed in a decimal character reference (`&#NNN;`).
const CHAR_REFERENCE_DEC_RANGES = [
  ['0'.charCodeAt(0), '9'.charCodeAt(0)],
];

// Digits allowed in a hexadecimal character reference (`&#xHHH;`).
const CHAR_REFERENCE_HEX_RANGES = [
  ['0'.charCodeAt(0), '9'.charCodeAt(0)],
  ['a'.charCodeAt(0), 'f'.charCodeAt(0)],
  ['A'.charCodeAt(0), 'F'.charCodeAt(0)],
];
// Entity names that resolve without any declaration, mapped to their replacement text.
const PREDEFINED_ENTITIES = new Map(Object.entries({
  amp: '&',
  lt: '<',
  gt: '>',
  apos: "'",
  quot: '"',
}));
/**
 * Converts the numeric value of a character reference (`&#NNN;` / `&#xHHH;`)
 * into the string it denotes.
 *
 * Values above 0xFFFF yield a surrogate pair. Anything that is not an integer
 * in the Unicode code point range 0..0x10FFFF (including `NaN`, which an empty
 * digit sequence parses to) is rejected.
 *
 * @param {number} reference
 * @return {string}
 * @throws {Error} When `reference` is not a valid code point.
 */
function parseCharReference(reference) {
  const isCodePoint = Number.isInteger(reference)
    && reference >= 0
    && reference <= 0x10_FF_FF;
  if (!isCodePoint) {
    throw new Error(`Invalid CharRef (${reference})`);
  }
  return String.fromCodePoint(reference);
}
/**
 * Resolves an entity name to its replacement text.
 *
 * Entities supplied in `declaredEntities` take precedence over the predefined
 * ones; a name found in neither is an error.
 *
 * @param {string} [entity]
 * @param {Map<string,string>} [declaredEntities]
 * @return {string}
 */
function parseEntityReference(entity, declaredEntities) {
  const isDeclared = declaredEntities != null && declaredEntities.has(entity);
  if (isDeclared) {
    return declaredEntities.get(entity);
  }
  if (!PREDEFINED_ENTITIES.has(entity)) {
    throw new Error(`Unknown entity: ${entity}`);
  }
  return PREDEFINED_ENTITIES.get(entity);
}
/** | |
* @param {string} input | |
* @param {Object} options | |
* @param {number} [options.index=0] | |
* @param {number} [options.charCode] | |
* @param {Symbol} [options.nodeType] | |
* @param {boolean} [options.enforceUniqueAttributes=true] | |
* @param {boolean} [options.enforceEntityDeclared=true] | |
* @return {TupleTreeEntry<string>} | |
*/ | |
function parseXMLNode(input, options = {}) { | |
/** @type {Symbol} */ | |
let state; | |
switch (options.nodeType) { | |
default: | |
case NODE_TYPE_NONE: | |
state = STATE_BEGIN; | |
break; | |
case NODE_TYPE_XML_DECL: | |
state = STATE_XML_DECL_OPEN; | |
break; | |
case NODE_TYPE_NOTATION_DECL: | |
state = STATE_NOTATION_OPEN; | |
break; | |
case NODE_TYPE_ROOT: | |
case NODE_TYPE_CHILD: | |
state = STATE_START_TAG_OPEN; | |
} | |
/** @type {TupleTree<string>} */ | |
const children = []; | |
/** @type {string} */ | |
let stringReturnValue; | |
/** @type {string} */ | |
let tagName; | |
/** @type {string} */ | |
let attrName; | |
/** @type {string} */ | |
let attrValue; | |
/** @type {number} */ | |
let attrValueDelimiter; | |
/** @type {number} */ | |
let reference; | |
/** @type {string} */ | |
let entity; | |
/** @type {string} */ | |
let content; | |
/** @type {string} */ | |
let comment; | |
/** @type {string} */ | |
let cdata; | |
/** @type {TupleTreeEntry<string>} */ | |
let childNode; | |
// TODO: Add declared entities support | |
const declaredEntities = new Map(); | |
let index = options.index ?? 0; | |
let charCode = options.charCode ?? input.charCodeAt(index); | |
let stringStartIndex = index; | |
/** @type {string} */ | |
let xmlSpace; | |
/** @type {Set<string>} */ | |
const attributeNames = new Set(); | |
/** @type {[string,string][]} */ | |
const attributes = []; | |
let hasContent = false; | |
let selfClosing = false; | |
// let previousState = state; | |
/** | |
* @param {string} key | |
* @param {string|TupleTree<string>} value | |
* @return {void} | |
*/ | |
function addChild(key, value) { | |
children.push([key, value]); | |
} | |
const resetContent = () => { | |
hasContent = false; | |
content = ''; | |
}; | |
const onContentEnd = () => { | |
content += input.slice(stringStartIndex, index); | |
if (hasContent || xmlSpace === 'preserve') { | |
addChild(CONTENT_NODE_KEY, content); | |
} | |
}; | |
/** | |
* @param {number[][]} ranges | |
* @return {void} | |
*/ | |
const assertCharCodeRange = (ranges) => { | |
if (!ranges.some(([min, max]) => charCode >= min && charCode <= max)) { | |
throw new Error(`Invalid character ${(input[index])} at ${index}.`); | |
} | |
}; | |
const onCommentCharCode = () => { | |
switch (charCode) { | |
case CHARCODE_HYPHEN: | |
state = STATE_COMMENT_HYPHEN; | |
break; | |
case CHARCODE_CR: | |
content += input.slice(stringStartIndex, index); | |
state = STATE_COMMENT_CARRIAGE_RETURN; | |
break; | |
default: | |
assertCharCodeRange(CHARACTER_RANGES); | |
// Fallthrough | |
case CHARCODE_HTAB: | |
case CHARCODE_LF: | |
state = STATE_COMMENT; | |
} | |
}; | |
const onCDataCharCode = () => { | |
switch (charCode) { | |
case CHARCODE_CLOSE_BRACKET: | |
state = STATE_CDATA_CLOSE; | |
break; | |
case CHARCODE_CR: | |
cdata += input.slice(stringStartIndex, index); | |
state = STATE_CDATA_CARRIAGE_RETURN; | |
break; | |
default: | |
assertCharCodeRange(CHARACTER_RANGES); | |
// Fallthrough | |
case CHARCODE_HTAB: | |
case CHARCODE_LF: | |
state = STATE_CDATA_DATA; | |
} | |
}; | |
const onContentCharCode = () => { | |
switch (charCode) { | |
case CHARCODE_LESS_THAN: | |
onContentEnd(); | |
state = STATE_UNKNOWN_TAG_OPEN; | |
break; | |
case CHARCODE_CR: | |
content += input.slice(stringStartIndex, index); | |
state = STATE_CONTENT_CARRIAGE_RETURN; | |
break; | |
case CHARCODE_SPACE: case CHARCODE_HTAB: case CHARCODE_LF: | |
break; | |
case CHARCODE_CLOSE_BRACKET: | |
state = STATE_CONTENT_CDATA_SELECTION_CLOSE_1; | |
hasContent = true; | |
break; | |
case CHARCODE_AMP: | |
content += input.slice(stringStartIndex, index); | |
state = STATE_CONTENT_REFERENCE; | |
// Fallthrough | |
default: | |
hasContent = true; | |
} | |
}; | |
/** | |
* @param {Symbol} nodeType | |
* @return {void} | |
*/ | |
const onTagOpen = (nodeType) => { | |
// console.log('onTagOpen', nodeType.description); | |
switch (charCode) { | |
case CHARCODE_SLASH: | |
state = STATE_END_TAG_OPEN; | |
break; | |
default: | |
state = STATE_CHILD_NODE; | |
childNode = parseXMLNode(input, { index, charCode, nodeType }); | |
if (nodeType === NODE_TYPE_XML_DECL && childNode[0] !== '?xml') { | |
throw new Error('Unknown declaration type'); | |
} | |
// @ts-ignore Hidden Symbol | |
index = childNode[END_POSITION_SYMBOL]; | |
stringStartIndex = index + 1; | |
resetContent(); | |
// @ts-ignore Hidden Symbol | |
delete childNode[END_POSITION_SYMBOL]; | |
children.push(childNode); | |
state = options.nodeType === NODE_TYPE_CHILD ? STATE_CONTENT : STATE_MISC_WHITESPACE; | |
} | |
}; | |
// const logState = () => { | |
// console.log( | |
// index, | |
// String.fromCharCode(charCode), | |
// previousState.description, | |
// '=>', | |
// state.description, | |
// String.fromCharCode(charCode), | |
// ); | |
// }; | |
const getUnexpectedCharacterError = () => new Error(`Invalid character ${(input[index])} at ${index}.`); | |
const buildReturnValue = () => { | |
/** @type {TupleTreeEntry<string>} */ | |
let tuple; | |
if (stringReturnValue != null) { | |
tuple = [tagName, stringReturnValue]; | |
} else { | |
/** @type {TupleTree<string>} */ | |
const entries = []; | |
if (attributes.length) { | |
entries.push([ATTRIBUTE_NODE_KEY, attributes]); | |
} | |
if (children.length) { | |
entries.push(...children); | |
} else if (!selfClosing) { | |
entries.push([CONTENT_NODE_KEY, '']); | |
} | |
tuple = [tagName, entries]; | |
} | |
Object.defineProperty(tuple, END_POSITION_SYMBOL, { | |
enumerable: false, configurable: true, value: index, writable: false, | |
}); | |
return tuple; | |
}; | |
while (Number.isNaN(charCode) === false) { | |
switch (state) { | |
case STATE_BEGIN: | |
switch (charCode) { | |
case CHARCODE_LESS_THAN: | |
state = STATE_PROLOG_OR_ROOT_OPEN; | |
break; | |
case CHARCODE_SPACE: case CHARCODE_CR: case CHARCODE_HTAB: case CHARCODE_LF: | |
state = STATE_MISC_WHITESPACE; | |
break; | |
default: | |
throw getUnexpectedCharacterError(); | |
} | |
break; | |
case STATE_PROLOG_OR_ROOT_OPEN: | |
switch (charCode) { | |
case CHARCODE_QUESTION: | |
onTagOpen(NODE_TYPE_XML_DECL); | |
break; | |
case CHARCODE_BANG: | |
onTagOpen(NODE_TYPE_NOTATION_DECL); | |
break; | |
default: | |
onTagOpen(NODE_TYPE_CHILD); | |
} | |
break; | |
case STATE_MISC_WHITESPACE: | |
switch (charCode) { | |
case CHARCODE_SPACE: case CHARCODE_CR: case CHARCODE_HTAB: case CHARCODE_LF: | |
state = STATE_MISC_WHITESPACE; | |
break; | |
case CHARCODE_LESS_THAN: | |
state = STATE_DOCTYPE_OR_MISC_OR_ROOT_OPEN; | |
break; | |
default: | |
throw getUnexpectedCharacterError(); | |
} | |
break; | |
case STATE_XML_DECL_OPEN: | |
case STATE_NOTATION_OPEN: | |
state = STATE_START_TAG_OPEN; | |
break; | |
case STATE_XML_DECL_CLOSER: | |
if (charCode !== CHARCODE_GREATER_THAN) { | |
throw getUnexpectedCharacterError(); | |
} | |
selfClosing = true; | |
state = STATE_END_TAG_CLOSE; | |
break; | |
case STATE_COMMENT_OPEN: | |
if (charCode !== CHARCODE_HYPHEN) { | |
throw getUnexpectedCharacterError(); | |
} | |
comment = ''; | |
stringStartIndex = index + 1; | |
state = STATE_COMMENT; | |
break; | |
case STATE_COMMENT: | |
onCommentCharCode(); | |
break; | |
case STATE_COMMENT_CARRIAGE_RETURN: | |
switch (charCode) { | |
default: | |
content += '\n'; | |
// Fallthrough | |
case CHARCODE_LF: | |
stringStartIndex = index; | |
onCommentCharCode(); | |
break; | |
} | |
break; | |
case STATE_COMMENT_HYPHEN: | |
if (charCode === CHARCODE_HYPHEN) { | |
comment += input.slice(stringStartIndex, index - 1); | |
state = STATE_COMMENT_CLOSE; | |
break; | |
} | |
onCommentCharCode(); | |
break; | |
case STATE_COMMENT_CLOSE: | |
if (charCode !== CHARCODE_GREATER_THAN) { | |
throw getUnexpectedCharacterError(); | |
} | |
tagName = COMMENT_NODE_KEY; | |
stringReturnValue = comment; | |
state = STATE_END_TAG_CLOSE; | |
break; | |
case STATE_START_TAG_OPEN: | |
switch (charCode) { | |
case CHARCODE_OPEN_BRACKET: | |
if (options.nodeType !== NODE_TYPE_NOTATION_DECL) { | |
throw new Error(`Invalid character ${(input[index])} at ${index}.`); | |
} | |
state = STATE_CDATA_OPEN; | |
break; | |
case CHARCODE_HYPHEN: | |
if (options.nodeType !== NODE_TYPE_NOTATION_DECL) { | |
throw new Error(`Invalid character ${(input[index])} at ${index}.`); | |
} | |
state = STATE_COMMENT_OPEN; | |
break; | |
default: | |
assertCharCodeRange(NAME_START_RANGES); | |
// Fallthrough | |
case CHARCODE_COLON: case CHARCODE_UNDERSCORE: | |
state = STATE_START_TAG_NAME; | |
} | |
break; | |
case STATE_START_TAG_NAME: | |
switch (charCode) { | |
default: | |
assertCharCodeRange(NAME_RANGES); | |
// Fallthrough | |
case CHARCODE_COLON: case CHARCODE_UNDERSCORE: | |
case CHARCODE_HYPHEN: case CHARCODE_PERIOD: case CHARCODE_MIDDLE_DOT: | |
break; | |
case CHARCODE_SPACE: case CHARCODE_CR: case CHARCODE_HTAB: case CHARCODE_LF: | |
tagName = input.slice(stringStartIndex, index); | |
state = STATE_START_TAG_WHITESPACE; | |
break; | |
case CHARCODE_SLASH: | |
if (options.nodeType === NODE_TYPE_CHILD) { | |
tagName = input.slice(stringStartIndex, index); | |
state = STATE_SELF_CLOSING_TAG_CLOSER; | |
} else { | |
throw getUnexpectedCharacterError(); | |
} | |
break; | |
case CHARCODE_GREATER_THAN: | |
tagName = input.slice(stringStartIndex, index); | |
switch (options.nodeType) { | |
case NODE_TYPE_CHILD: | |
stringStartIndex = index + 1; | |
resetContent(); | |
state = STATE_CONTENT; | |
break; | |
case NODE_TYPE_NOTATION_DECL: | |
state = STATE_END_TAG_CLOSE; | |
break; | |
default: | |
throw getUnexpectedCharacterError(); | |
} | |
break; | |
} | |
break; | |
case STATE_ATTRIBUTE_VALUE_CLOSE: | |
switch (charCode) { | |
case CHARCODE_QUESTION: | |
if (options.nodeType === NODE_TYPE_XML_DECL) { | |
state = STATE_XML_DECL_CLOSER; | |
break; | |
} | |
// Fallthrough | |
default: | |
throw getUnexpectedCharacterError(); | |
case CHARCODE_SPACE: case CHARCODE_CR: case CHARCODE_HTAB: case CHARCODE_LF: | |
state = STATE_START_TAG_WHITESPACE; | |
break; | |
case CHARCODE_SLASH: | |
if (options.nodeType === NODE_TYPE_CHILD) { | |
state = STATE_SELF_CLOSING_TAG_CLOSER; | |
} else { | |
throw getUnexpectedCharacterError(); | |
} | |
break; | |
case CHARCODE_GREATER_THAN: | |
switch (options.nodeType) { | |
case NODE_TYPE_CHILD: | |
stringStartIndex = index + 1; | |
resetContent(); | |
state = STATE_CONTENT; | |
break; | |
case NODE_TYPE_NOTATION_DECL: | |
state = STATE_END_TAG_CLOSE; | |
break; | |
default: | |
throw getUnexpectedCharacterError(); | |
} | |
break; | |
} | |
break; | |
case STATE_START_TAG_WHITESPACE: | |
switch (charCode) { | |
case CHARCODE_SPACE: case CHARCODE_CR: case CHARCODE_HTAB: case CHARCODE_LF: | |
state = STATE_START_TAG_WHITESPACE; | |
break; | |
case CHARCODE_QUESTION: | |
if (options.nodeType === NODE_TYPE_XML_DECL) { | |
state = STATE_XML_DECL_CLOSER; | |
} else { | |
throw getUnexpectedCharacterError(); | |
} | |
break; | |
case CHARCODE_SLASH: | |
if (options.nodeType === NODE_TYPE_CHILD) { | |
state = STATE_SELF_CLOSING_TAG_CLOSER; | |
} else { | |
throw getUnexpectedCharacterError(); | |
} | |
break; | |
case CHARCODE_GREATER_THAN: | |
switch (options.nodeType) { | |
case NODE_TYPE_CHILD: | |
stringStartIndex = index + 1; | |
resetContent(); | |
state = STATE_CONTENT; | |
break; | |
case NODE_TYPE_NOTATION_DECL: | |
state = STATE_END_TAG_CLOSE; | |
break; | |
default: | |
throw getUnexpectedCharacterError(); | |
} | |
break; | |
default: | |
assertCharCodeRange(NAME_START_RANGES); | |
// Fallthrough | |
case CHARCODE_COLON: case CHARCODE_UNDERSCORE: | |
stringStartIndex = index; | |
state = STATE_ATTRIBUTE_NAME; | |
} | |
break; | |
case STATE_ATTRIBUTE_NAME: | |
switch (charCode) { | |
default: | |
assertCharCodeRange(NAME_RANGES); | |
// Fallthrough | |
case CHARCODE_COLON: case CHARCODE_UNDERSCORE: | |
case CHARCODE_HYPHEN: case CHARCODE_PERIOD: case CHARCODE_MIDDLE_DOT: | |
break; | |
case CHARCODE_EQUALS: | |
attrName = input.slice(stringStartIndex, index); | |
if (options.enforceUniqueAttributes !== false && attributeNames.has(attrName)) { | |
throw new Error(`Attribute name (${attrName}) must be unique at ${index}.`); | |
} | |
attributeNames.add(attrName); | |
state = STATE_ATTRIBUTE_EQUAL; | |
break; | |
} | |
break; | |
case STATE_ATTRIBUTE_EQUAL: | |
switch (charCode) { | |
case CHARCODE_SINGLE_QUOTE: | |
case CHARCODE_DOUBLE_QUOTE: | |
attrValueDelimiter = charCode; | |
state = STATE_ATTRIBUTE_VALUE; | |
stringStartIndex = index + 1; | |
attrValue = ''; | |
break; | |
default: | |
throw getUnexpectedCharacterError(); | |
} | |
break; | |
case STATE_ATTRIBUTE_VALUE: | |
switch (charCode) { | |
case attrValueDelimiter: | |
attrValue += input.slice(stringStartIndex, index); | |
attributes.push([attrName, attrValue]); | |
if (attrName === 'xml:space') xmlSpace = attrValue; | |
state = STATE_ATTRIBUTE_VALUE_CLOSE; | |
break; | |
case CHARCODE_LESS_THAN: | |
throw getUnexpectedCharacterError(); | |
case CHARCODE_AMP: | |
attrValue += input.slice(stringStartIndex, index); | |
state = STATE_ATTRIBUTE_REFERENCE; | |
break; | |
default: | |
} | |
break; | |
case STATE_ATTRIBUTE_REFERENCE: | |
switch (charCode) { | |
case CHARCODE_HASH: | |
state = STATE_ATTRIBUTE_CHAR_REFERENCE; | |
break; | |
default: | |
assertCharCodeRange(NAME_START_RANGES); | |
// Fallthrough | |
case CHARCODE_COLON: case CHARCODE_UNDERSCORE: | |
stringStartIndex = index; | |
state = STATE_ATTRIBUTE_ENTITY_REFERENCE; | |
} | |
break; | |
case STATE_ATTRIBUTE_ENTITY_REFERENCE: | |
switch (charCode) { | |
case CHARCODE_SEMICOLON: | |
entity = parseEntityReference(input.slice(stringStartIndex, index), declaredEntities); | |
attrValue += entity; | |
state = STATE_ATTRIBUTE_VALUE; | |
stringStartIndex = index + 1; | |
break; | |
default: | |
assertCharCodeRange(NAME_RANGES); | |
// Fallthrough | |
case CHARCODE_COLON: case CHARCODE_UNDERSCORE: | |
case CHARCODE_HYPHEN: case CHARCODE_PERIOD: case CHARCODE_MIDDLE_DOT: | |
} | |
break; | |
case STATE_ATTRIBUTE_CHAR_REFERENCE: | |
switch (charCode) { | |
case CHARCODE_LOWERCASE_X: | |
stringStartIndex = index + 1; | |
state = STATE_ATTRIBUTE_CHAR_REFERENCE_HEX; | |
break; | |
default: | |
assertCharCodeRange(CHAR_REFERENCE_DEC_RANGES); | |
stringStartIndex = index; | |
state = STATE_ATTRIBUTE_CHAR_REFERENCE_DEC; | |
} | |
break; | |
case STATE_ATTRIBUTE_CHAR_REFERENCE_DEC: | |
switch (charCode) { | |
case CHARCODE_SEMICOLON: | |
reference = Number.parseInt(input.slice(stringStartIndex, index), 10); | |
attrValue += parseCharReference(reference); | |
stringStartIndex = index + 1; | |
state = STATE_ATTRIBUTE_VALUE; | |
break; | |
default: | |
assertCharCodeRange(CHAR_REFERENCE_DEC_RANGES); | |
} | |
break; | |
case STATE_ATTRIBUTE_CHAR_REFERENCE_HEX: | |
switch (charCode) { | |
case CHARCODE_SEMICOLON: | |
reference = Number.parseInt(input.slice(stringStartIndex, index), 16); | |
attrValue += parseCharReference(reference); | |
stringStartIndex = index + 1; | |
state = STATE_ATTRIBUTE_VALUE; | |
break; | |
default: | |
assertCharCodeRange(CHAR_REFERENCE_HEX_RANGES); | |
} | |
break; | |
case STATE_SELF_CLOSING_TAG_CLOSER: | |
if (charCode !== CHARCODE_GREATER_THAN) { | |
throw getUnexpectedCharacterError(); | |
} | |
selfClosing = true; | |
state = STATE_END_TAG_CLOSE; | |
break; | |
case STATE_CONTENT_CARRIAGE_RETURN: | |
switch (charCode) { | |
default: | |
content += '\n'; | |
// Fallthrough | |
case CHARCODE_LF: | |
stringStartIndex = index; | |
onContentCharCode(); | |
break; | |
} | |
break; | |
case STATE_CONTENT_CDATA_SELECTION_CLOSE_2: | |
if (charCode === CHARCODE_GREATER_THAN) throw getUnexpectedCharacterError(); | |
// Fallthrough | |
case STATE_CONTENT_CDATA_SELECTION_CLOSE_1: | |
if (charCode === CHARCODE_CLOSE_BRACKET) { | |
state = STATE_CONTENT_CDATA_SELECTION_CLOSE_2; | |
hasContent = true; | |
break; | |
} | |
// Fallthrough | |
case STATE_CONTENT: | |
onContentCharCode(); | |
break; | |
case STATE_CONTENT_REFERENCE: | |
switch (charCode) { | |
case CHARCODE_HASH: | |
state = STATE_CONTENT_CHAR_REFERENCE; | |
break; | |
default: | |
assertCharCodeRange(NAME_START_RANGES); | |
// Fallthrough | |
case CHARCODE_COLON: case CHARCODE_UNDERSCORE: | |
stringStartIndex = index; | |
state = STATE_CONTENT_ENTITY_REFERENCE; | |
} | |
break; | |
case STATE_CONTENT_ENTITY_REFERENCE: | |
switch (charCode) { | |
case CHARCODE_SEMICOLON: | |
entity = parseEntityReference(input.slice(stringStartIndex, index), declaredEntities); | |
content += entity; | |
hasContent = true; | |
stringStartIndex = index + 1; | |
state = STATE_CONTENT; | |
break; | |
default: | |
assertCharCodeRange(NAME_RANGES); | |
// Fallthrough | |
case CHARCODE_COLON: case CHARCODE_UNDERSCORE: | |
case CHARCODE_HYPHEN: case CHARCODE_PERIOD: case CHARCODE_MIDDLE_DOT: | |
} | |
break; | |
case STATE_CONTENT_CHAR_REFERENCE: | |
switch (charCode) { | |
case CHARCODE_LOWERCASE_X: | |
stringStartIndex = index + 1; | |
state = STATE_CONTENT_CHAR_REFERENCE_HEX; | |
break; | |
default: | |
assertCharCodeRange(CHAR_REFERENCE_DEC_RANGES); | |
stringStartIndex = index; | |
state = STATE_CONTENT_CHAR_REFERENCE_DEC; | |
} | |
break; | |
case STATE_CONTENT_CHAR_REFERENCE_DEC: | |
switch (charCode) { | |
case CHARCODE_SEMICOLON: | |
reference = Number.parseInt(input.slice(stringStartIndex, index), 10); | |
content += parseCharReference(reference); | |
hasContent = true; | |
stringStartIndex = index + 1; | |
state = STATE_CONTENT; | |
break; | |
default: | |
assertCharCodeRange(CHAR_REFERENCE_DEC_RANGES); | |
} | |
break; | |
case STATE_CONTENT_CHAR_REFERENCE_HEX: | |
switch (charCode) { | |
case CHARCODE_SEMICOLON: | |
reference = Number.parseInt(input.slice(stringStartIndex, index), 16); | |
content += parseCharReference(reference); | |
hasContent = true; | |
stringStartIndex = index + 1; | |
state = STATE_CONTENT; | |
break; | |
default: | |
assertCharCodeRange(CHAR_REFERENCE_HEX_RANGES); | |
} | |
break; | |
case STATE_DOCTYPE_OR_MISC_OR_ROOT_OPEN: | |
case STATE_UNKNOWN_TAG_OPEN: | |
switch (charCode) { | |
case CHARCODE_SLASH: | |
state = STATE_END_TAG_OPEN; | |
break; | |
case CHARCODE_BANG: | |
onTagOpen(NODE_TYPE_NOTATION_DECL); | |
break; | |
default: | |
onTagOpen(NODE_TYPE_CHILD); | |
} | |
break; | |
case STATE_END_TAG_OPEN: | |
switch (charCode) { | |
default: | |
assertCharCodeRange(NAME_START_RANGES); | |
// Fallthrough | |
case CHARCODE_COLON: case CHARCODE_UNDERSCORE: | |
stringStartIndex = index; | |
state = STATE_END_TAG_NAME; | |
} | |
break; | |
case STATE_END_TAG_NAME: | |
switch (charCode) { | |
default: | |
assertCharCodeRange(NAME_RANGES); | |
// Fallthrough | |
case CHARCODE_COLON: case CHARCODE_UNDERSCORE: | |
case CHARCODE_HYPHEN: case CHARCODE_PERIOD: case CHARCODE_MIDDLE_DOT: | |
break; | |
case CHARCODE_SPACE: case CHARCODE_CR: case CHARCODE_HTAB: case CHARCODE_LF: | |
if (input.slice(stringStartIndex, index) !== tagName) { | |
throw new Error(`Element was not closed (${tagName}) at ${index}.`); | |
} | |
state = STATE_END_TAG_WHITESPACE; | |
break; | |
case CHARCODE_GREATER_THAN: | |
if (input.slice(stringStartIndex, index) !== tagName) { | |
throw new Error(`Element was not closed (${tagName}) at ${index}.`); | |
} | |
state = STATE_END_TAG_CLOSE; | |
} | |
break; | |
case STATE_END_TAG_WHITESPACE: | |
switch (charCode) { | |
case CHARCODE_SPACE: case CHARCODE_CR: case CHARCODE_HTAB: case CHARCODE_LF: | |
break; | |
case CHARCODE_GREATER_THAN: | |
state = STATE_END_TAG_CLOSE; | |
break; | |
default: | |
throw getUnexpectedCharacterError(); | |
} | |
break; | |
case STATE_CDATA_OPEN: | |
if (charCode !== CHARCODE_C) throw getUnexpectedCharacterError(); | |
state = STATE_CDATA_C; | |
break; | |
case STATE_CDATA_C: | |
if (charCode !== CHARCODE_D) throw getUnexpectedCharacterError(); | |
state = STATE_CDATA_CD; | |
break; | |
case STATE_CDATA_CD: | |
if (charCode !== CHARCODE_A) throw getUnexpectedCharacterError(); | |
state = STATE_CDATA_CDA; | |
break; | |
case STATE_CDATA_CDA: | |
if (charCode !== CHARCODE_T) throw getUnexpectedCharacterError(); | |
state = STATE_CDATA_CDAT; | |
break; | |
case STATE_CDATA_CDAT: | |
if (charCode !== CHARCODE_A) throw getUnexpectedCharacterError(); | |
state = STATE_CDATA_DATA_START; | |
break; | |
case STATE_CDATA_DATA_START: | |
if (charCode !== CHARCODE_OPEN_BRACKET) throw getUnexpectedCharacterError(); | |
cdata = ''; | |
stringStartIndex = index + 1; | |
state = STATE_CDATA_DATA; | |
break; | |
case STATE_CDATA_DATA: | |
onCDataCharCode(); | |
break; | |
case STATE_CDATA_CARRIAGE_RETURN: | |
switch (charCode) { | |
default: | |
cdata += '\n'; | |
// Fallthrough | |
case CHARCODE_LF: | |
stringStartIndex = index; | |
onCDataCharCode(); | |
break; | |
} | |
break; | |
case STATE_CDATA_DATA_END: | |
if (charCode === CHARCODE_CLOSE_BRACKET) { | |
state = STATE_CDATA_CLOSE; | |
} else { | |
onCDataCharCode(); | |
} | |
break; | |
case STATE_CDATA_CLOSE: | |
if (charCode === CHARCODE_GREATER_THAN) { | |
tagName = CDATA_NODE_KEY; | |
cdata += input.slice(stringStartIndex, index - 2); | |
stringReturnValue = cdata; | |
state = STATE_END_TAG_CLOSE; | |
} else { | |
onCDataCharCode(); | |
} | |
break; | |
default: | |
} | |
// logState(); | |
// previousState = state; | |
if (state === STATE_END_TAG_CLOSE) { | |
// console.log('close', input.slice(options.index, index).replace(/\n/g, '\\n').slice(0, 60)); | |
return buildReturnValue(); | |
} | |
index += 1; | |
charCode = input.charCodeAt(index); | |
} | |
switch (state) { | |
case STATE_CONTENT: | |
if (options.nodeType === NODE_TYPE_CHILD) break; | |
// Fallthrough | |
case STATE_END_TAG_CLOSE: | |
case STATE_MISC_WHITESPACE: | |
return buildReturnValue(); | |
default: | |
} | |
throw new Error('EOF'); | |
} | |
/**
 * Options controlling how a parsed tuple tree is flattened into an object.
 * @typedef {Object} ParseXMLFlattenOptions
 * @prop {boolean} [flattenContent=true] Unless `false`, a node whose only
 * enumerable key is the content key is replaced by that content value.
 * @prop {boolean} [flattenArrays=true] Unless `false`, a child name seen once
 * is stored directly and only becomes an array when the name repeats. When
 * `false`, every child name maps to an array.
 * @prop {boolean} [mergeContentNodes=true] Unless `false`, multiple content
 * values of one node are joined into a single string.
 * @prop {boolean} [skipAttributes=false] When truthy, the attribute node is
 * left out of the result.
 * @prop {boolean} [removeNamespaces=false] When truthy, everything up to and
 * including the first `:` is stripped from element and attribute names.
 */

/**
 * Reports whether `key` is an own property of `target`. Unlike the `in`
 * operator this ignores members inherited from `Object.prototype`, so XML
 * names such as `toString` or `constructor` are not mistaken for entries
 * that were already collected.
 * @param {object} target
 * @param {PropertyKey} key
 * @return {boolean}
 */
function hasOwnKey(target, key) {
  return Object.prototype.hasOwnProperty.call(target, key);
}

/**
 * Stores `value` on `target` as an ordinary own data property (enumerable,
 * writable, configurable). Going through `defineProperty` instead of
 * assignment keeps a key named `__proto__` from invoking the prototype
 * setter.
 * @param {object} target
 * @param {PropertyKey} key
 * @param {any} value
 * @return {void}
 */
function setOwnKey(target, key, value) {
  Object.defineProperty(target, key, {
    enumerable: true, configurable: true, writable: true, value,
  });
}

/**
 * Converts a parsed tuple tree (`[key, value]` entries) into a plain object.
 *
 * Besides the enumerable child keys, the result carries three non-enumerable
 * helper maps stored under `AS_ARRAY_KEY`, `AS_OBJECT_KEY` and
 * `AS_STRING_KEY`:
 * - the array map lists every non-empty value seen for a child name;
 * - the object map holds the first value, wrapped as `{ $: text }` when it is
 *   a string;
 * - the string map holds the first value when it is a string.
 *
 * An empty `parsedXML` yields `undefined`. A child that flattens to
 * `undefined` is recorded as `null` (or `[]` when `flattenArrays` is `false`)
 * only if its name has not been seen before, and is not added to the helper
 * maps.
 * @param {TupleTree<string>} parsedXML
 * @param {ParseXMLFlattenOptions} [options]
 * @return {XMLObject<unknown>}
 */
export function flattenParsedXML(parsedXML, options = {}) {
  if (!parsedXML.length) return;
  // Resolve the documented default (`true`) once so every branch agrees.
  const flattenArrays = options.flattenArrays !== false;

  /** @type {any} */
  const result = {};
  for (const helperKey of [AS_ARRAY_KEY, AS_OBJECT_KEY, AS_STRING_KEY]) {
    Object.defineProperty(result, helperKey, {
      enumerable: false, configurable: true, value: {}, writable: false,
    });
  }
  const asArray = result[AS_ARRAY_KEY];
  const asObject = result[AS_OBJECT_KEY];
  const asString = result[AS_STRING_KEY];

  for (const [key, value] of parsedXML) {
    if (key === ATTRIBUTE_NODE_KEY) {
      if (!options.skipAttributes) {
        const attributeEntries = /** @type {[string,string][]} */ (value);
        result[key] = Object.fromEntries(options.removeNamespaces
          ? attributeEntries.map(([attrKey, attrValue]) => [attrKey.replace(/^[^:]*:/, ''), attrValue])
          : attributeEntries);
      }
      continue;
    }

    let outKey;
    if (key === CDATA_NODE_KEY) {
      // CDATA sections are reported under the same key as regular content.
      outKey = CONTENT_NODE_KEY;
    } else if (options.removeNamespaces) {
      outKey = key.replace(/^[^:]*:/, '');
    } else {
      outKey = key;
    }

    let flattenedValue = value;
    let typeofFlattenedValue = 'string';
    if (typeof value !== 'string') {
      flattenedValue = flattenParsedXML(value, options);
      typeofFlattenedValue = typeof flattenedValue;
      if (typeofFlattenedValue === 'undefined') {
        // Empty child: leave a placeholder unless the name already has a value.
        if (!hasOwnKey(result, outKey)) {
          setOwnKey(result, outKey, flattenArrays ? null : []);
        }
        continue;
      }
    }

    // Helper maps are keyed off their own state, not off `result`, because an
    // earlier empty sibling may have created `result[outKey]` without them.
    if (hasOwnKey(asArray, outKey)) {
      asArray[outKey].push(flattenedValue);
    } else {
      setOwnKey(asArray, outKey, [flattenedValue]);
      if (typeofFlattenedValue === 'string') {
        setOwnKey(asObject, outKey, { $: flattenedValue });
        setOwnKey(asString, outKey, flattenedValue);
      } else {
        setOwnKey(asObject, outKey, flattenedValue);
      }
    }

    if (!hasOwnKey(result, outKey)) {
      setOwnKey(result, outKey, flattenArrays ? flattenedValue : [flattenedValue]);
    } else if (flattenArrays && !Array.isArray(result[outKey])) {
      // Second occurrence of a name: promote the single value to an array.
      setOwnKey(result, outKey, [result[outKey], flattenedValue]);
    } else {
      result[outKey].push(flattenedValue);
    }
  }

  if (options.mergeContentNodes !== false && Array.isArray(result[CONTENT_NODE_KEY])) {
    result[CONTENT_NODE_KEY] = result[CONTENT_NODE_KEY].join('');
  }
  if (options.flattenContent !== false
    && hasOwnKey(result, CONTENT_NODE_KEY)
    && Object.keys(result).length === 1) {
    return result[CONTENT_NODE_KEY];
  }
  return result;
}
/**
 * Parses an XML string into its tuple-tree form.
 *
 * Delegates to `parseXMLNode` and hands back only the second element of the
 * entry it produces; the first element is discarded.
 * @param {string} input XML source text.
 * @return {TupleTree<string>}
 */
export function parseXMLAsEntries(input) {
  const [, rootValue] = parseXMLNode(input);
  const tree = /** @type {TupleTree<string>} */ (rootValue);
  return tree;
}
/**
 * Produces the flattened object form of an XML document.
 *
 * A string input is parsed with `parseXMLAsEntries` first; any other input is
 * treated as an already-parsed tuple tree and flattened as-is.
 * @param {string|TupleTree<string>} input
 * @param {ParseXMLFlattenOptions} [flattenOptions]
 * @return {XMLObject<unknown>}
 */
export function parseXMLAsObject(input, flattenOptions) {
  if (typeof input !== 'string') {
    return flattenParsedXML(input, flattenOptions);
  }
  return flattenParsedXML(parseXMLAsEntries(input), flattenOptions);
}
/**
 * Parses an XML string.
 *
 * Returns the raw tuple tree when `flattenOptions` is omitted (or otherwise
 * falsy). When a truthy options value is supplied, including an empty object,
 * the tree is passed through `flattenParsedXML` with those options.
 * @param {string} input
 * @param {ParseXMLFlattenOptions} [flattenOptions]
 * @return {TupleTree<string>|XMLObject<unknown>}
 */
export function parseXML(input, flattenOptions) {
  const tree = parseXMLAsEntries(input);
  return flattenOptions ? flattenParsedXML(tree, flattenOptions) : tree;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment