Last active
March 24, 2024 13:53
-
-
Save nberlette/d00faf7253a5f76ec2893cb7a51647ec to your computer and use it in GitHub Desktop.
XMLFormatter
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Line-break sequences that the formatter can emit.
 *
 * The member values are the literal character sequences themselves, so a
 * member can be concatenated straight into output text.
 */
export enum EOL {
  /** Carriage return followed by line feed (`"\r\n"`). */
  CRLF = "\r\n",
  /** Carriage return only (`"\r"`). */
  CR = "\r",
  /** Line feed only (`"\n"`). */
  LF = "\n",
}
/**
 * Settings accepted by `XMLFormatter`. Every property is optional.
 */
export interface Options {
  /** Line-break sequence written between lines and as the final newline. */
  newLine?: EOL | `${EOL}`;
  /**
   * Preferred line width. The formatter validates it (0 to 1000) and stores
   * it, but neither `format` nor `minify` consults it at present.
   */
  lineWidth?: number;
  /** Spaces per indentation level when `useTabs` is off (0 to 8). */
  tabSize?: number;
  /** Indent with one tab per level instead of `tabSize` spaces. */
  useTabs?: boolean;
  /** Put each `xmlns` / `xmlns:prefix` declaration on a line of its own. */
  splitNS?: boolean;
  /** Terminate the output with one `newLine` sequence. */
  finalNewLine?: boolean;
  /** Strip `<!-- ... -->` comments when minifying. */
  removeComments?: boolean;
  /** Stored on the formatter; not consulted by `format` or `minify`. */
  verbose?: boolean;
  /** When set, `format` logs the intermediate fragment list to the console. */
  debug?: boolean;
}
/**
 * Internal tool for minifying or formatting XML and HTML data.
 *
 * Based on pretty-data.
 *
 * Both operations are regular-expression driven and do not validate the
 * markup; malformed input is processed on a best-effort basis.
 *
 * @see https://github.com/vkiryukhin/pretty-data
 */
export class XMLFormatter {
  /** Default option values used for every option the caller leaves unset. */
  static readonly options = {
    useTabs: false,
    splitNS: true,
    tabSize: 4,
    newLine: "\n",
    finalNewLine: true,
    removeComments: false,
    verbose: false,
    debug: false,
  } satisfies Options;

  static #default?: XMLFormatter;

  /** Lazily created shared instance configured with the default options. */
  static get default(): XMLFormatter {
    return XMLFormatter.#default ??= new XMLFormatter(XMLFormatter.options);
  }

  /**
   * Pretty-prints `xml` with a throw-away formatter.
   *
   * @param xml Markup to format.
   * @param options Settings for this call; defaults to the static options.
   * @returns The indented markup.
   */
  static format(
    xml: string,
    options: Options = XMLFormatter.options,
  ): string {
    return new XMLFormatter(options).format(xml);
  }

  /**
   * Minifies `xml` with a throw-away formatter.
   *
   * @param xml Markup to minify.
   * @param options Settings for this call; defaults to the static options.
   * @returns The compacted markup.
   */
  static minify(
    xml: string,
    options: Options = XMLFormatter.options,
  ): string {
    return new XMLFormatter(options).minify(xml);
  }

  /** Creates a formatter from `options` (alias for the constructor). */
  static from(options: Options = XMLFormatter.options): XMLFormatter {
    return new XMLFormatter(options);
  }

  #lineWidth = 80;
  #newLine: EOL | `${EOL}` = "\n";
  #removeComments = false;
  #splitNS = true;
  #tabSize = 4;
  #useTabs = false;
  #verbose = false;
  #debug = false;
  #finalNewLine = true;

  /**
   * @param options Overrides for the static defaults. Properties that are
   * missing or `undefined` fall back to the default instead of being passed
   * to the validating setters.
   * @throws {TypeError} If `tabSize`/`lineWidth` is not a number or `newLine`
   * is not a supported line break.
   * @throws {RangeError} If `tabSize` or `lineWidth` is out of range.
   */
  constructor(options: Options = {}) {
    const defaults = XMLFormatter.options;
    // Go through the setters so every value is validated / coerced.
    this.newLine = options.newLine ?? defaults.newLine;
    this.useTabs = options.useTabs ?? defaults.useTabs;
    this.tabSize = options.tabSize ?? defaults.tabSize;
    this.splitNS = options.splitNS ?? defaults.splitNS;
    this.verbose = options.verbose ?? defaults.verbose;
    this.debug = options.debug ?? defaults.debug;
    this.finalNewLine = options.finalNewLine ?? defaults.finalNewLine;
    this.removeComments = options.removeComments ?? defaults.removeComments;
    // `lineWidth` has no entry in the static defaults; keep the field default
    // (80) unless the caller supplied a value.
    if (options.lineWidth != null) this.lineWidth = options.lineWidth;
  }

  /** The text of one indentation level: a tab, or `tabSize` spaces. */
  public get indent(): string {
    return this.useTabs ? "\t" : " ".repeat(this.tabSize);
  }

  /** Whether indentation uses tabs instead of spaces. */
  public get useTabs(): boolean {
    return this.#useTabs;
  }
  public set useTabs(value: boolean) {
    this.#useTabs = Boolean(value);
  }

  /** Number of spaces per indentation level (0 to 8). */
  public get tabSize(): number {
    return this.#tabSize;
  }
  public set tabSize(value: number) {
    if (typeof value !== "number" || Number.isNaN(value)) {
      throw new TypeError("[XMLFormatter] 'tabSize' must be a number");
    }
    if (value < 0 || value > 8) {
      throw new RangeError("[XMLFormatter] 'tabSize' must be between 0 and 8");
    }
    this.#tabSize = value;
  }

  /** Whether `xmlns` declarations are moved onto their own lines. */
  public get splitNS(): boolean {
    return this.#splitNS;
  }
  public set splitNS(value: boolean) {
    this.#splitNS = Boolean(value);
  }

  /** Whether `minify` strips comments when no explicit flag is passed. */
  public get removeComments(): boolean {
    return this.#removeComments;
  }
  public set removeComments(value: boolean) {
    this.#removeComments = Boolean(value);
  }

  /**
   * Preferred line width (0 to 1000). Stored and validated only; `format`
   * and `minify` do not consult it.
   */
  public get lineWidth(): number {
    return this.#lineWidth;
  }
  public set lineWidth(value: number) {
    if (typeof value !== "number" || Number.isNaN(value)) {
      throw new TypeError("[XMLFormatter] 'lineWidth' must be a number");
    }
    if (value < 0 || value > 1000) {
      throw new RangeError(
        "[XMLFormatter] 'lineWidth' must be between 0 and 1000",
      );
    }
    this.#lineWidth = value;
  }

  /** Line-break sequence used in the output. */
  public get newLine(): EOL | `${EOL}` {
    return this.#newLine;
  }
  public set newLine(value: EOL | `${EOL}`) {
    // Compared against the literal sequences (the values of the EOL members).
    const supported: readonly string[] = ["\r\n", "\r", "\n"];
    if (!supported.includes(value)) {
      throw new TypeError(
        "[XMLFormatter] 'newLine' must be either '\\r\\n', '\\r', or '\\n'.",
      );
    }
    this.#newLine = value;
  }

  /** Whether the output ends with exactly one `newLine` sequence. */
  public get finalNewLine(): boolean {
    return this.#finalNewLine;
  }
  public set finalNewLine(value: boolean) {
    this.#finalNewLine = Boolean(value);
  }

  /** Verbosity flag. Stored only; not consulted by `format` or `minify`. */
  public get verbose(): boolean {
    return this.#verbose;
  }
  public set verbose(value: boolean) {
    this.#verbose = Boolean(value);
  }

  /** When enabled, `format` logs its fragment list to the console. */
  public get debug(): boolean {
    return this.#debug;
  }
  public set debug(value: boolean) {
    this.#debug = Boolean(value);
  }

  /**
   * Pretty-prints `xml`, one tag per line, indented by nesting depth.
   *
   * The markup is compacted first (comments are always kept here), cut into
   * fragments in front of every `<` (and, with `splitNS`, in front of every
   * `xmlns:` / `xmlns=`), and each fragment is then placed according to the
   * kind of markup it contains.
   *
   * @param xml Markup to format.
   * @returns The formatted markup, ending with `newLine` when `finalNewLine`
   * is enabled.
   */
  public format(xml: string): string {
    const DELIM = "~::~";
    let source = this.#compact(xml, false).replace(/</g, `${DELIM}<`);
    if (this.splitNS) {
      source = source.replace(/xmlns([:=])/g, `${DELIM}xmlns$1`);
    }
    const parts = source.split(DELIM);
    if (this.debug) console.log(parts);

    // `inComment` is true while inside a comment / CDATA / `<!...[` section
    // whose terminator has not been reached yet.
    let inComment = false, level = 0, output = "";
    for (const [i, part] of parts.entries()) {
      // Only the text before the first tag can be blank; it carries nothing.
      if (part.trim() === "") continue;
      const prev = parts[i - 1] ?? "";
      const hasOpen = /<(\w|:)/.test(part);
      const hasClose = /<\//.test(part);
      const hasSelfClose = /\/>/.test(part);
      const hasXmlns = this.splitNS && /xmlns[:=]/.test(part);
      const endsSection = /-->/.test(part) || /\]>/.test(part);

      if (/<!/.test(part)) {
        // <!-- ..., <![CDATA[ ..., <!DOCTYPE ...
        output += this.#getIndent(level, part);
        inComment = !(endsSection || /!DOCTYPE/i.test(part));
      } else if (endsSection) {
        // ... --> or ... ]]>
        output += part;
        inComment = false;
      } else if (this.#closesPrevious(prev, part)) {
        // <elm></elm>: keep the closing tag on the opening tag's line
        output += part;
        if (!inComment) level = Math.max(level - 1, 0);
      } else if (inComment) {
        // Content of an open comment / CDATA section stays untouched.
        output += part;
      } else if (hasOpen && !hasClose && !hasSelfClose) {
        // <elm>
        output += this.#getIndent(level++, part);
      } else if (hasClose) {
        // </elm>: step out BEFORE indenting so it lines up with <elm>
        level = Math.max(level - 1, 0);
        output += this.#getIndent(level, part);
      } else if (hasSelfClose) {
        if (hasXmlns) {
          // xmlns... />: continuation line that also ends the element
          output = output.replace(/[ \t]+$/, "");
          output += this.#getIndent(level, part);
          level = Math.max(level - 1, 0);
        } else {
          // <elm />
          output += this.#getIndent(level, part);
        }
      } else if (/<\?/.test(part)) {
        // <?xml ... ?>
        output += this.#getIndent(level, part);
      } else if (hasXmlns) {
        // xmlns... continuation line; drop the space left on the previous one
        output = output.replace(/[ \t]+$/, "");
        output += this.#getIndent(level, part);
      } else {
        output += part;
      }
    }

    // Every indented fragment starts with a line break, so the result begins
    // with one; drop it along with any trailing breaks.
    output = output
      .replace(/^(\r\n|\r|\n)+/, "")
      .replace(/(\r\n|\r|\n)+$/, "");
    if (this.finalNewLine) output += this.newLine;
    return output;
  }

  /**
   * Removes insignificant whitespace from `xml`.
   *
   * @param xml Markup to minify.
   * @param removeComments Also strip `<!-- ... -->` comments; defaults to
   * the instance's `removeComments` setting.
   * @returns The compacted markup, ending with `newLine` when `finalNewLine`
   * is enabled.
   */
  public minify(xml: string, removeComments = this.removeComments): string {
    const compact = this.#compact(xml, removeComments);
    return this.finalNewLine ? compact + this.newLine : compact;
  }

  /** Shared core of `minify` and `format`: compacts without a final newline. */
  #compact(xml: string, removeComments: boolean): string {
    // all line breaks outside of CDATA sections and comments
    let out = this.#stripLineBreaks(xml);
    // remove comments
    if (removeComments) {
      out = out.replace(
        /\<![ \r\n\t]*(--([^\-]|[\r\n]|-[^\-])*--[ \r\n\t]*)\>/g,
        "",
      );
    }
    // insignificant whitespace between tags
    out = out.replace(/>\s{0,}</g, "><");
    // spaces between attributes
    out = out.replace(/"\s+(?=[^\s]+=)/g, '" ');
    // spaces between the last attribute and tag close (>)
    out = out.replace(/"\s+(?=>)/g, '"');
    // spaces between the last attribute and tag close (/>)
    out = out.replace(/"\s+(?=\/>)/g, '" ');
    // spaces between the node name and the first attribute
    out = out.replace(/[^ <>="]\s+[^ <>="]+=/g, (m) => m.replace(/\s+/g, " "));
    // trailing whitespace
    return out.replace(/\s+$/, "");
  }

  /**
   * Tells whether `part` is the closing tag of the element that `prev`
   * opened, i.e. the two fragments form `<elm>text</elm>`.
   */
  #closesPrevious(prev: string, part: string): boolean {
    if (!/^<(\w|:)/.test(prev) || !/^<\/(\w|:)/.test(part)) return false;
    // "/" is part of the first class so that "<elm/>" never pairs up.
    const opened = /^<[\w:\-.,/]+/.exec(prev)?.[0];
    const closed = /^<\/[\w:\-.,]+/.exec(part)?.[0]?.replace("/", "");
    return opened === closed;
  }

  /** Line break plus `level` indents (never fewer than zero), then the value. */
  #getIndent(level: number, trailingValue = ""): string {
    const depth = Math.max(level, 0);
    return `${this.newLine}${this.indent.repeat(depth)}${trailingValue}`;
  }

  /**
   * Drops line breaks that sit next to other whitespace (typically
   * indentation) while copying CDATA sections and comments verbatim.
   *
   * A break is kept only when the characters on both sides are something
   * other than blank space. For `\r` the right-hand neighbour and for `\n`
   * the left-hand neighbour is taken `newLine.length` characters away, which
   * looks past the other half of a CRLF pair when `newLine` is CRLF. The
   * start and end of the input count as blank space.
   */
  #stripLineBreaks(xml: string): string {
    const isSolid = (ch: string): boolean => /\S|\r|\n/.test(ch);
    const reach = this.newLine.length;
    let output = "";
    for (let i = 0; i < xml.length; i++) {
      const char = xml.charAt(i);
      if (char === "<") {
        const opener = xml.startsWith("<![CDATA[", i)
          ? "<![CDATA["
          : xml.startsWith("<!--", i)
          ? "<!--"
          : undefined;
        if (opener !== undefined) {
          // Copy the whole section untouched; an unterminated section runs
          // to the end of the input.
          const closer = opener === "<!--" ? "-->" : "]]>";
          const end = xml.indexOf(closer, i + opener.length);
          const stop = end === -1 ? xml.length : end + closer.length;
          output += xml.slice(i, stop);
          i = stop - 1;
          continue;
        }
      } else if (char === "\r") {
        if (isSolid(xml.charAt(i - 1)) && isSolid(xml.charAt(i + reach))) {
          output += char;
        }
        continue;
      } else if (char === "\n") {
        if (isSolid(xml.charAt(i - reach)) && isSolid(xml.charAt(i + 1))) {
          output += char;
        }
        continue;
      }
      output += char;
    }
    return output;
  }
}
/**
 * Type-only companion namespace: re-exports the module's `Options`
 * interface so it can also be referenced as `XMLFormatter.Options`.
 */
export declare namespace XMLFormatter {
  export type { Options };
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment