nberlette · March 24, 2024 13:53
diff --git a/xml-formatter.ts b/xml-formatter.ts
 /** Line-ending sequences that the formatter's `newLine` setting accepts. */
 export enum EOL {
  /** Carriage return followed by line feed. */
  CRLF = "\r\n",
  /** Carriage return only. */
  CR = "\r",
  /** Line feed only. */
  LF = "\n",
 }

 /**
  * Settings understood by `XMLFormatter`. Every member is optional.
  */
 export interface Options {
  // --- indentation -------------------------------------------------------

  /** Indent with one tab character per level instead of spaces. */
  useTabs?: boolean;
  /** Number of spaces per indent level when `useTabs` is off (setter accepts 0 to 8). */
  tabSize?: number;

  // --- line handling -----------------------------------------------------

  /** Line terminator written before each indented part and as the final newline. */
  newLine?: `${EOL}` | EOL;
  /** Append `newLine` to the end of the produced text. */
  finalNewLine?: boolean;
  /**
   * Line width (the `lineWidth` setter accepts 0 to 1000).
   * NOTE(review): not consulted by `format` or `minify` in the visible code.
   */
  lineWidth?: number;

  // --- content -----------------------------------------------------------

  /** Start a new line at every `xmlns:` / `xmlns=` attribute while formatting. */
  splitNS?: boolean;
  /** Default for whether `minify` strips `<!-- ... -->` comments. */
  removeComments?: boolean;

  // --- diagnostics -------------------------------------------------------

  /** Stored on the formatter; no visible code reads it. */
  verbose?: boolean;
  /** When set, `format` logs the intermediate list of parts with `console.log`. */
  debug?: boolean;
 }

 /**
  * Regex-driven minifier and pretty-printer for XML and HTML markup.
  *
  * Based on pretty-data. The input is never parsed into a tree: `minify`
  * collapses insignificant whitespace with a series of regular expressions and
  * `format` re-indents the minified text one tag at a time.
  *
  * @see https://github.com/vkiryukhin/pretty-data
  */
 export class XMLFormatter {
  /** Default settings; options passed to the constructor are merged over these. */
  static readonly options = {
    useTabs: false,
    splitNS: true,
    tabSize: 4,
    newLine: "\n",
    finalNewLine: true,
    removeComments: false,
    verbose: false,
    debug: false,
  } satisfies Options;

  static #default?: XMLFormatter;

  /** Shared formatter built from the static `options` on first access. */
  static get default(): XMLFormatter {
    return XMLFormatter.#default ??= new XMLFormatter(XMLFormatter.options);
  }

  /**
   * Pretty-prints `xml` with a one-off formatter.
   *
   * @param xml Markup to format.
   * @param options Settings for the one-off formatter.
   * @returns The re-indented markup.
   */
  static format(
    xml: string,
    options: Options = XMLFormatter.options,
  ): string {
    return new XMLFormatter(options).format(xml);
  }

  /**
   * Minifies `xml` with a one-off formatter.
   *
   * @param xml Markup to minify.
   * @param options Settings for the one-off formatter.
   * @returns The minified markup.
   */
  static minify(
    xml: string,
    options: Options = XMLFormatter.options,
  ): string {
    return new XMLFormatter(options).minify(xml);
  }

  /** Creates a formatter; equivalent to calling the constructor. */
  static from(options: Options = XMLFormatter.options): XMLFormatter {
    return new XMLFormatter(options);
  }

  #lineWidth = 80;
  #newLine: EOL | `${EOL}` = EOL.LF;
  #removeComments = false;
  #splitNS = true;
  #tabSize = 4;
  #useTabs = false;
  #verbose = false;
  #debug = false;
  #finalNewLine = true;

  /**
   * @param options Overrides for the static default `options`.
   * @throws {TypeError} If `tabSize` or `lineWidth` is not a number, or
   *   `newLine` is not one of the supported line endings.
   * @throws {RangeError} If `tabSize` or `lineWidth` is out of range.
   */
  constructor(options: Options = {}) {
    const {
      newLine,
      useTabs,
      tabSize,
      splitNS,
      finalNewLine,
      verbose,
      debug,
      removeComments,
      lineWidth,
    } = { ...XMLFormatter.options, ...options };

    // Object.assign performs ordinary property sets, so each value passes
    // through the validating setter of the same name.
    Object.assign(this, {
      newLine,
      useTabs,
      tabSize,
      splitNS,
      verbose,
      debug,
      finalNewLine,
      removeComments,
    });

    // The static defaults carry no `lineWidth`, so it is applied only when the
    // caller supplied one; otherwise the field initializer (80) stands.
    if (lineWidth !== undefined) this.lineWidth = lineWidth;
  }

  /** Text of one indent level: a tab, or `tabSize` spaces. */
  public get indent(): string {
    return this.useTabs ? "\t" : " ".repeat(this.tabSize);
  }

  public get useTabs(): boolean {
    return this.#useTabs;
  }

  public set useTabs(value: boolean) {
    this.#useTabs = Boolean(value);
  }

  public get tabSize(): number {
    return this.#tabSize;
  }

  /**
   * @throws {TypeError} If `value` is not a number or is NaN.
   * @throws {RangeError} If `value` is outside 0 to 8.
   */
  public set tabSize(value: number) {
    if (typeof value !== "number" || Number.isNaN(value)) {
      throw new TypeError("[XMLFormatter] 'tabSize' must be a number");
    }
    if (value < 0 || value > 8) {
      throw new RangeError("[XMLFormatter] 'tabSize' must be between 0 and 8");
    }
    this.#tabSize = value;
  }

  public get splitNS(): boolean {
    return this.#splitNS;
  }

  public set splitNS(value: boolean) {
    this.#splitNS = Boolean(value);
  }

  public get removeComments(): boolean {
    return this.#removeComments;
  }

  public set removeComments(value: boolean) {
    this.#removeComments = Boolean(value);
  }

  public get lineWidth(): number {
    return this.#lineWidth;
  }

  /**
   * @throws {TypeError} If `value` is not a number or is NaN.
   * @throws {RangeError} If `value` is outside 0 to 1000.
   */
  public set lineWidth(value: number) {
    if (typeof value !== "number" || Number.isNaN(value)) {
      throw new TypeError("[XMLFormatter] 'lineWidth' must be a number");
    }
    if (value < 0 || value > 1000) {
      throw new RangeError(
        "[XMLFormatter] 'lineWidth' must be between 0 and 1000",
      );
    }
    this.#lineWidth = value;
  }

  public get newLine(): EOL | `${EOL}` {
    return this.#newLine;
  }

  /** @throws {TypeError} If `value` is not CRLF, CR or LF. */
  public set newLine(value: EOL | `${EOL}`) {
    if (![EOL.CRLF, EOL.CR, EOL.LF].includes(value as EOL)) {
      throw new TypeError(
        "[XMLFormatter] 'newLine' must be either '\\r\\n', '\\r', or '\\n'.",
      );
    }
    this.#newLine = value;
  }

  public get finalNewLine(): boolean {
    return this.#finalNewLine;
  }

  public set finalNewLine(value: boolean) {
    this.#finalNewLine = Boolean(value);
  }

  public get verbose(): boolean {
    return this.#verbose;
  }

  public set verbose(value: boolean) {
    this.#verbose = Boolean(value);
  }

  public get debug(): boolean {
    return this.#debug;
  }

  public set debug(value: boolean) {
    this.#debug = Boolean(value);
  }

  /**
   * Pretty-prints markup: the input is minified (comments kept), cut into
   * parts at every `<` (and at every `xmlns` attribute when `splitNS` is on),
   * and each part is emitted on its own line at the current nesting level.
   *
   * @param xml Markup to format.
   * @returns The indented markup, ending with `newLine` when `finalNewLine`
   *   is enabled.
   */
  public format(xml: string): string {
    const DELIM = "~::~";
    const LEADING_LF_RE = /^(\r\n|\r|\n)+/;
    const TRAILING_LF_RE = /(\r\n|\r|\n)+$/;

    xml = this.minify(xml, false).replace(/</g, `${DELIM}<`);
    if (this.splitNS) xml = xml.replace(/xmlns([:=])/g, `${DELIM}xmlns$1`);

    const parts = xml.split(DELIM);
    if (this.debug) console.log(parts);

    let inComment = false, level = 0, output = "";
    for (let i = 0; i < parts.length; i++) {
      const part = parts[i];
      const prev = parts[i - 1] ?? "";

      const opens = /<(\w|:)/.test(part); // contains "<name"
      const closes = part.includes("</"); // contains "</"
      const selfCloses = part.includes("/>"); // contains "/>"
      const splitHere = this.splitNS && /xmlns[:=]/.test(part);

      if (part.includes("<!")) {
        // <!-- ..., <![CDATA[ ... or <!DOCTYPE ...
        output += this.#getIndent(level, part);
        // Still inside the construct unless its terminator is in this part.
        inComment = !(
          part.includes("-->") || part.includes("]>") ||
          /!DOCTYPE/i.test(part)
        );
      } else if (part.includes("-->") || part.includes("]>")) {
        // tail of a construct that began in an earlier part
        output += part;
        inComment = false;
      } else if (
        // <elm></elm>: closing tag directly after its own opening tag
        /^<(\w|:)/.test(prev) &&
        /^<\/(\w|:)/.test(part) &&
        /^<[\w:\-.,\/]+/.exec(prev)?.[0] ===
          /^<\/[\w:\-.,]+/.exec(part)?.[0]?.replace("/", "")
      ) {
        output += part;
        if (!inComment) --level;
      } else if (opens && !closes && !selfCloses) {
        // <elm>
        output += inComment ? part : this.#getIndent(level++, part);
      } else if (opens && closes) {
        // <elm>...</elm>
        output += inComment ? part : this.#getIndent(level, part);
      } else if (closes) {
        // </elm>: step out BEFORE indenting so it lines up with its opener
        output += inComment ? part : this.#getIndent(--level, part);
      } else if (selfCloses && !splitHere) {
        // <elm />
        output += inComment ? part : this.#getIndent(level, part);
      } else if (selfCloses) {
        // xmlns ... />: last attribute of a tag whose opener raised the level
        output += inComment ? part : this.#getIndent(level--, part);
      } else if (part.includes("<?")) {
        // <?xml ... ?>
        output += this.#getIndent(level, part);
      } else if (splitHere) {
        // xmlns attribute split onto its own line
        output += this.#getIndent(level, part);
      } else {
        output += part;
      }
    }

    // Every indented part starts with a newline; drop the ones at the edges.
    output = output.replace(LEADING_LF_RE, "").replace(TRAILING_LF_RE, "");

    // add final newline, if desired
    if (this.finalNewLine) output += this.newLine;

    return output;
  }

  /**
   * Collapses insignificant whitespace in markup.
   *
   * @param xml Markup to minify.
   * @param removeComments Strip `<!-- ... -->` comments; defaults to the
   *   instance's `removeComments` setting.
   * @returns The minified markup, ending with `newLine` when `finalNewLine`
   *   is enabled.
   */
  public minify(xml: string, removeComments = this.removeComments): string {
    // line breaks outside of CDATA sections and comments
    xml = this.#stripLineBreaks(xml);

    // remove comments
    if (removeComments) {
      xml = xml.replace(
        /\<![ \r\n\t]*(--([^\-]|[\r\n]|-[^\-])*--[ \r\n\t]*)\>/g,
        "",
      );
    }

    // insignificant whitespace between tags
    xml = xml.replace(/>\s{0,}</g, "><");
    // spaces between attributes
    xml = xml.replace(/"\s+(?=[^\s]+=)/g, '" ');
    // spaces between the last attribute and tag close (>)
    xml = xml.replace(/"\s+(?=>)/g, '"');
    // spaces between the last attribute and tag close (/>)
    xml = xml.replace(/"\s+(?=\/>)/g, '" ');
    // spaces between the node name and the first attribute
    xml = xml.replace(/[^ <>="]\s+[^ <>="]+=/g, (m) => m.replace(/\s+/g, " "));

    // final new line
    xml = xml.replace(/\s+$/, "");
    if (this.finalNewLine) xml += this.newLine;

    return xml;
  }

  /**
   * Builds `newLine` + indentation for `level` + `trailingValue`.
   * Negative levels (unbalanced closing tags) are clamped to zero because
   * `String.prototype.repeat` throws on a negative count.
   */
  #getIndent(level: number, trailingValue = ""): string {
    const depth = Math.max(0, level);
    return `${this.newLine}${this.indent.repeat(depth)}${trailingValue}`;
  }

  /**
   * Removes line breaks that touch other whitespace, leaving CDATA sections
   * and comments untouched. A line break is kept only when the characters on
   * both sides are non-whitespace or are themselves line breaks.
   */
  #stripLineBreaks(xml: string): string {
    const KEEP_RE = /\S|\r|\n/;
    const width = this.newLine.length;
    let output = "";
    let inCdata = false, inComment = false;

    for (let i = 0; i < xml.length; i++) {
      const char = xml.charAt(i);

      if (inCdata) {
        if (xml.startsWith("]]>", i)) inCdata = false;
      } else if (inComment) {
        if (xml.startsWith("-->", i)) inComment = false;
      } else if (char === "<") {
        inCdata = xml.startsWith("<![CDATA[", i);
        inComment = !inCdata && xml.startsWith("<!--", i);
      } else if (char === "\r" || char === "\n") {
        // charAt yields "" past either end, so a break at the very start or
        // end of the input never counts as surrounded.
        const before = char === "\r"
          ? xml.charAt(i - 1)
          : xml.charAt(i - width);
        const after = char === "\r"
          ? xml.charAt(i + width)
          : xml.charAt(i + 1);
        if (KEEP_RE.test(before) && KEEP_RE.test(after)) output += char;
        continue;
      }
      output += char;
    }
    return output;
  }
 }

 /**
  * Ambient namespace sharing the `XMLFormatter` name, re-exporting the options
  * type so that it can also be written as `XMLFormatter.Options`.
  */
 export declare namespace XMLFormatter {
  export type { Options };
 }
	/** Line-ending sequences that the formatter's `newLine` setting accepts. */
	export enum EOL {
	/** Carriage return followed by line feed. */
	CRLF = "\r\n",
	/** Carriage return only. */
	CR = "\r",
	/** Line feed only. */
	LF = "\n",
	}

	/**
	 * Settings understood by `XMLFormatter`. Every member is optional.
	 */
	export interface Options {
	  /** Line terminator written before each indented part and as the final newline. */
	  newLine?: EOL | `${EOL}`;
	  /**
	   * Line width (the `lineWidth` setter accepts 0 to 1000).
	   * NOTE(review): not consulted by `format` or `minify` in the visible code.
	   */
	  lineWidth?: number;
	  /** Number of spaces per indent level when `useTabs` is off (setter accepts 0 to 8). */
	  tabSize?: number;
	  /** Indent with one tab character per level instead of spaces. */
	  useTabs?: boolean;
	  /** Start a new line at every `xmlns:` / `xmlns=` attribute while formatting. */
	  splitNS?: boolean;
	  /** Append `newLine` to the end of the produced text. */
	  finalNewLine?: boolean;
	  /** Default for whether `minify` strips `<!-- ... -->` comments. */
	  removeComments?: boolean;
	  /** Stored on the formatter; no visible code reads it. */
	  verbose?: boolean;
	  /** When set, `format` logs the intermediate list of parts with `console.log`. */
	  debug?: boolean;
	}

	/**
	 * Regex-driven minifier and pretty-printer for XML and HTML markup.
	 *
	 * Based on pretty-data. The input is never parsed into a tree: `minify`
	 * collapses insignificant whitespace with a series of regular expressions and
	 * `format` re-indents the minified text one tag at a time.
	 *
	 * @see https://github.com/vkiryukhin/pretty-data
	 */
	export class XMLFormatter {
	  /** Default settings; options passed to the constructor are merged over these. */
	  static readonly options = {
	    useTabs: false,
	    splitNS: true,
	    tabSize: 4,
	    newLine: "\n",
	    finalNewLine: true,
	    removeComments: false,
	    verbose: false,
	    debug: false,
	  } satisfies Options;

	  static #default?: XMLFormatter;

	  /** Shared formatter built from the static `options` on first access. */
	  static get default(): XMLFormatter {
	    return XMLFormatter.#default ??= new XMLFormatter(XMLFormatter.options);
	  }

	  /**
	   * Pretty-prints `xml` with a one-off formatter.
	   *
	   * @param xml Markup to format.
	   * @param options Settings for the one-off formatter.
	   * @returns The re-indented markup.
	   */
	  static format(
	    xml: string,
	    options: Options = XMLFormatter.options,
	  ): string {
	    return new XMLFormatter(options).format(xml);
	  }

	  /**
	   * Minifies `xml` with a one-off formatter.
	   *
	   * @param xml Markup to minify.
	   * @param options Settings for the one-off formatter.
	   * @returns The minified markup.
	   */
	  static minify(
	    xml: string,
	    options: Options = XMLFormatter.options,
	  ): string {
	    return new XMLFormatter(options).minify(xml);
	  }

	  /** Creates a formatter; equivalent to calling the constructor. */
	  static from(options: Options = XMLFormatter.options): XMLFormatter {
	    return new XMLFormatter(options);
	  }

	  #lineWidth = 80;
	  #newLine: EOL | `${EOL}` = EOL.LF;
	  #removeComments = false;
	  #splitNS = true;
	  #tabSize = 4;
	  #useTabs = false;
	  #verbose = false;
	  #debug = false;
	  #finalNewLine = true;

	  /**
	   * @param options Overrides for the static default `options`.
	   * @throws {TypeError} If `tabSize` or `lineWidth` is not a number, or
	   *   `newLine` is not one of the supported line endings.
	   * @throws {RangeError} If `tabSize` or `lineWidth` is out of range.
	   */
	  constructor(options: Options = {}) {
	    const {
	      newLine,
	      useTabs,
	      tabSize,
	      splitNS,
	      finalNewLine,
	      verbose,
	      debug,
	      removeComments,
	      lineWidth,
	    } = { ...XMLFormatter.options, ...options };

	    // Object.assign performs ordinary property sets, so each value passes
	    // through the validating setter of the same name.
	    Object.assign(this, {
	      newLine,
	      useTabs,
	      tabSize,
	      splitNS,
	      verbose,
	      debug,
	      finalNewLine,
	      removeComments,
	    });

	    // The static defaults carry no `lineWidth`, so it is applied only when the
	    // caller supplied one; otherwise the field initializer (80) stands.
	    if (lineWidth !== undefined) this.lineWidth = lineWidth;
	  }

	  /** Text of one indent level: a tab, or `tabSize` spaces. */
	  public get indent(): string {
	    return this.useTabs ? "\t" : " ".repeat(this.tabSize);
	  }

	  public get useTabs(): boolean {
	    return this.#useTabs;
	  }

	  public set useTabs(value: boolean) {
	    this.#useTabs = Boolean(value);
	  }

	  public get tabSize(): number {
	    return this.#tabSize;
	  }

	  /**
	   * @throws {TypeError} If `value` is not a number or is NaN.
	   * @throws {RangeError} If `value` is outside 0 to 8.
	   */
	  public set tabSize(value: number) {
	    if (typeof value !== "number" || Number.isNaN(value)) {
	      throw new TypeError("[XMLFormatter] 'tabSize' must be a number");
	    }
	    if (value < 0 || value > 8) {
	      throw new RangeError("[XMLFormatter] 'tabSize' must be between 0 and 8");
	    }
	    this.#tabSize = value;
	  }

	  public get splitNS(): boolean {
	    return this.#splitNS;
	  }

	  public set splitNS(value: boolean) {
	    this.#splitNS = Boolean(value);
	  }

	  public get removeComments(): boolean {
	    return this.#removeComments;
	  }

	  public set removeComments(value: boolean) {
	    this.#removeComments = Boolean(value);
	  }

	  public get lineWidth(): number {
	    return this.#lineWidth;
	  }

	  /**
	   * @throws {TypeError} If `value` is not a number or is NaN.
	   * @throws {RangeError} If `value` is outside 0 to 1000.
	   */
	  public set lineWidth(value: number) {
	    if (typeof value !== "number" || Number.isNaN(value)) {
	      throw new TypeError("[XMLFormatter] 'lineWidth' must be a number");
	    }
	    if (value < 0 || value > 1000) {
	      throw new RangeError(
	        "[XMLFormatter] 'lineWidth' must be between 0 and 1000",
	      );
	    }
	    this.#lineWidth = value;
	  }

	  public get newLine(): EOL | `${EOL}` {
	    return this.#newLine;
	  }

	  /** @throws {TypeError} If `value` is not CRLF, CR or LF. */
	  public set newLine(value: EOL | `${EOL}`) {
	    if (![EOL.CRLF, EOL.CR, EOL.LF].includes(value as EOL)) {
	      throw new TypeError(
	        "[XMLFormatter] 'newLine' must be either '\\r\\n', '\\r', or '\\n'.",
	      );
	    }
	    this.#newLine = value;
	  }

	  public get finalNewLine(): boolean {
	    return this.#finalNewLine;
	  }

	  public set finalNewLine(value: boolean) {
	    this.#finalNewLine = Boolean(value);
	  }

	  public get verbose(): boolean {
	    return this.#verbose;
	  }

	  public set verbose(value: boolean) {
	    this.#verbose = Boolean(value);
	  }

	  public get debug(): boolean {
	    return this.#debug;
	  }

	  public set debug(value: boolean) {
	    this.#debug = Boolean(value);
	  }

	  /**
	   * Pretty-prints markup: the input is minified (comments kept), cut into
	   * parts at every `<` (and at every `xmlns` attribute when `splitNS` is on),
	   * and each part is emitted on its own line at the current nesting level.
	   *
	   * @param xml Markup to format.
	   * @returns The indented markup, ending with `newLine` when `finalNewLine`
	   *   is enabled.
	   */
	  public format(xml: string): string {
	    const DELIM = "~::~";
	    const LEADING_LF_RE = /^(\r\n|\r|\n)+/;
	    const TRAILING_LF_RE = /(\r\n|\r|\n)+$/;

	    xml = this.minify(xml, false).replace(/</g, `${DELIM}<`);
	    if (this.splitNS) xml = xml.replace(/xmlns([:=])/g, `${DELIM}xmlns$1`);

	    const parts = xml.split(DELIM);
	    if (this.debug) console.log(parts);

	    let inComment = false, level = 0, output = "";
	    for (let i = 0; i < parts.length; i++) {
	      const part = parts[i];
	      const prev = parts[i - 1] ?? "";

	      const opens = /<(\w|:)/.test(part); // contains "<name"
	      const closes = part.includes("</"); // contains "</"
	      const selfCloses = part.includes("/>"); // contains "/>"
	      const splitHere = this.splitNS && /xmlns[:=]/.test(part);

	      if (part.includes("<!")) {
	        // <!-- ..., <![CDATA[ ... or <!DOCTYPE ...
	        output += this.#getIndent(level, part);
	        // Still inside the construct unless its terminator is in this part.
	        inComment = !(
	          part.includes("-->") || part.includes("]>") ||
	          /!DOCTYPE/i.test(part)
	        );
	      } else if (part.includes("-->") || part.includes("]>")) {
	        // tail of a construct that began in an earlier part
	        output += part;
	        inComment = false;
	      } else if (
	        // <elm></elm>: closing tag directly after its own opening tag
	        /^<(\w|:)/.test(prev) &&
	        /^<\/(\w|:)/.test(part) &&
	        /^<[\w:\-.,\/]+/.exec(prev)?.[0] ===
	          /^<\/[\w:\-.,]+/.exec(part)?.[0]?.replace("/", "")
	      ) {
	        output += part;
	        if (!inComment) --level;
	      } else if (opens && !closes && !selfCloses) {
	        // <elm>
	        output += inComment ? part : this.#getIndent(level++, part);
	      } else if (opens && closes) {
	        // <elm>...</elm>
	        output += inComment ? part : this.#getIndent(level, part);
	      } else if (closes) {
	        // </elm>: step out BEFORE indenting so it lines up with its opener
	        output += inComment ? part : this.#getIndent(--level, part);
	      } else if (selfCloses && !splitHere) {
	        // <elm />
	        output += inComment ? part : this.#getIndent(level, part);
	      } else if (selfCloses) {
	        // xmlns ... />: last attribute of a tag whose opener raised the level
	        output += inComment ? part : this.#getIndent(level--, part);
	      } else if (part.includes("<?")) {
	        // <?xml ... ?>
	        output += this.#getIndent(level, part);
	      } else if (splitHere) {
	        // xmlns attribute split onto its own line
	        output += this.#getIndent(level, part);
	      } else {
	        output += part;
	      }
	    }

	    // Every indented part starts with a newline; drop the ones at the edges.
	    output = output.replace(LEADING_LF_RE, "").replace(TRAILING_LF_RE, "");

	    // add final newline, if desired
	    if (this.finalNewLine) output += this.newLine;

	    return output;
	  }

	  /**
	   * Collapses insignificant whitespace in markup.
	   *
	   * @param xml Markup to minify.
	   * @param removeComments Strip `<!-- ... -->` comments; defaults to the
	   *   instance's `removeComments` setting.
	   * @returns The minified markup, ending with `newLine` when `finalNewLine`
	   *   is enabled.
	   */
	  public minify(xml: string, removeComments = this.removeComments): string {
	    // line breaks outside of CDATA sections and comments
	    xml = this.#stripLineBreaks(xml);

	    // remove comments
	    if (removeComments) {
	      xml = xml.replace(
	        /\<![ \r\n\t]*(--([^\-]|[\r\n]|-[^\-])*--[ \r\n\t]*)\>/g,
	        "",
	      );
	    }

	    // insignificant whitespace between tags
	    xml = xml.replace(/>\s{0,}</g, "><");
	    // spaces between attributes
	    xml = xml.replace(/"\s+(?=[^\s]+=)/g, '" ');
	    // spaces between the last attribute and tag close (>)
	    xml = xml.replace(/"\s+(?=>)/g, '"');
	    // spaces between the last attribute and tag close (/>)
	    xml = xml.replace(/"\s+(?=\/>)/g, '" ');
	    // spaces between the node name and the first attribute
	    xml = xml.replace(/[^ <>="]\s+[^ <>="]+=/g, (m) => m.replace(/\s+/g, " "));

	    // final new line
	    xml = xml.replace(/\s+$/, "");
	    if (this.finalNewLine) xml += this.newLine;

	    return xml;
	  }

	  /**
	   * Builds `newLine` + indentation for `level` + `trailingValue`.
	   * Negative levels (unbalanced closing tags) are clamped to zero because
	   * `String.prototype.repeat` throws on a negative count.
	   */
	  #getIndent(level: number, trailingValue = ""): string {
	    const depth = Math.max(0, level);
	    return `${this.newLine}${this.indent.repeat(depth)}${trailingValue}`;
	  }

	  /**
	   * Removes line breaks that touch other whitespace, leaving CDATA sections
	   * and comments untouched. A line break is kept only when the characters on
	   * both sides are non-whitespace or are themselves line breaks.
	   */
	  #stripLineBreaks(xml: string): string {
	    const KEEP_RE = /\S|\r|\n/;
	    const width = this.newLine.length;
	    let output = "";
	    let inCdata = false, inComment = false;

	    for (let i = 0; i < xml.length; i++) {
	      const char = xml.charAt(i);

	      if (inCdata) {
	        if (xml.startsWith("]]>", i)) inCdata = false;
	      } else if (inComment) {
	        if (xml.startsWith("-->", i)) inComment = false;
	      } else if (char === "<") {
	        inCdata = xml.startsWith("<![CDATA[", i);
	        inComment = !inCdata && xml.startsWith("<!--", i);
	      } else if (char === "\r" || char === "\n") {
	        // charAt yields "" past either end, so a break at the very start or
	        // end of the input never counts as surrounded.
	        const before = char === "\r"
	          ? xml.charAt(i - 1)
	          : xml.charAt(i - width);
	        const after = char === "\r"
	          ? xml.charAt(i + width)
	          : xml.charAt(i + 1);
	        if (KEEP_RE.test(before) && KEEP_RE.test(after)) output += char;
	        continue;
	      }
	      output += char;
	    }
	    return output;
	  }
	}

	/**
	 * Ambient namespace sharing the `XMLFormatter` name, re-exporting the options
	 * type so that it can also be written as `XMLFormatter.Options`.
	 */
	export declare namespace XMLFormatter {
	export type { Options };
	}