celsowm · July 24, 2025 02:59
diff --git a/HtmlToDocxConverter.js b/HtmlToDocxConverter.js
 /**
  * Converts an HTML string into a Word (.docx) document using the global `docx` library.
  *
  * Expects a browser-like environment: `window.Node`, `DOMParser`, `atob` and
  * `fetch` must be available, together with a global `docx` object.
  */
 class HTMLtoDOCX {
  /**
   * Parses the HTML and prepares the numbering definition for ordered lists.
   *
   * @param {string} htmlString - HTML markup to convert.
   */
  constructor(htmlString) {
    // Read the node-type constants from `window.Node`: some bundlers shadow the
    // bare `Node` identifier.
    this.nodeTypes = {
      ELEMENT_NODE: window.Node.ELEMENT_NODE,
      TEXT_NODE: window.Node.TEXT_NODE,
    };
    // Deepest zero-based list level that is emitted; deeper nesting is clamped.
    this.maxListLevel = 8;
    // Tags that are rendered as runs inside a paragraph instead of as blocks.
    this.inlineTags = new Set(["a", "b", "br", "em", "i", "img", "span", "strong"]);

    const parser = new DOMParser();
    this.doc = parser.parseFromString(htmlString, "text/html");
    this.numbering = this.createNumbering();
  }

  /**
   * Builds the numbering options used for ordered lists (`ol`).
   *
   * Returns the plain options object that `docx.Document` takes in its
   * `numbering` field. Levels 0 and 1 keep the original decimal / lower-letter
   * definitions; deeper levels cycle decimal -> lowerLetter -> lowerRoman.
   *
   * @returns {{config: Array<Object>}} Numbering options with one
   *   "default-numbering" reference.
   */
  createNumbering() {
    const formats = ["decimal", "lowerLetter", "lowerRoman"];
    const levels = [];
    for (let level = 0; level <= this.maxListLevel; level++) {
      levels.push({
        level,
        format: formats[level % formats.length],
        text: `%${level + 1}.`,
        alignment: docx.AlignmentType.START,
        style: {
          paragraph: {
            indent: { left: 720 * (level + 1), hanging: 360 },
          },
        },
      });
    }
    return {
      config: [{
        reference: "default-numbering",
        levels,
      }],
    };
  }

  /**
   * Converts the parsed document body and packs it into a .docx blob.
   *
   * @returns {Promise<Blob>} Result of `docx.Packer.toBlob`.
   */
  async createDocx() {
    const children = await this.processNodes(this.doc.body.childNodes);
    const doc = new docx.Document({
      numbering: this.numbering,
      sections: [{
        children: children
      }],
    });

    return docx.Packer.toBlob(doc);
  }

  /**
   * Tells whether a node can be rendered as runs inside a paragraph.
   *
   * Text nodes always qualify. Elements qualify when their tag is in
   * `this.inlineTags` and none of their element children is a block.
   *
   * @param {Node} node - DOM node to classify.
   * @returns {boolean} True when the node is inline content.
   */
  isInlineNode(node) {
    if (node.nodeType === this.nodeTypes.TEXT_NODE) return true;
    if (node.nodeType !== this.nodeTypes.ELEMENT_NODE) return false;
    if (!this.inlineTags.has(node.tagName.toLowerCase())) return false;
    return Array.from(node.childNodes).every(
      (child) => child.nodeType !== this.nodeTypes.ELEMENT_NODE || this.isInlineNode(child)
    );
  }

  /**
   * Tells whether a node contributes anything visible (non-blank text or an image).
   *
   * @param {Node} node - DOM node to inspect.
   * @returns {boolean} True when the node holds non-whitespace text or an `img`.
   */
  hasVisibleContent(node) {
    if (node.nodeType === this.nodeTypes.TEXT_NODE) {
      return node.nodeValue.trim() !== '';
    }
    if (node.nodeType !== this.nodeTypes.ELEMENT_NODE) return false;
    if (node.tagName.toLowerCase() === "img") return true;
    return Array.from(node.childNodes).some((child) => this.hasVisibleContent(child));
  }

  /**
   * Tells whether a node is a `ul` or `ol` element.
   *
   * @param {Node} node - DOM node to inspect.
   * @returns {boolean} True for list container elements.
   */
  isListElement(node) {
    if (node.nodeType !== this.nodeTypes.ELEMENT_NODE) return false;
    const tagName = node.tagName.toLowerCase();
    return tagName === "ul" || tagName === "ol";
  }

  /**
   * Converts block-level DOM nodes into docx paragraphs and tables.
   *
   * Block nesting is walked with an explicit stack instead of recursion so
   * deeply nested markup does not grow the call stack.
   *
   * @param {Iterable<Node>} nodes - Sibling nodes to convert, in document order.
   * @param {Object} [initialContext={}] - List context (`numbering` / `bullet`)
   *   inherited by the nodes.
   * @returns {Promise<Array>} docx block objects in document order.
   */
  async processNodes(nodes, initialContext = {}) {
    const output = [];
    // Each frame holds its remaining siblings reversed, so `pop()` yields them
    // in document order.
    const stack = [{
      nodes: Array.from(nodes).reverse(),
      context: initialContext
    }];

    while (stack.length > 0) {
      const { nodes: pending, context } = stack[stack.length - 1];
      if (pending.length === 0) {
        stack.pop();
        continue;
      }

      const node = pending.pop();

      // Consecutive inline siblings (text, <b>, <a>, <img>, ...) form a single
      // paragraph so that formatting survives and text is not split per node.
      if (this.isInlineNode(node)) {
        const group = [node];
        while (pending.length > 0 && this.isInlineNode(pending[pending.length - 1])) {
          group.push(pending.pop());
        }
        // Whitespace-only groups (indentation between blocks) are ignored.
        if (group.some((member) => this.hasVisibleContent(member))) {
          const runs = await this.processParagraphChildren(group);
          if (runs.length > 0) {
            output.push(new docx.Paragraph({ children: runs }));
          }
        }
        continue;
      }

      if (node.nodeType !== this.nodeTypes.ELEMENT_NODE) {
        continue;
      }

      const tagName = node.tagName.toLowerCase();

      switch (tagName) {
        // Headings h1-h6 map to HEADING_1..HEADING_6.
        case "h1": case "h2": case "h3": case "h4": case "h5": case "h6":
          output.push(new docx.Paragraph({
            text: node.textContent,
            heading: docx.HeadingLevel[`HEADING_${tagName.charAt(1)}`],
          }));
          break;

        case "p":
          output.push(new docx.Paragraph({
            children: await this.processParagraphChildren(node.childNodes),
          }));
          break;

        // Lists: the items inherit a context describing their level and kind.
        case "ul":
        case "ol": {
          // Depth is shared between bullet and numbered lists so mixed nesting
          // (ol inside ul and vice versa) still indents one level deeper.
          const parentList = context.numbering || context.bullet;
          const level = parentList ? Math.min(parentList.level + 1, this.maxListLevel) : 0;
          const listContext = {
            isList: true,
            numbering: tagName === 'ol' ? { reference: "default-numbering", level } : undefined,
            bullet: tagName === 'ul' ? { level } : undefined
          };
          stack.push({ nodes: Array.from(node.childNodes).reverse(), context: listContext });
          break;
        }

        case "li": {
          // The item's own content becomes one list paragraph; nested lists are
          // queued afterwards so their items get the next level.
          const itemNodes = [];
          const nestedLists = [];
          for (const child of node.childNodes) {
            if (this.isListElement(child)) {
              nestedLists.push(child);
            } else {
              itemNodes.push(child);
            }
          }
          output.push(new docx.Paragraph({
            children: await this.processParagraphChildren(itemNodes),
            numbering: context.numbering,
            bullet: context.bullet,
          }));
          if (nestedLists.length > 0) {
            stack.push({ nodes: nestedLists.reverse(), context });
          }
          break;
        }

        case "table": {
          const table = await this.createTable(node);
          if (table) {
            output.push(table);
          }
          break;
        }

        default:
          // Any other element is treated as a transparent container.
          if (node.childNodes.length > 0) {
            stack.push({ nodes: Array.from(node.childNodes).reverse(), context });
          }
      }
    }
    return output;
  }

  /**
   * Converts inline nodes (text, strong/b, em/i, a, br, img) into docx runs.
   *
   * Formatting is passed down through `format` and applied when each run is
   * created; runs are never modified after construction.
   *
   * @param {Iterable<Node>} nodes - Inline nodes to convert.
   * @param {Object} [format={}] - Run options inherited from enclosing elements
   *   (e.g. `{ bold: true }`).
   * @returns {Promise<Array>} Runs and hyperlinks in document order.
   */
  async processParagraphChildren(nodes, format = {}) {
    const runs = [];
    for (const node of nodes) {
      if (node.nodeType === this.nodeTypes.TEXT_NODE) {
        runs.push(new docx.TextRun(
          Object.assign({}, format, { text: node.nodeValue.replace(/\u00A0/g, " ") })
        ));
        continue;
      }
      if (node.nodeType !== this.nodeTypes.ELEMENT_NODE) continue;

      const tagName = node.tagName.toLowerCase();

      switch (tagName) {
        case "b": case "strong":
          runs.push(...await this.processParagraphChildren(
            node.childNodes, Object.assign({}, format, { bold: true })
          ));
          break;

        case "i": case "em":
          runs.push(...await this.processParagraphChildren(
            node.childNodes, Object.assign({}, format, { italics: true })
          ));
          break;

        case "a": {
          const href = node.getAttribute('href');
          if (!href) {
            // An anchor without a target is just text.
            runs.push(...await this.processParagraphChildren(node.childNodes, format));
            break;
          }
          const linkRuns = await this.processParagraphChildren(
            node.childNodes, Object.assign({}, format, { style: "Hyperlink" })
          );
          runs.push(new docx.ExternalHyperlink({
            link: href,
            children: linkRuns.length > 0 ? linkRuns : [new docx.TextRun({ text: node.textContent, style: "Hyperlink" })],
          }));
          break;
        }

        case "br":
          runs.push(new docx.TextRun({ break: 1 }));
          break;

        case "img": {
          const image = await this.createImage(node.src);
          if (image) {
            runs.push(image);
          }
          break;
        }

        default:
          runs.push(...await this.processParagraphChildren(node.childNodes, format));
      }
    }
    return runs;
  }

  /**
   * Loads an image from a base64 data URL or a fetchable URL.
   *
   * @param {string} src - Image source.
   * @returns {Promise<Object|null>} A `docx.ImageRun`, or null when the source
   *   is empty or the image cannot be loaded (the failure is logged).
   */
  async createImage(src) {
    if (!src) return null;
    try {
      let data;
      if (src.startsWith('data:image')) {
        // Decode the base64 payload into raw bytes.
        const base64 = src.split(',')[1];
        const binaryStr = atob(base64);
        const len = binaryStr.length;
        const bytes = new Uint8Array(len);
        for (let i = 0; i < len; i++) {
          bytes[i] = binaryStr.charCodeAt(i);
        }
        data = bytes.buffer;
      } else {
        const res = await fetch(src);
        // `fetch` resolves on HTTP errors too; do not embed an error page as an image.
        if (!res.ok) {
          throw new Error(`HTTP ${res.status}`);
        }
        data = await res.arrayBuffer();
      }
      // Fixed size for now; real dimensions could be derived here if needed.
      return new docx.ImageRun({ data, transformation: { width: 300, height: 200 }});
    } catch (error) {
      console.error("Não foi possível processar a imagem:", src, error);
      return null;
    }
  }

  /**
   * Converts a `table` element into a docx table.
   *
   * Uses the table's own `rows` / `cells` collections so rows and cells of
   * nested tables are not duplicated in the outer table; nested tables are
   * converted when their containing cell is processed.
   *
   * @param {HTMLTableElement} node - Table element to convert.
   * @returns {Promise<Object|null>} A `docx.Table`, or null when the table has
   *   no cells.
   */
  async createTable(node) {
    const rows = [];
    for (const rowNode of Array.from(node.rows)) {
      const cells = [];
      for (const cellNode of Array.from(rowNode.cells)) {
        const cellChildren = await this.processNodes(cellNode.childNodes);
        // A cell needs a paragraph as its last block, so pad empty cells and
        // cells that end with a nested table.
        const lastChild = cellChildren[cellChildren.length - 1];
        if (cellChildren.length === 0 || lastChild instanceof docx.Table) {
          cellChildren.push(new docx.Paragraph({}));
        }
        cells.push(new docx.TableCell({
          children: cellChildren,
          shading: cellNode.tagName.toLowerCase() === 'th' ? { fill: "E0E0E0" } : undefined
        }));
      }
      if (cells.length > 0) {
        rows.push(new docx.TableRow({ children: cells }));
      }
    }
    return rows.length > 0 ? new docx.Table({ rows }) : null;
  }
 }
	/**
	 * Converts an HTML string into a Word (.docx) document using the global `docx` library.
	 *
	 * Expects a browser-like environment: `window.Node`, `DOMParser`, `atob` and
	 * `fetch` must be available, together with a global `docx` object.
	 */
	class HTMLtoDOCX {
	  /**
	   * Parses the HTML and prepares the numbering definition for ordered lists.
	   *
	   * @param {string} htmlString - HTML markup to convert.
	   */
	  constructor(htmlString) {
	    // Read the node-type constants from `window.Node`: some bundlers shadow the
	    // bare `Node` identifier.
	    this.nodeTypes = {
	      ELEMENT_NODE: window.Node.ELEMENT_NODE,
	      TEXT_NODE: window.Node.TEXT_NODE,
	    };
	    // Deepest zero-based list level that is emitted; deeper nesting is clamped.
	    this.maxListLevel = 8;
	    // Tags that are rendered as runs inside a paragraph instead of as blocks.
	    this.inlineTags = new Set(["a", "b", "br", "em", "i", "img", "span", "strong"]);

	    const parser = new DOMParser();
	    this.doc = parser.parseFromString(htmlString, "text/html");
	    this.numbering = this.createNumbering();
	  }

	  /**
	   * Builds the numbering options used for ordered lists (`ol`).
	   *
	   * Returns the plain options object that `docx.Document` takes in its
	   * `numbering` field. Levels 0 and 1 keep the original decimal / lower-letter
	   * definitions; deeper levels cycle decimal -> lowerLetter -> lowerRoman.
	   *
	   * @returns {{config: Array<Object>}} Numbering options with one
	   *   "default-numbering" reference.
	   */
	  createNumbering() {
	    const formats = ["decimal", "lowerLetter", "lowerRoman"];
	    const levels = [];
	    for (let level = 0; level <= this.maxListLevel; level++) {
	      levels.push({
	        level,
	        format: formats[level % formats.length],
	        text: `%${level + 1}.`,
	        alignment: docx.AlignmentType.START,
	        style: {
	          paragraph: {
	            indent: { left: 720 * (level + 1), hanging: 360 },
	          },
	        },
	      });
	    }
	    return {
	      config: [{
	        reference: "default-numbering",
	        levels,
	      }],
	    };
	  }

	  /**
	   * Converts the parsed document body and packs it into a .docx blob.
	   *
	   * @returns {Promise<Blob>} Result of `docx.Packer.toBlob`.
	   */
	  async createDocx() {
	    const children = await this.processNodes(this.doc.body.childNodes);
	    const doc = new docx.Document({
	      numbering: this.numbering,
	      sections: [{
	        children: children
	      }],
	    });

	    return docx.Packer.toBlob(doc);
	  }

	  /**
	   * Tells whether a node can be rendered as runs inside a paragraph.
	   *
	   * Text nodes always qualify. Elements qualify when their tag is in
	   * `this.inlineTags` and none of their element children is a block.
	   *
	   * @param {Node} node - DOM node to classify.
	   * @returns {boolean} True when the node is inline content.
	   */
	  isInlineNode(node) {
	    if (node.nodeType === this.nodeTypes.TEXT_NODE) return true;
	    if (node.nodeType !== this.nodeTypes.ELEMENT_NODE) return false;
	    if (!this.inlineTags.has(node.tagName.toLowerCase())) return false;
	    return Array.from(node.childNodes).every(
	      (child) => child.nodeType !== this.nodeTypes.ELEMENT_NODE || this.isInlineNode(child)
	    );
	  }

	  /**
	   * Tells whether a node contributes anything visible (non-blank text or an image).
	   *
	   * @param {Node} node - DOM node to inspect.
	   * @returns {boolean} True when the node holds non-whitespace text or an `img`.
	   */
	  hasVisibleContent(node) {
	    if (node.nodeType === this.nodeTypes.TEXT_NODE) {
	      return node.nodeValue.trim() !== '';
	    }
	    if (node.nodeType !== this.nodeTypes.ELEMENT_NODE) return false;
	    if (node.tagName.toLowerCase() === "img") return true;
	    return Array.from(node.childNodes).some((child) => this.hasVisibleContent(child));
	  }

	  /**
	   * Tells whether a node is a `ul` or `ol` element.
	   *
	   * @param {Node} node - DOM node to inspect.
	   * @returns {boolean} True for list container elements.
	   */
	  isListElement(node) {
	    if (node.nodeType !== this.nodeTypes.ELEMENT_NODE) return false;
	    const tagName = node.tagName.toLowerCase();
	    return tagName === "ul" || tagName === "ol";
	  }

	  /**
	   * Converts block-level DOM nodes into docx paragraphs and tables.
	   *
	   * Block nesting is walked with an explicit stack instead of recursion so
	   * deeply nested markup does not grow the call stack.
	   *
	   * @param {Iterable<Node>} nodes - Sibling nodes to convert, in document order.
	   * @param {Object} [initialContext={}] - List context (`numbering` / `bullet`)
	   *   inherited by the nodes.
	   * @returns {Promise<Array>} docx block objects in document order.
	   */
	  async processNodes(nodes, initialContext = {}) {
	    const output = [];
	    // Each frame holds its remaining siblings reversed, so `pop()` yields them
	    // in document order.
	    const stack = [{
	      nodes: Array.from(nodes).reverse(),
	      context: initialContext
	    }];

	    while (stack.length > 0) {
	      const { nodes: pending, context } = stack[stack.length - 1];
	      if (pending.length === 0) {
	        stack.pop();
	        continue;
	      }

	      const node = pending.pop();

	      // Consecutive inline siblings (text, <b>, <a>, <img>, ...) form a single
	      // paragraph so that formatting survives and text is not split per node.
	      if (this.isInlineNode(node)) {
	        const group = [node];
	        while (pending.length > 0 && this.isInlineNode(pending[pending.length - 1])) {
	          group.push(pending.pop());
	        }
	        // Whitespace-only groups (indentation between blocks) are ignored.
	        if (group.some((member) => this.hasVisibleContent(member))) {
	          const runs = await this.processParagraphChildren(group);
	          if (runs.length > 0) {
	            output.push(new docx.Paragraph({ children: runs }));
	          }
	        }
	        continue;
	      }

	      if (node.nodeType !== this.nodeTypes.ELEMENT_NODE) {
	        continue;
	      }

	      const tagName = node.tagName.toLowerCase();

	      switch (tagName) {
	        // Headings h1-h6 map to HEADING_1..HEADING_6.
	        case "h1": case "h2": case "h3": case "h4": case "h5": case "h6":
	          output.push(new docx.Paragraph({
	            text: node.textContent,
	            heading: docx.HeadingLevel[`HEADING_${tagName.charAt(1)}`],
	          }));
	          break;

	        case "p":
	          output.push(new docx.Paragraph({
	            children: await this.processParagraphChildren(node.childNodes),
	          }));
	          break;

	        // Lists: the items inherit a context describing their level and kind.
	        case "ul":
	        case "ol": {
	          // Depth is shared between bullet and numbered lists so mixed nesting
	          // (ol inside ul and vice versa) still indents one level deeper.
	          const parentList = context.numbering || context.bullet;
	          const level = parentList ? Math.min(parentList.level + 1, this.maxListLevel) : 0;
	          const listContext = {
	            isList: true,
	            numbering: tagName === 'ol' ? { reference: "default-numbering", level } : undefined,
	            bullet: tagName === 'ul' ? { level } : undefined
	          };
	          stack.push({ nodes: Array.from(node.childNodes).reverse(), context: listContext });
	          break;
	        }

	        case "li": {
	          // The item's own content becomes one list paragraph; nested lists are
	          // queued afterwards so their items get the next level.
	          const itemNodes = [];
	          const nestedLists = [];
	          for (const child of node.childNodes) {
	            if (this.isListElement(child)) {
	              nestedLists.push(child);
	            } else {
	              itemNodes.push(child);
	            }
	          }
	          output.push(new docx.Paragraph({
	            children: await this.processParagraphChildren(itemNodes),
	            numbering: context.numbering,
	            bullet: context.bullet,
	          }));
	          if (nestedLists.length > 0) {
	            stack.push({ nodes: nestedLists.reverse(), context });
	          }
	          break;
	        }

	        case "table": {
	          const table = await this.createTable(node);
	          if (table) {
	            output.push(table);
	          }
	          break;
	        }

	        default:
	          // Any other element is treated as a transparent container.
	          if (node.childNodes.length > 0) {
	            stack.push({ nodes: Array.from(node.childNodes).reverse(), context });
	          }
	      }
	    }
	    return output;
	  }

	  /**
	   * Converts inline nodes (text, strong/b, em/i, a, br, img) into docx runs.
	   *
	   * Formatting is passed down through `format` and applied when each run is
	   * created; runs are never modified after construction.
	   *
	   * @param {Iterable<Node>} nodes - Inline nodes to convert.
	   * @param {Object} [format={}] - Run options inherited from enclosing elements
	   *   (e.g. `{ bold: true }`).
	   * @returns {Promise<Array>} Runs and hyperlinks in document order.
	   */
	  async processParagraphChildren(nodes, format = {}) {
	    const runs = [];
	    for (const node of nodes) {
	      if (node.nodeType === this.nodeTypes.TEXT_NODE) {
	        runs.push(new docx.TextRun(
	          Object.assign({}, format, { text: node.nodeValue.replace(/\u00A0/g, " ") })
	        ));
	        continue;
	      }
	      if (node.nodeType !== this.nodeTypes.ELEMENT_NODE) continue;

	      const tagName = node.tagName.toLowerCase();

	      switch (tagName) {
	        case "b": case "strong":
	          runs.push(...await this.processParagraphChildren(
	            node.childNodes, Object.assign({}, format, { bold: true })
	          ));
	          break;

	        case "i": case "em":
	          runs.push(...await this.processParagraphChildren(
	            node.childNodes, Object.assign({}, format, { italics: true })
	          ));
	          break;

	        case "a": {
	          const href = node.getAttribute('href');
	          if (!href) {
	            // An anchor without a target is just text.
	            runs.push(...await this.processParagraphChildren(node.childNodes, format));
	            break;
	          }
	          const linkRuns = await this.processParagraphChildren(
	            node.childNodes, Object.assign({}, format, { style: "Hyperlink" })
	          );
	          runs.push(new docx.ExternalHyperlink({
	            link: href,
	            children: linkRuns.length > 0 ? linkRuns : [new docx.TextRun({ text: node.textContent, style: "Hyperlink" })],
	          }));
	          break;
	        }

	        case "br":
	          runs.push(new docx.TextRun({ break: 1 }));
	          break;

	        case "img": {
	          const image = await this.createImage(node.src);
	          if (image) {
	            runs.push(image);
	          }
	          break;
	        }

	        default:
	          runs.push(...await this.processParagraphChildren(node.childNodes, format));
	      }
	    }
	    return runs;
	  }

	  /**
	   * Loads an image from a base64 data URL or a fetchable URL.
	   *
	   * @param {string} src - Image source.
	   * @returns {Promise<Object|null>} A `docx.ImageRun`, or null when the source
	   *   is empty or the image cannot be loaded (the failure is logged).
	   */
	  async createImage(src) {
	    if (!src) return null;
	    try {
	      let data;
	      if (src.startsWith('data:image')) {
	        // Decode the base64 payload into raw bytes.
	        const base64 = src.split(',')[1];
	        const binaryStr = atob(base64);
	        const len = binaryStr.length;
	        const bytes = new Uint8Array(len);
	        for (let i = 0; i < len; i++) {
	          bytes[i] = binaryStr.charCodeAt(i);
	        }
	        data = bytes.buffer;
	      } else {
	        const res = await fetch(src);
	        // `fetch` resolves on HTTP errors too; do not embed an error page as an image.
	        if (!res.ok) {
	          throw new Error(`HTTP ${res.status}`);
	        }
	        data = await res.arrayBuffer();
	      }
	      // Fixed size for now; real dimensions could be derived here if needed.
	      return new docx.ImageRun({ data, transformation: { width: 300, height: 200 }});
	    } catch (error) {
	      console.error("Não foi possível processar a imagem:", src, error);
	      return null;
	    }
	  }

	  /**
	   * Converts a `table` element into a docx table.
	   *
	   * Uses the table's own `rows` / `cells` collections so rows and cells of
	   * nested tables are not duplicated in the outer table; nested tables are
	   * converted when their containing cell is processed.
	   *
	   * @param {HTMLTableElement} node - Table element to convert.
	   * @returns {Promise<Object|null>} A `docx.Table`, or null when the table has
	   *   no cells.
	   */
	  async createTable(node) {
	    const rows = [];
	    for (const rowNode of Array.from(node.rows)) {
	      const cells = [];
	      for (const cellNode of Array.from(rowNode.cells)) {
	        const cellChildren = await this.processNodes(cellNode.childNodes);
	        // A cell needs a paragraph as its last block, so pad empty cells and
	        // cells that end with a nested table.
	        const lastChild = cellChildren[cellChildren.length - 1];
	        if (cellChildren.length === 0 || lastChild instanceof docx.Table) {
	          cellChildren.push(new docx.Paragraph({}));
	        }
	        cells.push(new docx.TableCell({
	          children: cellChildren,
	          shading: cellNode.tagName.toLowerCase() === 'th' ? { fill: "E0E0E0" } : undefined
	        }));
	      }
	      if (cells.length > 0) {
	        rows.push(new docx.TableRow({ children: cells }));
	      }
	    }
	    return rows.length > 0 ? new docx.Table({ rows }) : null;
	  }
	}