Created
July 24, 2025 02:59
-
-
Save celsowm/b86edfa31a224e73a080abf7305819fc to your computer and use it in GitHub Desktop.
HtmlToDocxConverter.js
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Converts an HTML string into a Word (.docx) document.
 *
 * Relies on three globals being available at call time: the `docx` library,
 * the browser `DOMParser`, and `window.Node`.
 *
 * Supported markup: headings (h1-h6), paragraphs, ordered/unordered lists
 * (including lists nested inside list items), tables, images, links, line
 * breaks and bold/italic inline formatting. Any other element is treated as a
 * transparent container whose children are processed in place.
 */
class HTMLtoDOCX {
  /**
   * @param {string} htmlString - HTML markup to convert. `null`/`undefined`
   *   are treated as an empty document.
   */
  constructor(htmlString) {
    // Some bundlers shadow the `Node` identifier; reading the constants from
    // `window.Node` guarantees the DOM values are used.
    this.nodeTypes = {
      ELEMENT_NODE: window.Node.ELEMENT_NODE,
      TEXT_NODE: window.Node.TEXT_NODE,
    };
    const parser = new DOMParser();
    const markup = htmlString == null ? "" : String(htmlString);
    this.doc = parser.parseFromString(markup, "text/html");
    this.numbering = this.createNumbering();
  }

  /**
   * Builds the numbering definition used by ordered lists (`<ol>`).
   *
   * The result is the plain options object that is handed to the `numbering`
   * option of `docx.Document` in `createDocx()`.
   *
   * @returns {{config: Array<object>}} Numbering options with two levels
   *   (decimal, then lower-case letters).
   */
  createNumbering() {
    return {
      config: [
        {
          reference: "default-numbering",
          levels: [
            {
              level: 0,
              format: "decimal",
              text: "%1.",
              alignment: docx.AlignmentType.START,
              style: {
                paragraph: {
                  indent: { left: 720, hanging: 360 },
                },
              },
            },
            {
              level: 1,
              format: "lowerLetter",
              text: "%2.",
              alignment: docx.AlignmentType.START,
              style: {
                paragraph: {
                  indent: { left: 1440, hanging: 360 },
                },
              },
            },
          ],
        },
      ],
    };
  }

  /**
   * Converts the parsed HTML body and packs it into a .docx file.
   *
   * @returns {Promise<Blob>} Whatever `docx.Packer.toBlob` resolves to.
   */
  async createDocx() {
    const children = await this.processNodes(this.doc.body.childNodes);
    const doc = new docx.Document({
      numbering: this.numbering,
      sections: [{ children }],
    });
    return docx.Packer.toBlob(doc);
  }

  /**
   * Computes the list context for a `<ul>`/`<ol>` nested in `parentContext`.
   *
   * @param {string} tagName - Lower-case tag name, `"ul"` or `"ol"`.
   * @param {object} parentContext - Context of the enclosing container.
   * @returns {{isList: boolean, numbering: (object|undefined), bullet: (object|undefined)}}
   */
  createListContext(tagName, parentContext) {
    const nextLevel = (parent, maxLevel) =>
      parent ? Math.min(parent.level + 1, maxLevel) : 0;

    if (tagName === "ol") {
      // Never reference a numbering level that createNumbering() did not define.
      const maxLevel = this.numbering.config[0].levels.length - 1;
      return {
        isList: true,
        numbering: {
          reference: "default-numbering",
          level: nextLevel(parentContext.numbering, maxLevel),
        },
        bullet: undefined,
      };
    }
    return {
      isList: true,
      numbering: undefined,
      // WordprocessingML list levels range from 0 to 8.
      bullet: { level: nextLevel(parentContext.bullet, 8) },
    };
  }

  /**
   * Converts block-level DOM nodes into docx block objects (paragraphs and
   * tables), in document order.
   *
   * Traversal is iterative (explicit stack) rather than recursive so that
   * large or deeply nested documents do not overflow the call stack.
   *
   * @param {Iterable<Node>} nodes - Nodes to convert (e.g. `body.childNodes`).
   * @param {object} [initialContext={}] - List context inherited from a caller.
   * @returns {Promise<Array<object>>} The generated docx block objects.
   */
  async processNodes(nodes, initialContext = {}) {
    const output = [];
    // Each frame stores its nodes reversed so `pop()` yields document order.
    const stack = [{ nodes: Array.from(nodes).reverse(), context: initialContext }];

    while (stack.length > 0) {
      const frame = stack[stack.length - 1];
      if (frame.nodes.length === 0) {
        stack.pop();
        continue;
      }
      const node = frame.nodes.pop();
      const context = frame.context;

      if (node.nodeType === this.nodeTypes.TEXT_NODE) {
        // Whitespace-only text between blocks carries no content.
        if (node.nodeValue.trim() !== '') {
          output.push(new docx.Paragraph({
            children: [new docx.TextRun(node.nodeValue.replace(/\u00A0/g, " "))],
          }));
        }
        continue;
      }
      if (node.nodeType !== this.nodeTypes.ELEMENT_NODE) {
        continue;
      }

      const tagName = node.tagName.toLowerCase();
      switch (tagName) {
        case "h1": case "h2": case "h3": case "h4": case "h5": case "h6":
          output.push(new docx.Paragraph({
            text: node.textContent,
            heading: docx.HeadingLevel[`HEADING_${tagName.charAt(1)}`],
          }));
          break;

        case "p":
          output.push(new docx.Paragraph({
            children: await this.processParagraphChildren(node.childNodes),
          }));
          break;

        case "ul":
        case "ol":
          // Children (the <li> items) are pushed on top of the stack, so they
          // are emitted before the remaining siblings of this list.
          stack.push({
            nodes: Array.from(node.childNodes).reverse(),
            context: this.createListContext(tagName, context),
          });
          break;

        case "li": {
          // A list item becomes one list-styled paragraph. Lists nested inside
          // the item are split out and processed as blocks with the current
          // context, so their level is incremented instead of their text being
          // merged into this paragraph.
          const inlineNodes = [];
          const nestedLists = [];
          for (const child of Array.from(node.childNodes)) {
            const isElement = child.nodeType === this.nodeTypes.ELEMENT_NODE;
            const childTag = isElement ? child.tagName.toLowerCase() : "";
            if (childTag === "ul" || childTag === "ol") {
              nestedLists.push(child);
            } else {
              inlineNodes.push(child);
            }
          }
          output.push(new docx.Paragraph({
            children: await this.processParagraphChildren(inlineNodes),
            numbering: context.numbering,
            bullet: context.bullet,
          }));
          if (nestedLists.length > 0) {
            stack.push({ nodes: nestedLists.reverse(), context });
          }
          break;
        }

        case "table":
          output.push(await this.createTable(node));
          break;

        case "img": {
          const image = await this.createImage(node.src);
          if (image) {
            output.push(new docx.Paragraph({ children: [image] }));
          }
          break;
        }

        default:
          // Any other element is a transparent container: descend into it.
          if (node.childNodes.length > 0) {
            stack.push({ nodes: Array.from(node.childNodes).reverse(), context });
          }
      }
    }
    return output;
  }

  /**
   * Converts the inline content of a paragraph (text, b/strong, i/em, a, br,
   * img) into docx runs.
   *
   * Formatting is passed down through `format` and applied when each
   * `TextRun` is constructed, rather than by mutating runs afterwards.
   *
   * @param {Iterable<Node>} nodes - Inline nodes to convert.
   * @param {object} [format={}] - Run options inherited from enclosing
   *   elements (e.g. `{ bold: true }`).
   * @returns {Promise<Array<object>>} Runs and hyperlinks in document order.
   */
  async processParagraphChildren(nodes, format = {}) {
    const runs = [];
    for (const node of Array.from(nodes)) {
      if (node.nodeType === this.nodeTypes.TEXT_NODE) {
        const text = node.nodeValue.replace(/\u00A0/g, " ");
        runs.push(new docx.TextRun(Object.assign({}, format, { text })));
        continue;
      }
      if (node.nodeType !== this.nodeTypes.ELEMENT_NODE) continue;

      const tagName = node.tagName.toLowerCase();
      switch (tagName) {
        case "b": case "strong": {
          const bold = Object.assign({}, format, { bold: true });
          runs.push(...await this.processParagraphChildren(node.childNodes, bold));
          break;
        }
        case "i": case "em": {
          const italics = Object.assign({}, format, { italics: true });
          runs.push(...await this.processParagraphChildren(node.childNodes, italics));
          break;
        }
        case "a": {
          const href = node.getAttribute('href');
          if (!href) {
            // An anchor without a target is just text; a hyperlink needs a URL.
            runs.push(...await this.processParagraphChildren(node.childNodes, format));
            break;
          }
          const linkFormat = Object.assign({}, format, { style: "Hyperlink" });
          const linkRuns = await this.processParagraphChildren(node.childNodes, linkFormat);
          runs.push(new docx.ExternalHyperlink({
            link: href,
            children: linkRuns.length > 0
              ? linkRuns
              : [new docx.TextRun({ text: node.textContent, style: "Hyperlink" })],
          }));
          break;
        }
        case "br":
          runs.push(new docx.TextRun({ break: 1 }));
          break;
        case "img": {
          // Inline image, e.g. <p><img src="..."></p>.
          const image = await this.createImage(node.src);
          if (image) {
            runs.push(image);
          }
          break;
        }
        default:
          runs.push(...await this.processParagraphChildren(node.childNodes, format));
      }
    }
    return runs;
  }

  /**
   * Loads an image from a base64 `data:image` URI or from a URL and wraps it
   * in a `docx.ImageRun` with a fixed 300x200 size.
   *
   * Failures are logged and reported as `null` so that one broken image does
   * not abort the whole conversion.
   *
   * @param {string} src - Image source (data URI or URL).
   * @returns {Promise<object|null>} The image run, or `null` on failure.
   */
  async createImage(src) {
    try {
      let data;
      if (src.startsWith('data:image')) {
        // Decode the base64 payload into a byte buffer.
        const base64 = src.split(',')[1];
        const binaryStr = atob(base64);
        const bytes = new Uint8Array(binaryStr.length);
        for (let i = 0; i < binaryStr.length; i++) {
          bytes[i] = binaryStr.charCodeAt(i);
        }
        data = bytes.buffer;
      } else {
        const res = await fetch(src);
        if (!res.ok) {
          // Do not embed an HTTP error page as if it were image data.
          throw new Error(`HTTP ${res.status}`);
        }
        data = await res.arrayBuffer();
      }
      return new docx.ImageRun({ data, transformation: { width: 300, height: 200 } });
    } catch (error) {
      console.error("Não foi possível processar a imagem:", src, error);
      return null;
    }
  }

  /**
   * Converts an HTML `<table>` element into a `docx.Table`.
   *
   * Only the table's own rows and cells (`rows` / `cells`) are visited, so the
   * rows of a nested table are not duplicated into the outer one; nested
   * tables are converted when their containing cell is processed.
   *
   * @param {HTMLTableElement} node - The table element.
   * @returns {Promise<object>} The generated `docx.Table`.
   */
  async createTable(node) {
    const rows = [];
    for (const rowNode of Array.from(node.rows)) {
      const cells = [];
      for (const cellNode of Array.from(rowNode.cells)) {
        const cellChildren = await this.processNodes(cellNode.childNodes);
        if (cellChildren.length === 0) {
          // Give every cell at least one block so the cell is never empty.
          cellChildren.push(new docx.Paragraph({}));
        }
        const isHeader = cellNode.tagName.toLowerCase() === 'th';
        cells.push(new docx.TableCell({
          children: cellChildren,
          shading: isHeader ? { fill: "E0E0E0" } : undefined,
        }));
      }
      rows.push(new docx.TableRow({ children: cells }));
    }
    return new docx.Table({ rows });
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment