Skip to content

Instantly share code, notes, and snippets.

@celsowm
Created July 24, 2025 02:59
Show Gist options
  • Save celsowm/b86edfa31a224e73a080abf7305819fc to your computer and use it in GitHub Desktop.
Save celsowm/b86edfa31a224e73a080abf7305819fc to your computer and use it in GitHub Desktop.
HtmlToDocxConverter.js
/**
 * Converts an HTML string into a Word (.docx) document in the browser.
 *
 * Relies on three globals: `window` (for the DOM `Node` constants), `DOMParser`
 * (to parse the markup) and `docx` (the docx library used to build the file).
 *
 * Supported markup: headings (h1-h6), paragraphs, nested ordered/unordered
 * lists, tables (including nested tables), images, links, line breaks and
 * bold/italic text. Unknown elements are transparent: their children are
 * processed as if they were in the parent.
 */
class HTMLtoDOCX {
  /**
   * @param {string} htmlString - HTML markup to convert.
   */
  constructor(htmlString) {
    // Some bundlers shadow `Node`; reading the constants from `window.Node`
    // guarantees we compare against the DOM values.
    this.nodeTypes = {
      ELEMENT_NODE: window.Node.ELEMENT_NODE,
      TEXT_NODE: window.Node.TEXT_NODE,
    };
    // Elements that flow inside a paragraph instead of starting a new block.
    this.inlineTags = new Set([
      "a", "abbr", "b", "br", "cite", "code", "em", "i", "mark", "q", "s",
      "small", "span", "strong", "sub", "sup", "u",
    ]);
    // Deepest list level emitted (levels are 0-based, so 9 levels in total).
    this.maxListLevel = 8;
    // Each top-level <ol> gets its own numbering instance so it restarts at 1.
    this.nextNumberingInstance = 0;
    const parser = new DOMParser();
    this.doc = parser.parseFromString(htmlString, "text/html");
    this.numbering = this.createNumbering();
  }

  /**
   * Builds the numbering definition used by ordered lists (<ol>).
   *
   * Returns the plain options object that `docx.Document` expects for its
   * `numbering` option. Levels cycle through decimal, lower-letter and
   * lower-roman markers and are indented 720 twips per level.
   *
   * @returns {{config: Array<object>}} Numbering options for `docx.Document`.
   */
  createNumbering() {
    const formats = ["decimal", "lowerLetter", "lowerRoman"];
    const levels = [];
    for (let level = 0; level <= this.maxListLevel; level++) {
      levels.push({
        level,
        format: formats[level % formats.length],
        text: `%${level + 1}.`,
        alignment: docx.AlignmentType.START,
        style: {
          paragraph: {
            indent: { left: 720 * (level + 1), hanging: 360 },
          },
        },
      });
    }
    return {
      config: [{ reference: "default-numbering", levels }],
    };
  }

  /**
   * Converts the parsed HTML body into a .docx file.
   *
   * @returns {Promise<Blob>} The generated document, as produced by `docx.Packer.toBlob`.
   */
  async createDocx() {
    const children = await this.processNodes(this.doc.body.childNodes);
    const doc = new docx.Document({
      numbering: this.numbering,
      sections: [{ children }],
    });
    return docx.Packer.toBlob(doc);
  }

  /**
   * Converts block-level DOM nodes into docx block objects (paragraphs, tables).
   *
   * Block traversal is iterative (explicit stack) so deeply nested wrappers do
   * not grow the call stack. Consecutive inline siblings (text, <b>, <a>, ...)
   * are grouped into a single paragraph so their formatting is preserved.
   *
   * @param {Iterable<Node>} nodes - Sibling nodes to convert, in document order.
   * @param {object} [initialContext={}] - List context (`numbering` / `bullet`) inherited by the nodes.
   * @returns {Promise<Array<object>>} docx block objects in document order.
   */
  async processNodes(nodes, initialContext = {}) {
    const output = [];
    // Each frame holds its remaining siblings reversed, so `pop()` yields them in document order.
    const stack = [{
      nodes: Array.from(nodes).reverse(),
      context: initialContext,
    }];

    while (stack.length > 0) {
      const frame = stack[stack.length - 1];
      if (frame.nodes.length === 0) {
        stack.pop();
        continue;
      }
      const node = frame.nodes.pop();
      const context = frame.context;

      if (this.isInlineNode(node)) {
        // Collect the whole run of inline siblings into one paragraph.
        const inlineNodes = [node];
        while (
          frame.nodes.length > 0 &&
          this.isInlineNode(frame.nodes[frame.nodes.length - 1])
        ) {
          inlineNodes.push(frame.nodes.pop());
        }
        // Whitespace between blocks, comments and stray <br> produce no paragraph.
        if (inlineNodes.some((inlineNode) => this.hasVisibleText(inlineNode))) {
          const runs = await this.processParagraphChildren(inlineNodes);
          output.push(new docx.Paragraph({ children: runs }));
        }
        continue;
      }

      const tagName = node.tagName.toLowerCase();
      switch (tagName) {
        case "h1": case "h2": case "h3": case "h4": case "h5": case "h6":
          output.push(new docx.Paragraph({
            text: node.textContent,
            heading: docx.HeadingLevel[`HEADING_${tagName.charAt(1)}`],
          }));
          break;
        case "p": {
          const runs = await this.processParagraphChildren(node.childNodes);
          output.push(new docx.Paragraph({ children: runs }));
          break;
        }
        case "ul":
        case "ol": {
          // Depth is derived from the enclosing list of either kind, so mixed
          // nesting (<ul> inside <ol> and vice versa) indents correctly.
          const parentList = context.numbering || context.bullet;
          const level = parentList
            ? Math.min(parentList.level + 1, this.maxListLevel)
            : 0;
          const listContext = { isList: true, numbering: undefined, bullet: undefined };
          if (tagName === "ol") {
            // A nested <ol> shares its parent's instance; Word restarts the
            // lower level on its own after each higher-level item.
            const instance = context.numbering
              ? context.numbering.instance
              : this.nextNumberingInstance++;
            listContext.numbering = { reference: "default-numbering", level, instance };
          } else {
            listContext.bullet = { level };
          }
          stack.push({
            nodes: Array.from(node.childNodes).reverse(),
            context: listContext,
          });
          break;
        }
        case "li": {
          // The item's own content becomes one list paragraph; nested lists go
          // back on the stack so their items are emitted one level deeper.
          const itemNodes = [];
          const nestedLists = [];
          for (const child of node.childNodes) {
            if (this.isListElement(child)) {
              nestedLists.push(child);
            } else {
              itemNodes.push(child);
            }
          }
          const runs = await this.processParagraphChildren(itemNodes);
          output.push(new docx.Paragraph({
            children: runs,
            numbering: context.numbering,
            bullet: context.bullet,
          }));
          if (nestedLists.length > 0) {
            stack.push({ nodes: nestedLists.reverse(), context });
          }
          break;
        }
        case "table": {
          const table = await this.createTable(node);
          if (table) {
            output.push(table);
          }
          break;
        }
        case "img": {
          const image = await this.createImage(node.src);
          if (image) {
            output.push(new docx.Paragraph({ children: [image] }));
          }
          break;
        }
        default:
          // Any other element is a transparent container.
          if (node.childNodes.length > 0) {
            stack.push({
              nodes: Array.from(node.childNodes).reverse(),
              context,
            });
          }
      }
    }
    return output;
  }

  /**
   * Converts the content of a paragraph into docx runs.
   *
   * Formatting is passed down the recursion and applied when each run is
   * created, rather than patched onto runs afterwards.
   *
   * @param {Iterable<Node>} nodes - Nodes that make up the paragraph content.
   * @param {object} [format={}] - Run options inherited from enclosing elements (e.g. `{ bold: true }`).
   * @returns {Promise<Array<object>>} Runs, hyperlinks and images for a `docx.Paragraph`.
   */
  async processParagraphChildren(nodes, format = {}) {
    const runs = [];
    for (const node of nodes) {
      if (node.nodeType === this.nodeTypes.TEXT_NODE) {
        const text = this.normalizeText(node.nodeValue);
        runs.push(new docx.TextRun(Object.assign({ text }, format)));
        continue;
      }
      if (node.nodeType !== this.nodeTypes.ELEMENT_NODE) {
        continue;
      }

      const tagName = node.tagName.toLowerCase();
      switch (tagName) {
        case "b": case "strong": {
          const boldFormat = Object.assign({}, format, { bold: true });
          runs.push(...(await this.processParagraphChildren(node.childNodes, boldFormat)));
          break;
        }
        case "i": case "em": {
          const italicFormat = Object.assign({}, format, { italics: true });
          runs.push(...(await this.processParagraphChildren(node.childNodes, italicFormat)));
          break;
        }
        case "a": {
          const href = node.getAttribute("href");
          const linkFormat = href
            ? Object.assign({}, format, { style: "Hyperlink" })
            : format;
          const linkRuns = await this.processParagraphChildren(node.childNodes, linkFormat);
          if (href && linkRuns.length > 0) {
            runs.push(new docx.ExternalHyperlink({ link: href, children: linkRuns }));
          } else {
            // An anchor without a target (or without content) is plain text.
            runs.push(...linkRuns);
          }
          break;
        }
        case "br":
          runs.push(new docx.TextRun({ break: 1 }));
          break;
        case "img": {
          const image = await this.createImage(node.src);
          if (image) {
            runs.push(image);
          }
          break;
        }
        default:
          runs.push(...(await this.processParagraphChildren(node.childNodes, format)));
      }
    }
    return runs;
  }

  /**
   * Loads an image and wraps it in a `docx.ImageRun` with a fixed 300x200 size.
   *
   * Base64 `data:image` URLs are decoded locally; any other source is fetched.
   * Failures are logged and reported as `null` so one broken image does not
   * abort the whole conversion.
   *
   * @param {string} src - Image URL or base64 data URL.
   * @returns {Promise<object|null>} The image run, or `null` when the image could not be loaded.
   */
  async createImage(src) {
    try {
      if (!src) {
        // An empty URL would make `fetch` download the current page.
        throw new Error("Image has no source");
      }
      let data;
      if (src.startsWith('data:image')) {
        // Decode the base64 payload into raw bytes.
        const base64 = src.split(',')[1];
        const binaryStr = atob(base64);
        const bytes = new Uint8Array(binaryStr.length);
        for (let i = 0; i < binaryStr.length; i++) {
          bytes[i] = binaryStr.charCodeAt(i);
        }
        data = bytes.buffer;
      } else {
        const res = await fetch(src);
        if (!res.ok) {
          // Otherwise the body of an error page would be embedded as image data.
          throw new Error(`Image request failed with HTTP status ${res.status}`);
        }
        data = await res.arrayBuffer();
      }
      return new docx.ImageRun({ data, transformation: { width: 300, height: 200 } });
    } catch (error) {
      console.error("Não foi possível processar a imagem:", src, error);
      return null;
    }
  }

  /**
   * Converts a <table> element into a `docx.Table`.
   *
   * Only the table's own rows and cells are read; a nested table is converted
   * as content of the cell that contains it. Header cells (<th>) are shaded.
   *
   * @param {Element} node - The <table> element.
   * @returns {Promise<object|null>} The table, or `null` when it has no cells.
   */
  async createTable(node) {
    const rows = [];
    for (const rowNode of this.getTableRows(node)) {
      const cells = [];
      for (const cellNode of this.getChildElements(rowNode, ["th", "td"])) {
        const cellChildren = await this.processNodes(cellNode.childNodes);
        // Word requires every cell to contain, and end with, a paragraph.
        const lastChild = cellChildren[cellChildren.length - 1];
        if (cellChildren.length === 0 || lastChild instanceof docx.Table) {
          cellChildren.push(new docx.Paragraph({ children: [] }));
        }
        cells.push(new docx.TableCell({
          children: cellChildren,
          shading: cellNode.tagName.toLowerCase() === 'th' ? { fill: "E0E0E0" } : undefined,
        }));
      }
      if (cells.length > 0) {
        rows.push(new docx.TableRow({ children: cells }));
      }
    }
    return rows.length > 0 ? new docx.Table({ rows }) : null;
  }

  /**
   * Tells whether a node can live inside a paragraph: any non-element node, or
   * an inline element whose descendants are all inline as well.
   *
   * @param {Node} node
   * @returns {boolean}
   */
  isInlineNode(node) {
    if (node.nodeType !== this.nodeTypes.ELEMENT_NODE) {
      return true;
    }
    if (!this.inlineTags.has(node.tagName.toLowerCase())) {
      return false;
    }
    return Array.from(node.childNodes).every((child) => this.isInlineNode(child));
  }

  /**
   * Tells whether a node contributes non-blank text.
   *
   * @param {Node} node
   * @returns {boolean}
   */
  hasVisibleText(node) {
    if (node.nodeType === this.nodeTypes.TEXT_NODE) {
      return node.nodeValue.trim() !== '';
    }
    if (node.nodeType === this.nodeTypes.ELEMENT_NODE) {
      return node.textContent.trim() !== '';
    }
    return false;
  }

  /**
   * Tells whether a node is a <ul> or <ol> element.
   *
   * @param {Node} node
   * @returns {boolean}
   */
  isListElement(node) {
    if (node.nodeType !== this.nodeTypes.ELEMENT_NODE) {
      return false;
    }
    const tagName = node.tagName.toLowerCase();
    return tagName === "ul" || tagName === "ol";
  }

  /**
   * Collapses runs of ASCII whitespace to one space, as HTML rendering does,
   * then turns non-breaking spaces into regular spaces.
   *
   * @param {string} value - Raw text node content.
   * @returns {string}
   */
  normalizeText(value) {
    return value.replace(/[\t\n\r\f ]+/g, " ").replace(/\u00A0/g, " ");
  }

  /**
   * Returns the direct child elements of `parent` whose tag is in `tagNames`.
   *
   * @param {Node} parent
   * @param {Array<string>} tagNames - Lower-case tag names to keep.
   * @returns {Array<Element>}
   */
  getChildElements(parent, tagNames) {
    return Array.from(parent.childNodes).filter((child) =>
      child.nodeType === this.nodeTypes.ELEMENT_NODE &&
      tagNames.includes(child.tagName.toLowerCase()));
  }

  /**
   * Returns the rows that belong to `table` itself (directly or through its
   * <thead>/<tbody>/<tfoot>), in document order, excluding nested tables' rows.
   *
   * @param {Element} table
   * @returns {Array<Element>}
   */
  getTableRows(table) {
    const rows = [];
    for (const child of this.getChildElements(table, ["thead", "tbody", "tfoot", "tr"])) {
      if (child.tagName.toLowerCase() === "tr") {
        rows.push(child);
      } else {
        rows.push(...this.getChildElements(child, ["tr"]));
      }
    }
    return rows;
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment