Created
April 14, 2024 10:06
-
-
Save unphased/628a116cdde6f2d3f49bbe04a8e3d555 to your computer and use it in GitHub Desktop.
ANSI escape sequence to HTML, for illustrative purposes
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Escape the characters that are significant in HTML text content so that
 * arbitrary text can be embedded in markup without being parsed as tags or
 * character references.
 *
 * `&` is replaced first; otherwise the ampersands introduced by the `<` and
 * `>` replacements would themselves be escaped a second time.
 *
 * @param str - Raw text to embed in HTML.
 * @returns The text with `&`, `<` and `>` replaced by `&amp;`, `&lt;` and `&gt;`.
 */
export const htmlEscape = (str: string): string => {
  return str.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;');
};
/** SGR parameter strings that are treated as "close the most recent span". */
const ansiResetCodes: ReadonlySet<string> = new Set([
  '0', // reset all
  '', // bare ESC[m, same as reset all
  '39', // reset foreground
  '49', // reset background
  '22', // reset bold/dim
  '23', // reset italic
  '24', // reset underline
  '27', // reset inverse
  '29', // reset strikethrough
  '59', // reset underline color
]);

/** Single-parameter SGR attributes and the CSS class each one maps to. */
const ansiAttributeClasses: ReadonlyMap<string, string> = new Map([
  ['1', 'ansi-bold'],
  ['2', 'ansi-dim'],
  ['3', 'ansi-italic'],
  ['4', 'ansi-underline'],
  ['7', 'ansi-inverse'],
  ['9', 'ansi-strikethrough'],
]);

/**
 * Translate the parameter part of one SGR sequence (the text between `ESC[`
 * and `m`) into the HTML tag that replaces it.
 *
 * Combined sequences (several attributes separated by `;`) are not split;
 * only the exact forms listed below are recognised.
 *
 * @param code - SGR parameter string, e.g. `"1"`, `"38;5;196"`, `"48;2;0;0;0"`.
 * @returns `'</span>'` for a reset code, an opening `<span ...>` for a
 *   recognised style/color code, or `''` when the code is not recognised.
 */
const ansiSgrToTag = (code: string): string => {
  if (ansiResetCodes.has(code)) {
    return '</span>';
  }
  const attributeClass = ansiAttributeClasses.get(code);
  if (attributeClass !== undefined) {
    return `<span class="${attributeClass}">`;
  }

  // 256-color and 24-bit forms. 256-color values become class names (to be
  // resolved by a stylesheet); 24-bit values become inline styles.
  let m: RegExpMatchArray | null;
  if ((m = code.match(/^38;5;(\d+)$/))) {
    return `<span class='ansi-fg-256-${m[1]}'>`;
  }
  if ((m = code.match(/^48;5;(\d+)$/))) {
    return `<span class='ansi-bg-256-${m[1]}'>`;
  }
  if ((m = code.match(/^38;2;(\d+);(\d+);(\d+)$/))) {
    return `<span class='ansi-tc-fg' style='color:rgb(${m[1]},${m[2]},${m[3]});'>`;
  }
  if ((m = code.match(/^48;2;(\d+);(\d+);(\d+)$/))) {
    return `<span class='ansi-tc-bg' style='background-color:rgb(${m[1]},${m[2]},${m[3]});'>`;
  }
  if ((m = code.match(/^4:(\d)$/))) {
    return `<span class='ansi-ul-style-${m[1]}'>`;
  }
  if ((m = code.match(/^58[:;]5[:;](\d+)$/))) {
    return `<span class='ansi-ul-256-${m[1]}'>`;
  }
  if ((m = code.match(/^58[:;]2[:;](\d+);(\d+);(\d+)$/))) {
    return `<span class='ansi-ul-tc' style='text-decoration-color:rgb(${m[1]},${m[2]},${m[3]});'>`;
  }

  // Plain 8/16-color codes: 30-37 / 90-97 are foreground, 40-47 / 100-107
  // are background. The numeric code itself is used as the class suffix.
  if (/^\d+$/.test(code)) {
    const color = parseInt(code, 10);
    if ((color >= 30 && color <= 37) || (color >= 90 && color <= 97)) {
      return `<span class="ansi-fg-${color}">`;
    }
    if ((color >= 40 && color <= 47) || (color >= 100 && color <= 107)) {
      return `<span class="ansi-bg-${color}">`;
    }
  }
  return '';
};

/**
 * Convert text containing ANSI SGR escape sequences (`ESC[...m`) to HTML,
 * one input line at a time.
 *
 * Each recognised style/color sequence becomes an opening `<span>`, each
 * reset sequence becomes `</span>`, and unrecognised sequences are dropped.
 * No style state is tracked: a reset simply closes one span. To keep the
 * output well-formed, tags are balanced per line — surplus opens are closed
 * at the end of the line and surplus closes are matched by empty `<span>`s
 * prepended to the line. Text between sequences is passed through
 * `htmlEscape`.
 *
 * @param ansi - Input text; lines are separated by `\n`.
 * @returns Four arrays with one entry per input line:
 *   - `html`: the line rendered as HTML;
 *   - `cleaned`: the line with every matched escape sequence removed;
 *   - `idxs`: start offsets (in the raw line) of each matched escape sequence;
 *   - `lens`: length of each matched escape sequence, parallel to `idxs`.
 */
export const convertAnsiHtml = (ansi: string) => {
  const html_a: string[] = [];
  const cleaned_a: string[] = [];
  const index_a: number[][] = []; // start locations of escape sequences
  const len_a: number[][] = []; // lengths of escape sequences, parallel to index_a

  for (const line of ansi.split('\n')) {
    // Fresh regex per line so lastIndex always starts at 0.
    const escapeRE = /\x1b\[([0-9;:]*)m/g;
    const indexs: number[] = [];
    const lens: number[] = [];
    let html = '';
    let cleaned = '';
    let consumed = 0; // offset in `line` just past the previous escape sequence
    let nestCount = 0; // opens minus closes emitted so far on this line

    let match: RegExpExecArray | null;
    while ((match = escapeRE.exec(line)) !== null) {
      const start = match.index;
      const len = match[0].length;

      // Literal text between the previous sequence and this one.
      const segment = line.slice(consumed, start);
      html += htmlEscape(segment);
      cleaned += segment;
      indexs.push(start);
      lens.push(len);

      const tag = ansiSgrToTag(match[1]);
      if (tag === '</span>') {
        nestCount--;
      } else if (tag.startsWith('<span')) {
        nestCount++;
      }
      html += tag;
      consumed = start + len;
    }

    // Trailing text after the last escape sequence, or the whole line when
    // it contains no escape sequences.
    const tail = line.slice(consumed);
    html += htmlEscape(tail);
    cleaned += tail;

    // Equalize nesting per line so unbalanced sequences cannot leak spans
    // into (or steal spans from) neighbouring lines.
    if (nestCount < 0) {
      html = '<span>'.repeat(-nestCount) + html;
    } else if (nestCount > 0) {
      html += '</span>'.repeat(nestCount);
    }

    html_a.push(html);
    cleaned_a.push(cleaned);
    index_a.push(indexs);
    len_a.push(lens);
  }

  return { html: html_a, cleaned: cleaned_a, idxs: index_a, lens: len_a };
};
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment