Last active
February 8, 2024 08:24
-
-
Save jonschlinkert/d5e6d37783d91d86cdd88356500d414d to your computer and use it in GitHub Desktop.
I created this to see how hard it would be to provide syntax highlighting for regex in the terminal. Try it out and see for yourself!
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
'use strict'; | |
const colors = require('ansi-colors'); | |
/**
 * Punctuation characters that get a dedicated token type, keyed by the
 * type name. The key strings are used verbatim as node types elsewhere in
 * this file, so they must not be renamed.
 */
const chars = {
  backslash: '\\',
  backtick: '`',
  caret: '^',
  colon: ':',
  comma: ',',
  dollar: '$',
  dot: '.',
  double_quote: '"',
  equal: '=',
  left_angle: '<',
  left_brace: '{',
  left_bracket: '[',
  left_paren: '(',
  not: '!',
  pipe: '|',
  plus: '+',
  qmark: '?',
  right_angle: '>',
  right_brace: '}',
  right_bracket: ']',
  right_paren: ')',
  single_quote: "'",
  slash: '/',
  star: '*'
};

/**
 * Reverse lookup of `chars`: maps each character to its token type name.
 * Built in one pass from the entries so the two tables cannot drift apart.
 */
const charmap = new Map(
  Object.entries(chars).map(([name, char]) => [char, name])
);
/**
 * A leaf of the syntax tree: one token (or a run of merged text) together
 * with the logic that picks its terminal color.
 *
 * `parent` is not set by the constructor; it is assigned by the container
 * the node is added to (see `Block#push` in this file). All navigation
 * getters tolerate a missing parent.
 */
class Node {
  /**
   * @param {{type: string, value?: string, output?: string}} node
   *   Token description. `value` and `output` default to the empty string.
   */
  constructor(node) {
    this.type = node.type;
    this.value = node.value || '';
    this.output = node.output || '';
  }

  /**
   * Appends text to this node and to every ancestor, so each container's
   * `value`/`output` accumulates the text of its descendants.
   *
   * @param {string} [value=''] Text to add to `value`.
   * @param {string} [output=''] Text to add to `output`.
   */
  append(value = '', output = '') {
    if (this.parent) {
      this.parent.append(value, output);
    }
    this.output += output;
    this.value += value;
  }

  /**
   * Returns this node's text, colored with `colors` according to its type
   * and its position relative to its parent and previous node.
   *
   * @param {number} [level=0] Nesting depth; accepted for signature parity
   *   with `Block#render` but not used here.
   * @param {boolean} [styles] Pass `false` to get the raw text uncolored.
   * @returns {string} The (possibly ANSI-colored) text.
   */
  render(level = 0, styles) {
    const { parent, type, prev, next } = this;
    const output = this.output || this.value;

    if (styles === false) {
      return output;
    }

    // A detached node has no parent and the first node of a tree has no
    // predecessor. Resolve both types once so none of the checks below
    // dereferences undefined.
    const parentType = parent ? parent.type : undefined;
    const prevType = prev ? prev.type : undefined;

    if (type === 'dot') {
      return colors.blue(output);
    }

    // Text with nothing after it (e.g. the flags after the closing slash).
    if (!next && type === 'text') {
      return colors.red(output);
    }

    // The `<` of a lookbehind: `(?<=` / `(?<!`.
    if (type === 'left_angle' && parentType === 'paren' && prevType === 'qmark') {
      return colors.white(output);
    }

    if (parentType === 'paren') {
      // `=` / `!` directly after the lookbehind `<`.
      if ((type === 'equal' || type === 'not') && prevType === 'left_angle') {
        return colors.white(output);
      }
      // `(?=`, `(?!`, `(?:`.
      if (['equal', 'not', 'colon'].includes(type) && prevType === 'qmark') {
        return colors.white(output);
      }
    }

    // A `?` that does not directly follow `(` is a quantifier.
    if (type === 'qmark' && prevType !== 'left_paren') {
      return colors.red(output);
    }

    if (['qmark', 'paren'].includes(type)) {
      return colors.white(output);
    }

    if (type === 'escaped') {
      // Backslash followed by a digit is shown in red, other escapes in blue.
      return /^\\[0-9]/.test(output) ? colors.red(output) : colors.blue(output);
    }

    if (type.endsWith('bracket') || parentType === 'bracket') {
      // Highlight a leading `^` (class negation) separately.
      if (output[0] === '^') {
        return colors.red(output[0]) + colors.blue(output.slice(1));
      }
      return colors.blue(output);
    }

    if (['caret', 'dollar', 'star', 'pipe', 'plus'].includes(type)) {
      return colors.red(output);
    }

    if (type === 'number' && parentType === 'brace') {
      return colors.cyan(output);
    }

    if (type.endsWith('brace') || (type === 'comma' && parentType === 'brace')) {
      return colors.red(output);
    }

    if (type.endsWith('paren')) {
      return colors.yellow(output);
    }

    if (type.endsWith('angle')) {
      return colors.white(output);
    }

    return colors.yellow(output);
  }

  /** @returns {Array} The parent's child list, or `[]` when detached. */
  get siblings() {
    return (this.parent && this.parent.nodes) || [];
  }

  /** @returns {number} Position among `siblings`, or -1 when detached. */
  get index() {
    return this.siblings.indexOf(this);
  }

  /**
   * The preceding sibling. For the first child of a container this falls
   * back to whatever precedes the container itself.
   */
  get prev() {
    const prev = this.siblings[this.index - 1];
    if (!prev && this.parent) {
      return this.parent.prev;
    }
    return prev;
  }

  /** The following sibling, or `undefined` for the last child. */
  get next() {
    return this.siblings[this.index + 1];
  }
}
/**
 * A container node (root, paren, bracket, brace or angle group) holding an
 * ordered list of child nodes.
 */
class Block extends Node {
  /**
   * @param {{type: string, value?: string, output?: string}} node
   *   Block description. Children always start out empty; any `nodes`
   *   property on the argument is not copied.
   */
  constructor(node) {
    super(node);
    this.nodes = [];
  }

  /**
   * Adds a child and points its `parent` back at this block.
   * @param {Node} node
   */
  push(node) {
    this.nodes.push(node);
    node.parent = this;
  }

  /**
   * Renders the given nodes in order and concatenates the results.
   * @param {Node[]} [nodes=[]]
   * @param {number} [level=0]
   * @param {boolean} [styles=true]
   * @returns {string}
   */
  renderNodes(nodes = [], level = 0, styles = true) {
    return nodes.map(child => child.render(level, styles)).join('');
  }

  /**
   * Renders every child except the first and last one (the delimiters).
   * @param {number} [level=0]
   * @param {boolean} [styles=true]
   * @returns {string}
   */
  renderInner(level = 0, styles = true) {
    return this.renderNodes(this.nodes.slice(1, -1), level, styles);
  }

  /**
   * Renders the block. An angle block that directly follows `?` inside a
   * paren group (a named capture, `(?<name>`) is drawn as plain `<` `>`
   * around a green name; everything else is rendered child by child.
   *
   * @param {number} [level=0]
   * @param {boolean} [styles=true]
   * @returns {string}
   */
  render(level = 0, styles = true) {
    const { type, prev, parent } = this;
    const first = this.nodes[0];
    const last = this.nodes[this.nodes.length - 1];

    // Only take the shortcut when the block really is `<` ... `>`. An
    // unterminated group would otherwise lose its last child and gain a
    // `>` that was never in the input.
    const isDelimited =
      this.nodes.length >= 2 &&
      first.type === 'left_angle' &&
      last.type === 'right_angle';

    const isGroupName =
      styles &&
      prev &&
      prev.type === 'qmark' &&
      type === 'angle' &&
      parent.type === 'paren' &&
      isDelimited;

    if (isGroupName) {
      return `<${colors.green(this.renderInner(level + 1, false))}>`;
    }

    return this.renderNodes(this.nodes, level + 1, styles);
  }
}
/**
 * Tokenizes the string form of `value` (typically a RegExp) into a tree of
 * `Node`/`Block` objects, prints the rendered tree with `console.log`, and
 * returns the root block.
 *
 * Consecutive `text` tokens are merged into one node. Groups opened by
 * `{`, `[`, `<` and `(` become nested blocks; a closing character only
 * closes a block of the matching kind, otherwise it is kept as an ordinary
 * token in the current block.
 *
 * @param {*} value Anything; it is converted with `String(value)`.
 * @returns {Block} The root of the tree (`type === 'root'`).
 */
const parse = value => {
  const input = String(value);
  const ast = new Block({ type: 'root', nodes: [] });
  const stack = [ast];
  let block = ast;
  let prev;

  // Adds a token or block to the current block, merging adjacent text.
  const push = node => {
    if (prev && prev.type === 'text' && node.type === 'text') {
      prev.append(node.value);
      return;
    }

    let child = node;
    if (!(child instanceof Node)) {
      child = child.nodes ? new Block(child) : new Node(child);
    }

    block.push(child);

    if (child.nodes) {
      // A new block becomes the current insertion point.
      stack.push(child);
      block = child;
    } else {
      // Leaves contribute their text to every enclosing block.
      block.append(child.value);
    }

    prev = child;
  };

  // Leaves the current block and returns it.
  const pop = () => {
    const closed = stack.pop();
    block = stack[stack.length - 1];
    return closed;
  };

  // Starts a nested block whose first child is the opening delimiter.
  const open = (blockType, token) => {
    push(new Block({ type: blockType }));
    push(token);
  };

  // Adds a closing delimiter; leaves the block only if it is of that kind.
  const close = (blockType, token) => {
    const closesBlock = block.type === blockType;
    push(token);
    if (closesBlock) {
      pop();
    }
  };

  for (let i = 0; i < input.length; i++) {
    const char = input[i];
    const code = input.charCodeAt(i);
    const token = { type: 'unknown', value: char };

    if (char === '\\') {
      token.type = block.type === 'bracket' ? 'text' : 'escaped';
      const rest = input.slice(i + 1, i + 6);

      if (/^u[0-9a-fA-F]{4}$/.test(rest)) {
        // `\uHHHH`: the four digits are hexadecimal, not just decimal.
        token.value += rest;
        i += 5;
      } else if (i + 1 < input.length) {
        token.value += input[++i];
      }
      // else: a lone trailing backslash stays as it is.

      push(token);
      continue;
    }

    // Inside a character class everything up to `]` is literal text.
    if (block.type === 'bracket' && char !== ']') {
      token.type = 'text';
      push(token);
      continue;
    }

    // 0-9
    if (code >= 48 && code <= 57) {
      token.type = 'number';
      push(token);
      continue;
    }

    // a-z, A-Z
    if ((code >= 97 && code <= 122) || (code >= 65 && code <= 90)) {
      token.type = 'text';
      push(token);
      continue;
    }

    /**
     * Braces
     */

    if (char === '{') {
      token.type = 'left_brace';
      open('brace', token);
      continue;
    }

    if (char === '}') {
      token.type = 'right_brace';
      close('brace', token);
      continue;
    }

    /**
     * Brackets
     */

    if (char === '[') {
      token.type = 'left_bracket';
      open('bracket', token);
      continue;
    }

    if (char === ']') {
      // A `]` with no open class is plain text.
      token.type = block.type === 'bracket' ? 'right_bracket' : 'text';
      close('bracket', token);
      continue;
    }

    /**
     * Angles
     */

    if (char === '<') {
      token.type = 'left_angle';
      const next = input[i + 1];
      const isLookbehind =
        block.type === 'paren' &&
        prev &&
        prev.type === 'qmark' &&
        (next === '!' || next === '=');

      if (isLookbehind) {
        // `(?<=` / `(?<!`: the `<` is part of the group prefix, not a name.
        push(token);
      } else {
        open('angle', token);
      }
      continue;
    }

    if (char === '>') {
      token.type = 'right_angle';
      close('angle', token);
      continue;
    }

    /**
     * Parens
     */

    if (char === '(') {
      token.type = 'left_paren';
      open('paren', token);
      continue;
    }

    if (char === ')') {
      token.type = 'right_paren';
      close('paren', token);
      continue;
    }

    /**
     * Charmap
     */

    if (charmap.has(char)) {
      token.type = charmap.get(char);
      push(token);
      continue;
    }

    /**
     * Everything else
     */

    push(token);
  }

  console.log(ast.render());
  return ast;
};
// The longest sample. It is highlighted with the others and then printed
// once more as-is (presumably to compare against the colored output), so
// it is declared once and reused.
const finalPattern = /[\u001b\u009b][[\]#;?()]*(?:(?:(?:[^\W_]*;?[^\W_]*)\u0007)|(?:(?:[0-9]{1,4}(;[0-9]{0,4})*)?[~0-9=<>cf-nqrtyA-PRZ]))/g;

// Sample patterns used to exercise the highlighter.
const regex = [
  /^a(?<foo>z)[a-b]$/,
  /(^[*!]|[/{[()\]}"])/,
  /(?:\[.*?(?<!\\)\]|\\(?<=.))/g,
  /\[a-b.=/,
  /\[a-b].=/,
  /\[a-b\].=/,
  /^(.*?)\.(\w+)$/,
  /\\(?![*+?^${}(|)[\]])/g,
  /\\(?![*+?^${}(|)[\]]+)?/g,
  /\\(?![*+?^${}(|)[\]]+)*?/g,
  /^[^@![\].,$*+?^{}()|\\/]+/,
  /[-*+?.^${}(|)[\]]/,
  /(\\?)((\W)(\3*))/g,
  /([-*+?.^${}(|)[\]])/g,
  /^(\.)$/gimy,
  /^(\+)$/gimy,
  /^(\?)$/gimy,
  /^([\\/])$/gimy,
  /^((?=.))$/gimy,
  /^([^\\/])$/gimy,
  /^((?:[\\/]|$))$/gimy,
  /^(\.{1,2}(?:[\\/]|$))$/gimy,
  /^((?!\.))$/gimy,
  /^((?!(?:^|[\\/])\.{1,2}(?:[\\/]|$)))$/gimy,
  /^((?!\.{0,1}(?:[\\/]|$)))$/gimy,
  /^((?!\.{1,2}(?:[\\/]|$)))$/gimy,
  /^([^.\\/])$/gimy,
  /^([^\\/]*?)$/gimy,
  /^((?:^|[\\/]))$/gimy,
  /^([a-zA-Z0-9])$/gimy,
  /^([a-zA-Z])$/gimy,
  /^([\x00-\x7F]{2})$/gimy,
  /^([ \t])$/gimy,
  /^([\x00-\x1F\x7F])$/gimy,
  /^([0-9])$/gimy,
  /^([\x21-\x7E])$/gimy,
  /^([a-z])$/gimy,
  /^([\x20-\x7E ])$/gimy,
  /^([\-!"#$%&'()\*+,./:;<=>?@[\]^_`{|}~])$/gimy,
  /^([ \t\r\n\v\f])$/gimy,
  /^([A-Z])$/gimy,
  /^([A-Za-z0-9_])$/gimy,
  /^([A-Fa-f0-9])$/gimy,
  finalPattern
];

// `parse` prints the highlighted form of each pattern as a side effect.
for (const re of regex) {
  parse(re);
}

console.log(finalPattern);
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment