disco0 · October 9, 2020 04:04
diff --git a/readme.md b/readme.md
diff --git a/transform.js b/transform.js
 // Proof of concept: a rudimentary transform method that translates a regex
 // source written in the syntax described above into a valid JavaScript regex.

 function parseRegex(source) {
 	const root = {children: []};

 	let currentNode = root;
 	let currentText = '';
 	function split() {
 		if (currentText.length > 0) {
 			currentNode.children.push(currentText);
 			currentText = '';
 		}
 	}

 	for (const line of source.split("\n").map(l => l.trim())) {
 		if (line.startsWith("//")) {
 			// line is a comment
 			continue;
 		}

 		let index = 0;
 		while (index < line.length) {
 			const char = source[index++];
 			if (char === "\\") {
 				currentText += char + line[index++];
 				continue;
 			} if (char === "\t" || char === " ") {
 				continue;
 			} else if (char === "(") {
 				split();
 				const group = {
 					parent: currentNode,
 					children: [],
 				};

 				if (line.startsWith(">=", index)) {
 					group.kind = "positive lookahead";
 					index += 2;
 				} else if (line.startsWith(">!", index)) {
 					group.kind = "negative lookahead";
 					index += 2;
 				} else if (line.startsWith(">=", index)) {
 					group.kind = "positive lookbehind";
 					index += 2;
 				} else if (line.startsWith(">!", index)) {
 					group.kind = "negative lookbehind";
 					index += 2;
 				} else if (line.startsWith("?#", index)) {
 					group.kind = "numbered capture";
 					index += 2;
 				} else if (line.startsWith("?", index)) {
 					group.kind = "named capture";
 					group.name = "";
 					while (++index < line.length && line[index] !== ' ') {
 						group.name += line[index];
 					}
 				} else {
 					group.kind = "non-capture";
 				}

 				currentNode.children.push(group);
 				currentNode = group;
 			} else if (char === ")") {
 				split();
 				currentNode = currentNode.parent;
 			} else {
 				currentText += char;
 			}
 		}
 	}

 	return root;
 }

 function translateCore(nodes) {
 	let content = "";
 	for (const node of nodes) {
 		if (typeof node === "string") {
 			content += node;
 		} else if (node.kind === "positive lookahead") {
 			content += `(?=${translateCore(node.children)})`;
 		} else if (node.kind === "negative lookahead") {
 			content += `(?!${translateCore(node.children)})`;
 		} else if (node.kind === "positive lookbehind") {
 			content += `(?<=${translateCore(node.children)})`;
 		} else if (node.kind === "negative lookbehind") {
 			content += `(?<!${translateCore(node.children)})`;
 		} else if (node.kind === "numbered capture") {
 			content += `(${translateCore(node.children)})`;
 		} else if (node.kind === "named capture") {
 			content += `(?<${node.name}>${translateCore(node.children)})`;
 		} else if (node.kind === "non-capture") {
 			content += `(?:${translateCore(node.children)})`;
 		}
 	}

 	return content;
 }

 function translateRegex(source) {
 	const tree = parseRegex(source);
 	console.log(tree);
 	return `/${translateCore(tree.children)}/g`;
 }
	// Proof of concept: a rudimentary transform method that translates a regex
	// source written in the syntax described above into a valid JavaScript regex.

	function parseRegex(source) {
	const root = {children: []};

	let currentNode = root;
	let currentText = '';
	function split() {
	if (currentText.length > 0) {
	currentNode.children.push(currentText);
	currentText = '';
	}
	}

	for (const line of source.split("\n").map(l => l.trim())) {
	if (line.startsWith("//")) {
	// line is a comment
	continue;
	}

	let index = 0;
	while (index < line.length) {
	const char = source[index++];
	if (char === "\\") {
	currentText += char + line[index++];
	continue;
	} if (char === "\t" \|\| char === " ") {
	continue;
	} else if (char === "(") {
	split();
	const group = {
	parent: currentNode,
	children: [],
	};

	if (line.startsWith(">=", index)) {
	group.kind = "positive lookahead";
	index += 2;
	} else if (line.startsWith(">!", index)) {
	group.kind = "negative lookahead";
	index += 2;
	} else if (line.startsWith(">=", index)) {
	group.kind = "positive lookbehind";
	index += 2;
	} else if (line.startsWith(">!", index)) {
	group.kind = "negative lookbehind";
	index += 2;
	} else if (line.startsWith("?#", index)) {
	group.kind = "numbered capture";
	index += 2;
	} else if (line.startsWith("?", index)) {
	group.kind = "named capture";
	group.name = "";
	while (++index < line.length && line[index] !== ' ') {
	group.name += line[index];
	}
	} else {
	group.kind = "non-capture";
	}

	currentNode.children.push(group);
	currentNode = group;
	} else if (char === ")") {
	split();
	currentNode = currentNode.parent;
	} else {
	currentText += char;
	}
	}
	}

	return root;
	}

	function translateCore(nodes) {
	let content = "";
	for (const node of nodes) {
	if (typeof node === "string") {
	content += node;
	} else if (node.kind === "positive lookahead") {
	content += `(?=${translateCore(node.children)})`;
	} else if (node.kind === "negative lookahead") {
	content += `(?!${translateCore(node.children)})`;
	} else if (node.kind === "positive lookbehind") {
	content += `(?<=${translateCore(node.children)})`;
	} else if (node.kind === "negative lookbehind") {
	content += `(?<!${translateCore(node.children)})`;
	} else if (node.kind === "numbered capture") {
	content += `(${translateCore(node.children)})`;
	} else if (node.kind === "named capture") {
	content += `(?<${node.name}>${translateCore(node.children)})`;
	} else if (node.kind === "non-capture") {
	content += `(?:${translateCore(node.children)})`;
	}
	}

	return content;
	}

	function translateRegex(source) {
	const tree = parseRegex(source);
	console.log(tree);
	return `/${translateCore(tree.children)}/g`;
	}