Skip to content

Instantly share code, notes, and snippets.

@ndesmic
Created October 16, 2024 22:38
Show Gist options
  • Save ndesmic/835e36ecd3e2751adde4e7082058c29f to your computer and use it in GitHub Desktop.
Save ndesmic/835e36ecd3e2751adde4e7082058c29f to your computer and use it in GitHub Desktop.
File Matcher
import { Tokenizer } from "./tokenizer.js";
// Compares ./output/doc.txt against the template in ./expected/doc.txt.
// Text in the template must appear verbatim in the output, except for
// `{{{ ... }}}` sections, whose contents are compiled as a regular expression
// and matched against the output at the current position.
const textOutput = Deno.readTextFileSync("./output/doc.txt");
const textExpected = Deno.readTextFileSync("./expected/doc.txt");

// The delimiters are listed ahead of the single-character matcher, whose
// character class also accepts `{` and `}`.
// NOTE(review): characters outside the class below (e.g. a newline, `+`, `*`)
// match none of these rules — confirm that is intended for the expected file.
const tokenizer = new Tokenizer([
  { matcher: /\{\{\{/, type: "regex-start" },
  { matcher: /\}\}\}/, type: "regex-end" },
  { matcher: /[a-zA-Z0-9\[\]\(\)\{\}\.\$\-:\!\\ \t]/, type: "string", valueExtractor: (x) => x },
]);

let i = 0; // cursor into textOutput
let isRegex = false; // true while between `{{{` and `}}}`
let regexBuffer = ""; // regex source collected since the last `{{{`
const tokens = [...tokenizer.tokenize(textExpected)];

for (const token of tokens) {
  switch (token.type) {
    case "string": {
      if (isRegex) {
        // Inside a regex section the characters are pattern source, not literal text.
        regexBuffer += token.value;
      } else if (textOutput[i] === token.value) {
        i++;
      } else {
        throw new Error(`Text did not match at index ${i}`);
      }
      break;
    }
    case "regex-start": {
      isRegex = true;
      break;
    }
    case "regex-end": {
      if (!isRegex) {
        throw new Error(`Unexpected "}}}" without a matching "{{{" (output index ${i})`);
      }
      isRegex = false;
      // Sticky flag + lastIndex anchors the pattern at the cursor instead of
      // letting it search ahead in the output.
      const regex = new RegExp(regexBuffer, "y");
      regex.lastIndex = i;
      const matched = regex.exec(textOutput);
      if (matched === null) {
        throw new Error(`Pattern /${regexBuffer}/ did not match at index ${i}`);
      }
      i += matched[0].length;
      regexBuffer = "";
      break;
    }
    // Any other token type (such as the tokenizer's end marker) needs no action.
  }
}

if (isRegex) {
  throw new Error(`Unterminated "{{{" in expected text`);
}
if (i !== textOutput.length) {
  // The template was satisfied but the output has extra content after it.
  throw new Error(`Unexpected trailing output at index ${i}`);
}
console.log("Matched!");
/** Token type of the final token yielded by {@link Tokenizer#tokenize}. */
export const END = Symbol("END");

/**
 * Rule-based lexer. Each rule is `{ matcher, type, valueExtractor }`:
 * - `matcher`: RegExp tried at the current position (anchored there via the sticky flag).
 * - `type`: token type to emit; when null/undefined the matched text is consumed silently.
 * - `valueExtractor`: optional function mapping the matched text to the token's `value`.
 *
 * Rules are tried in array order and the first one that matches wins.
 */
export class Tokenizer {
  #tokenTypes;

  /**
   * @param {Array<{matcher: RegExp, type?: *, valueExtractor?: function(string): *}>} tokenTypes
   *   Rules in priority order (earlier entries win).
   */
  constructor(tokenTypes) {
    this.#tokenTypes = tokenTypes;
  }

  /**
   * Lazily splits `text` into tokens.
   *
   * @param {string} text - Input to tokenize.
   * @yields {{type: *, value?: *}} One token per matched rule that has a type,
   *   followed by a final `{ type: END }` token.
   * @throws {Error} "Unexpected token at index N" when no rule consumes any
   *   input at position N.
   */
  *tokenize(text) {
    // Compile sticky copies once per call instead of once per rule per position.
    // The matcher's own flags (i, s, u, ...) are kept; `g` is dropped because it
    // is irrelevant for sticky matching and `y` is (re)added.
    const rules = this.#tokenTypes.map(({ matcher, type, valueExtractor }) => ({
      matcher: new RegExp(matcher.source, `${matcher.flags.replace(/[gy]/g, "")}y`),
      type,
      valueExtractor,
    }));

    let index = 0;
    while (index < text.length) {
      let hasMatch = false;
      for (const { matcher, type, valueExtractor } of rules) {
        // A failed sticky exec resets lastIndex, so set it before every attempt.
        matcher.lastIndex = index;
        const matched = matcher.exec(text);
        // A zero-length match consumes nothing; accepting it would loop forever.
        if (matched === null || matched[0].length === 0) {
          continue;
        }
        index += matched[0].length;
        if (type != null) {
          const token = { type };
          if (valueExtractor) {
            token.value = valueExtractor(matched[0]);
          }
          yield token;
        }
        hasMatch = true;
        // Restart from the highest-priority rule at the new position.
        break;
      }
      if (!hasMatch) {
        throw new Error(`Unexpected token at index ${index}`);
      }
    }
    yield { type: END };
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment