Skip to content

Instantly share code, notes, and snippets.

@hiiamyes
Created April 5, 2020 12:27
Show Gist options
  • Save hiiamyes/34458fac7c831a4588d0fd232027a1b6 to your computer and use it in GitHub Desktop.
Save hiiamyes/34458fac7c831a4588d0fd232027a1b6 to your computer and use it in GitHub Desktop.
national-park-captcha-tesseract
const { createWorker } = require("tesseract.js");
const fs = require("fs");
const path = require("path");

// Directory of captcha samples; each file's name (minus ".png") is compared
// against the OCR output, i.e. it is treated as the expected answer.
const IMAGES_DIR = path.resolve(__dirname, "../images");

// NOTE(review): this uses the worker API where createWorker() returns the
// worker synchronously and load/loadLanguage/initialize are called
// explicitly — confirm against the installed tesseract.js version.
const worker = createWorker();
const log = console.log;

/**
 * Runs OCR over every file in IMAGES_DIR, logs each file name together with
 * the recognized text, and finally logs how many recognitions matched the
 * file name and how many files were processed in total.
 */
(async () => {
  try {
    await worker.load();
    await worker.loadLanguage("eng");
    await worker.initialize("eng");
    await worker.setParameters({
      // Restrict recognition to alphanumeric characters.
      tessedit_char_whitelist:
        "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ",
    });

    const images = fs.readdirSync(IMAGES_DIR);
    let success = 0;
    for (const image of images) {
      const {
        data: { text: rawText },
      } = await worker.recognize(path.join(IMAGES_DIR, image));
      // Strip ALL whitespace: String.prototype.replace with a string pattern
      // only removes the first "\n", so multi-line or padded OCR output would
      // never match. Whitespace is not in the whitelist, so none of it can be
      // part of a legitimate answer.
      const text = rawText.replace(/\s+/g, "");
      // basename() removes ".png" only when it is the trailing extension,
      // unlike replace(".png", "") which hits the first occurrence anywhere.
      const expected = path.basename(image, ".png");
      if (expected === text) success++;
      log(image, text);
    }
    log(`success: ${success}`);
    log(`total: ${images.length}`);
  } finally {
    // Always shut the worker down, even when setup or recognition throws.
    await worker.terminate();
  }
})().catch((err) => {
  // Surface failures instead of leaving an unhandled promise rejection.
  console.error(err);
  process.exitCode = 1;
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment