Last active
April 29, 2019 18:54
-
-
Save javilobo8/951de419a72c1d2c829851448dd909b1 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const Tesseract = require('tesseract.js'); | |
const sharp = require('sharp'); | |
const cv = require('opencv4nodejs'); | |
const fs = require('fs'); | |
// --- Plain numeric tunables -------------------------------------------------

/** Coordinate used for both x and y of ANCHOR. */
const EROSION_SIZE = 1;

/** Multiplier applied to both image dimensions when the captcha is enlarged. */
const SCALE = 2;

/** Iteration count handed to the morphological operation. */
const ITERATIONS = 2;

// --- Values derived from the OpenCV binding ---------------------------------

/** OpenCV morphology-type constant that selects erosion. */
const EROSION_MODE = cv.MORPH_ERODE;

/** Anchor point (EROSION_SIZE, EROSION_SIZE) for the structuring element. */
const ANCHOR = new cv.Point2(EROSION_SIZE, EROSION_SIZE);

// --- Input discovery ----------------------------------------------------------

/** Entry names of the `captchas` directory, read synchronously at load time. */
const dirs = fs.readdirSync('captchas');
/**
 * Runs Tesseract on `output_edited_opencv.png`, prints the recognised text
 * with every whitespace character removed, and hands that text back.
 *
 * The recogniser is restricted to the digits 0-9 through the whitelist option.
 *
 * @returns {Promise<string>} Resolves with the whitespace-stripped text that
 *   was printed. Rejects if recognition fails.
 */
function runTesseract() {
  const options = {
    tessedit_char_whitelist: '0123456789',
    tessedit_char_blacklist: ' ',
    tessedit_pageseg_mode: 5,
    tessedit_ocr_engine_mode: 1,
  };

  return Tesseract.recognize('output_edited_opencv.png', options).then((result) => {
    // `\s` already matches `\n` and `\r`, and case-insensitivity is
    // meaningless for whitespace, so a plain global `\s` is sufficient.
    const text = String(result.text).replace(/\s/g, '');
    console.log(text);
    // Returned so callers can use the value instead of scraping stdout.
    return text;
  });
}
/**
 * Pre-processes one captcha image and runs OCR on the result.
 *
 * Steps:
 *   1. sharp: negate, drop the alpha channel, threshold at 50 (greyscale) and
 *      write `output_edited.png`.
 *   2. OpenCV: read that file back, enlarge it by SCALE with nearest-neighbour
 *      interpolation, apply a morphological opening, and write
 *      `output_edited_opencv.png`.
 *   3. Call runTesseract(), which reads `output_edited_opencv.png`.
 *
 * @returns {Promise<void>} Resolves once OCR has finished.
 * @throws {Error} If the `captchas` directory has no entry at index 6.
 */
async function main() {
  const captchaName = dirs[6];
  if (captchaName === undefined) {
    // Without this guard sharp would be asked to open "./captchas/undefined".
    throw new Error(
      `Expected at least 7 entries in "captchas" but found ${dirs.length}`,
    );
  }

  await sharp(`./captchas/${captchaName}`)
    .negate()
    .removeAlpha()
    .threshold(50, { grayscale: true })
    .toFile('output_edited.png');

  const thresholded = await cv.imreadAsync('output_edited.png');

  // The target size is given explicitly; the scale factors are passed as
  // SCALE rather than a repeated literal so the two cannot drift apart.
  const enlarged = await thresholded.resizeAsync(
    SCALE * thresholded.sizes[0],
    SCALE * thresholded.sizes[1],
    SCALE, SCALE,
    cv.INTER_NEAREST,
  );

  // An empty Mat is passed as the kernel. The last argument is the border
  // type, not a morphology type: cv.BORDER_CONSTANT has the same numeric
  // value as the cv.MORPH_ERODE that was passed here before, so the result
  // is unchanged while the intent is now explicit.
  const opened = await enlarged.morphologyExAsync(
    new cv.Mat(),
    cv.MORPH_OPEN,
    ANCHOR,
    ITERATIONS,
    cv.BORDER_CONSTANT,
  );

  await cv.imwriteAsync('output_edited_opencv.png', opened);
  await runTesseract();
}

main().catch((err) => {
  // Surface any failure instead of leaving an unhandled rejection.
  console.error(err);
  process.exitCode = 1;
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment