Tesseract.js based OCR from Webcam
Last active
March 8, 2021 17:07
-
-
Save barakplasma/40b8a8aea8937cc9c1e7de78a30e5f4a to your computer and use it in GitHub Desktop.
Tesseract OCR from Webcam
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<!DOCTYPE html> | |
<html lang="en"> | |
<head> | |
<meta charset="UTF-8"> | |
<title>Tesseract.js Video Streaming Recognition</title> | |
<link rel="stylesheet" href="style.css"> | |
<script src='https://unpkg.com/tesseract.js@2/dist/tesseract.min.js'></script> | |
</head> | |
<body> | |
<div id="root"> | |
<video id="poem-video" width="640" height="360" crossorigin="anonymous"> | |
</video> | |
<div id="sep"></div> | |
<div id="messages"> | |
</div> | |
</div> | |
<script> | |
// Factories exposed on the global `Tesseract` object by tesseract.min.js.
const { createWorker, createScheduler } = Tesseract;
// Recognition jobs are queued on this scheduler; workers are attached to it
// by the initialization code further down.
const scheduler = createScheduler();
const video = document.getElementById('poem-video');

// `navigator.mediaDevices` is undefined on insecure origins and in older
// browsers, so it has to be checked before `getUserMedia` is dereferenced.
if (navigator.mediaDevices && navigator.mediaDevices.getUserMedia) {
  navigator.mediaDevices.getUserMedia({ video: true })
    .then((stream) => {
      // Feed the webcam stream into the <video> element.
      video.srcObject = stream;
    })
    .catch((err) => {
      // Keep the underlying cause (permission denied, no camera, ...) visible.
      console.error('Something went wrong!', err);
    });
} else {
  console.error('Something went wrong!', new Error('getUserMedia is not available in this context'));
}

// Panel that receives status lines and recognized text.
const messages = document.getElementById('messages');
// Handle of the running OCR interval, or null while no loop is active.
let timerId = null;
/**
 * Append one paragraph to the #messages panel and scroll the panel to the bottom.
 *
 * @param {string} m - Text to display. It is inserted as plain text and is
 *   never parsed as HTML.
 * @param {boolean} [bold] - When truthy the paragraph gets the `bold` class.
 */
const addMessage = (m, bold) => {
  const paragraph = document.createElement('p');
  if (bold) {
    paragraph.className = 'bold';
  }
  // textContent instead of innerHTML: the text may be OCR output, which can
  // contain `<` or `&` and must not be interpreted as markup. Appending a node
  // also avoids re-parsing the whole panel on every call.
  paragraph.textContent = String(m);
  messages.appendChild(paragraph);
  messages.scrollTop = messages.scrollHeight;
};
/**
 * Copy the current video frame onto an off-screen canvas, submit it to the
 * scheduler as a `recognize` job, then print the elapsed time followed by one
 * message per recognized line.
 *
 * This function is invoked from a timer with nobody awaiting the returned
 * promise, so failures are reported in the message panel instead of being
 * left as unhandled rejections.
 *
 * @returns {Promise<void>}
 */
const doOCR = async () => {
  const FRAME_WIDTH = 640;
  const FRAME_HEIGHT = 360;
  // mm:ss with zero padding, so 12 min 5 s reads "12:05" rather than "12:5".
  const pad = (value) => String(value).padStart(2, '0');
  const clock = (date) => `${pad(date.getMinutes())}:${pad(date.getSeconds())}`;

  try {
    const canvas = document.createElement('canvas');
    canvas.width = FRAME_WIDTH;
    canvas.height = FRAME_HEIGHT;
    canvas.getContext('2d').drawImage(video, 0, 0, FRAME_WIDTH, FRAME_HEIGHT);

    const start = new Date();
    const { data: { text } } = await scheduler.addJob('recognize', canvas);
    const end = new Date();

    addMessage(`[${clock(start)} - ${clock(end)}], ${(end - start) / 1000} s`);
    text.split('\n').forEach((line) => {
      addMessage(line);
    });
  } catch (err) {
    addMessage(`OCR failed: ${err && err.message ? err.message : err}`, true);
  }
};
// Bootstrap: build the worker pool, then tie the OCR loop to the video's
// play / pause / ended events and expose the player controls.
(async () => {
  const WORKER_COUNT = 4;
  const OCR_INTERVAL_MS = 1000;

  // Create one worker and take it through load -> loadLanguage -> initialize.
  const startWorker = async () => {
    const worker = createWorker();
    await worker.load();
    await worker.loadLanguage('eng');
    await worker.initialize('eng');
    return worker;
  };

  addMessage('Initializing Tesseract.js');
  let workers;
  try {
    // The workers do not depend on each other, so bring them up in parallel
    // instead of awaiting each one in turn.
    workers = await Promise.all(
      Array.from({ length: WORKER_COUNT }, () => startWorker()),
    );
  } catch (err) {
    // Report the failure in the panel rather than leaving an unhandled rejection.
    addMessage(`Failed to initialize Tesseract.js: ${err && err.message ? err.message : err}`, true);
    return;
  }
  workers.forEach((worker) => {
    scheduler.addWorker(worker);
  });
  addMessage('Initialized Tesseract.js');

  const stopOCR = () => {
    clearInterval(timerId);
    timerId = null;
  };

  video.addEventListener('play', () => {
    // Clear any previous interval first so repeated play events never stack loops.
    stopOCR();
    timerId = setInterval(doOCR, OCR_INTERVAL_MS);
  });
  video.addEventListener('pause', stopOCR);
  video.addEventListener('ended', stopOCR);

  addMessage('Now you can play the video. :)');
  video.controls = true;
})();
</script> | |
</body> | |
</html> |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"name": "tesseract.js-video", | |
"version": "1.0.0", | |
"description": "", | |
"main": "index.js", | |
"scripts": { | |
"start": "live-server" | |
}, | |
"author": "", | |
"license": "ISC", | |
"dependencies": { | |
"live-server": "latest" | |
} | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* Let the page fill the viewport so #root can center its children vertically. */
html, body {
  width: 100%;
  height: 100%;
  margin: 0;
}

/* Applied to messages added with the bold flag. */
.bold {
  font-weight: bold;
}

/* Full-page flex row: video, separator and message panel, centered on both axes. */
#root {
  /* Without border-box the 16px side padding is added on top of width: 100%,
     making the row 32px wider than the viewport and causing horizontal scroll. */
  box-sizing: border-box;
  width: 100%;
  height: 100%;
  display: flex;
  flex-direction: row;
  justify-content: center;
  align-items: center;
  padding: 0px 16px;
}

/* Fixed gap between the video and the message panel. */
#sep {
  width: 16px;
}

/* Same size as the 640x360 video; scrolls once the messages overflow. */
#messages {
  width: 640px;
  height: 360px;
  overflow: auto;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment