Last active
November 17, 2021 17:11
-
-
Save impaachu/d7d9cadbc918a6c5f761553468592617 to your computer and use it in GitHub Desktop.
OCR using Tesseract.js library
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
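<!DOCTYPE html>
<html lang="en">
<head>
<title></title>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<!-- NOTE(review): the package version in the URL below was replaced by "[email protected]" when this page was scraped; restore the real "tesseract.js@<version>" before use. -->
<script src='https://unpkg.com/[email protected]/dist/tesseract.min.js'></script>
</head>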
<body>
<table style="text-align: center">
<tr>
<td><b><u>Video Input</u></b></td>
<td><b><u>Snapshot</u></b></td>
</tr>
<tr>
<td>
<video id="videoele" style="width: 400px; height: 300px;" autoplay playsinline></video>
</td>
<td style="min-width: 400px;">
<img id="imageele" src="" style="max-width: 400px; max-height: 200px;">
</td>
</tr>
<tr>
<td><b><u>Status</u></b></td>
<td><b><u>Output</u></b></td>
</tr>
<tr style="height: 100px;">
<td>
<div id="ocr_status"> </div>
</td>
<td style="text-align: justify;">
<div id="ocr_results"> </div>
</td>
</tr>
<tr>
<td colspan="2">
<button style="padding: 1em" type="button" id="btnele"><b>Run OCR</b></button>
</td>
</tr>
</table>
<canvas id="canvasele" style="display:none;"></canvas>
<script>
// Camera request passed to getUserMedia(): facingMode 'environment' with a
// minimum resolution of 1280x720.
const constraints = {
  video: {
    'facingMode': 'environment',
    width: { min: 1280 },
    height: { min: 720 },
  },
};

// Page elements the script reads from and writes to.
const video = document.getElementById('videoele');
const img = document.getElementById('imageele');
const screenshotButton = document.getElementById('btnele');

// Scratch canvas used to grab a still frame; it is created here and is not
// attached to the page by this script.
const canvas = document.createElement('canvas');
/**
 * Reports whether the promise-based camera API is available.
 *
 * @returns {boolean} true only when navigator.mediaDevices.getUserMedia is a
 *     callable function; false otherwise.
 */
function hasGetUserMedia() {
  // The typeof checks keep this from throwing when `navigator` is missing and
  // reject a getUserMedia property that exists but cannot be called.
  return typeof navigator !== 'undefined' &&
    !!navigator.mediaDevices &&
    typeof navigator.mediaDevices.getUserMedia === 'function';
}
// Start the camera when the API is present; otherwise tell the user why
// nothing will appear in the video cell.
if (!hasGetUserMedia()) {
  alert('getUserMedia() is not supported by your browser');
} else {
  const cameraRequest = navigator.mediaDevices.getUserMedia(constraints);
  cameraRequest.then(gotStream).catch(handleError);
}
/**
 * Success handler for the getUserMedia() request: plays the stream in the
 * video element.
 *
 * @param {MediaStream} stream - value the getUserMedia() promise resolved with.
 */
function gotStream(stream) {
  video.srcObject = stream;
  // Also kept on window so the stream can be inspected from the console.
  window.stream = stream;
}
/**
 * Rejection handler for the getUserMedia() request.
 *
 * Logs the error and also writes a short message into the #ocr_status
 * element, so a failed camera start is visible on the page and not only in
 * the developer console.
 *
 * @param {*} error - value the getUserMedia() promise rejected with.
 */
function handleError(error) {
  console.error(error);

  const statusEl = document.getElementById('ocr_status');
  if (!statusEl) {
    return;
  }
  const detail = error && error.message ? error.message : String(error);
  statusEl.innerText = 'Camera error: ' + detail;
}
// Clicking either the button or the live video captures the current frame,
// shows it in the snapshot image and sends it through OCR.
screenshotButton.onclick = video.onclick = function() {
  document.getElementById('ocr_results').innerText = '';

  // videoWidth/videoHeight are 0 until the video has frame metadata; a 0x0
  // canvas has no pixels to recognise, so stop here instead of running OCR
  // on an empty image.
  if (!video.videoWidth || !video.videoHeight) {
    document.getElementById('ocr_status').innerText = 'Camera is not ready yet';
    return;
  }

  // Size the scratch canvas to the native frame and copy the frame into it.
  canvas.width = video.videoWidth;
  canvas.height = video.videoHeight;
  canvas.getContext('2d').drawImage(video, 0, 0);

  // The same PNG data URL feeds both the preview image and the OCR run.
  const snapshotUrl = canvas.toDataURL('image/png');
  img.src = snapshotUrl;
  runOCR(snapshotUrl);
};
/**
 * Runs Tesseract OCR on an image and mirrors progress and result into the page.
 *
 * Progress updates are written to #ocr_status as "<status> (<percent>%)",
 * the recognised text is written to #ocr_results, and a failure is logged
 * and reported in #ocr_status.
 *
 * @param {string} url - image source handed to worker.recognize(); the click
 *     handler in this file passes a PNG data URL.
 */
function runOCR(url) {
  const statusEl = document.getElementById('ocr_status');
  const resultsEl = document.getElementById('ocr_results');

  // NOTE(review): a new worker is created on every call and is never released
  // in this function — confirm whether the loaded Tesseract.js version offers
  // a teardown call (e.g. worker.terminate()) and use it once the job settles.
  const worker = new Tesseract.TesseractWorker();

  worker.recognize(url)
    // Subscribe to progress first so the chain does not depend on then()
    // returning the chainable job object.
    .progress(function(update) {
      // Round so float noise such as 56.99999999999999 is not shown.
      const percent = Math.round(update['progress'] * 100);
      statusEl.innerText = update['status'] + ' (' + percent + '%)';
    })
    .then(function(result) {
      resultsEl.innerText = result.text;
    })
    .catch(function(error) {
      console.error(error);
      const detail = error && error.message ? error.message : String(error);
      statusEl.innerText = 'OCR failed: ' + detail;
    });
}
</script>
</body>
</html>
Hi! How can I use worker.loadLanguage('eng'); to support the Russian language? Please help!
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Great work on having the first running Tesseract application that DOES the stuff I was looking for. You really made my day :)
It took me 4 days to find this.
Also, do you have any idea why the <script> contents, when moved into a .js file and loaded with a script tag, produce no result?
I would really appreciate your ideas on this.