Skip to content

Instantly share code, notes, and snippets.

@jum-s
Created April 24, 2020 20:37
Show Gist options
  • Save jum-s/7e613498e7fcffeae198de837efe4a32 to your computer and use it in GitHub Desktop.
Save jum-s/7e613498e7fcffeae198de837efe4a32 to your computer and use it in GitHub Desktop.
const fs = require('fs')
const { createWorker } = require('tesseract.js')
/**
 * Loads the cover samples from ./img_samples2.json and hands them to
 * sequentialRead, which OCRs them one after the other.
 *
 * The sample file is a JSON download from:
 * https://query.inventaire.io/#%23Editions%20without%20cover%0ASELECT%20%3Fcover%20%3Fiso%20WHERE%20%7B%0A%20%20%3Fedition%20wdt%3AP31%20wd%3AQ3331189%20.%0A%20%20%3Fedition%20wdt%3AP407%20%3Flang%20.%0A%20%20%3Fedition%20invp%3AP2%20%3Fcover%20.%0A%20%20SERVICE%20wdq%3Asparql%20%7B%20%3Flang%20wdt%3AP220%20%3Fiso%20.%20%7D%0A%7D%0ALIMIT%20100
 *
 * The file is read synchronously; a missing file or invalid JSON throws.
 */
// Declared with const: the previous chained assignment
// (`module.exports = coverReader = ...`) created an implicit global and
// throws a ReferenceError in strict mode.
const coverReader = () => {
  const file = "./img_samples2.json"
  const samples = JSON.parse(fs.readFileSync(file))
  sequentialRead(samples)
}
module.exports = coverReader
/**
 * Runs ocrRead over the samples one at a time, in order.
 *
 * Samples are consumed with shift(), so the caller's array is emptied as the
 * run progresses; a null/undefined entry ends the run early.
 * The returned array is handed back immediately and is filled in place as
 * each sample completes. The full list is logged once the queue is drained.
 * If a sample fails, the run stops and the error is logged.
 *
 * @param {Array<Object>} samples - queue of samples, each passed to ocrRead
 * @returns {Array<Object>} the array ocrRead appends processed samples to
 */
const sequentialRead = samples => {
  // Local declaration: a bare `results = []` creates an implicit global and
  // throws a ReferenceError in strict mode / ES modules.
  const results = []
  const readNext = async () => {
    const nextSample = samples.shift()
    if (nextSample == null) {
      console.log("results : ", results)
      return results
    }
    await ocrRead(nextSample, results)
    return readNext()
  }
  // The run is intentionally not awaited (callers get the array right away),
  // but a rejection must not be left unhandled.
  readNext().catch(err => {
    console.error("sequentialRead aborted : ", err)
  })
  return results
}
/**
 * OCRs a single cover image and appends the sample, with the recognized
 * text stored on `sample.text`, to `results`.
 *
 * A fresh worker is created for every call and is always terminated, even
 * when loading or recognition fails. Does nothing when `results` is falsy.
 *
 * @param {Object} sample - object whose `cover` is passed to the worker's
 *   recognize() and whose `iso` language code is passed to loadLanguage()
 *   and initialize(); mutated to receive a `text` property
 * @param {Array<Object>} results - array the processed sample is pushed onto
 * @returns {Promise<void>} rejects with the worker's error if any step fails
 */
const ocrRead = async (sample, results) => {
  if (!results) { return }
  const { cover: coverUrl, iso } = sample
  // Declared locally: bare `worker = ...` / `result = ...` assignments leak
  // globals and throw a ReferenceError in strict mode.
  const worker = createWorker()
  try {
    await worker.load()
    await worker.loadLanguage(iso)
    await worker.initialize(iso)
    const { data } = await worker.recognize(coverUrl)
    sample.text = data.text
    results.push(sample)
  } finally {
    // Release the worker even when one of the steps above rejected.
    await worker.terminate()
  }
}
// Start the OCR run as soon as this file is loaded (this also happens when
// the module is required from another file, not only when run directly).
coverReader()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment