|
#!/usr/bin/env -S deno run --allow-read --allow-write --allow-run=tesseract,gs |
|
|
|
import * as stdPath from "jsr:@std/path"; |
|
|
|
/**
 * A `Deno.Command` that echoes its command line to stdout before running.
 *
 * The executable and its arguments are captured once, at construction time,
 * and printed (space-separated, without shell quoting) every time
 * `output()`, `outputSync()` or `spawn()` is called.
 */
class VerboseCommand extends Deno.Command {
  /** Executable followed by its arguments, exactly as they are logged. */
  readonly #command: readonly string[];

  /**
   * @param command Executable name or path; a `URL` is logged via its `href`.
   * @param options Forwarded unchanged to `Deno.Command`. Optional, to match
   *   the base-class constructor; when omitted only the executable is logged.
   */
  constructor(command: string | URL, options?: Deno.CommandOptions) {
    super(command, options);
    const executable = typeof command === "string" ? command : command.href;
    // Copy the arguments so later mutation of `options.args` by the caller
    // cannot change what gets logged.
    this.#command = [executable, ...(options?.args ?? [])];
  }

  /** Logs the command line, then delegates to `Deno.Command.output()`. */
  override output(): Promise<Deno.CommandOutput> {
    this.#log();
    return super.output();
  }

  /** Logs the command line, then delegates to `Deno.Command.outputSync()`. */
  override outputSync(): Deno.CommandOutput {
    this.#log();
    return super.outputSync();
  }

  /** Logs the command line, then delegates to `Deno.Command.spawn()`. */
  override spawn(): Deno.ChildProcess {
    this.#log();
    return super.spawn();
  }

  /** Prints the executable and each argument as separate `console.log` values. */
  #log(): void {
    console.log(...this.#command);
  }
}
|
|
|
/** Paths consumed by `GhostScriptCommand`. */
interface GhostScriptOptions {
  /** Path of the file Ghostscript reads; resolved to an absolute path before use. */
  infile: string;

  /** Path Ghostscript writes to (`-sOutputFile`); resolved to an absolute path before use. */
  outfile: string;
}
|
|
|
/**
 * Ghostscript invocation that rewrites `infile` through the `pdfwrite` device
 * using the `/ebook` settings preset and stores the result in `outfile`.
 *
 * Equivalent shell command:
 *
 *     gs -sDEVICE=pdfwrite -dCompatibilityLevel=1.4 "-dPDFSETTINGS=/ebook" -dNOPAUSE -dQUIET -dBATCH -sOutputFile=final.pdf output.pdf
 */
class GhostScriptCommand extends VerboseCommand {
  /**
   * @param options Input and output paths; both are made absolute relative to
   *   the current working directory before being handed to `gs`.
   */
  constructor(options: GhostScriptOptions) {
    const target = stdPath.resolve(options.outfile);
    const source = stdPath.resolve(options.infile);

    const gsArgs: string[] = [
      "-sDEVICE=pdfwrite",
      "-dCompatibilityLevel=1.4",
      // Preset choices are documented at
      // https://ghostscript.readthedocs.io/en/latest/VectorDevices.html#controls-and-features-specific-to-postscript-and-pdf-input
      // Other presets: /screen, /printer, /prepress, /default
      "-dPDFSETTINGS=/ebook",
      "-dNOPAUSE",
      "-dQUIET",
      "-dBATCH",
      `-sOutputFile=${target}`,
      source,
    ];

    super("gs", { args: gsArgs });
  }
}
|
|
|
/** Settings consumed by `TesseractCommand`. */
interface TesseractOptions {
  /** Value passed to tesseract's `-l` flag (e.g. `"fra"`). */
  language: string;

  /** Path of the file to run OCR on; resolved to an absolute path before use. */
  infile: string;

  /**
   * Desired output path. Only its directory and extension-less base name are
   * passed to tesseract.
   */
  outfile: string;
}
|
|
|
/**
 * Tesseract invocation of the form
 * `tesseract -l LANGUAGE INFILE OUTPUTBASE pdf`.
 */
class TesseractCommand extends VerboseCommand {
  /**
   * @param options OCR language plus input and output paths. The extension of
   *   `options.outfile` is dropped and only `dir/name` is passed as the output
   *   base — presumably because tesseract appends `.pdf` itself when given the
   *   `pdf` config (see the tesseract manual).
   */
  constructor(options: TesseractOptions) {
    const outputParts = stdPath.parse(options.outfile);
    const inputPath = stdPath.resolve(options.infile);
    const outputBase = stdPath.resolve(outputParts.dir, outputParts.name);

    super("tesseract", {
      args: ["-l", options.language, inputPath, outputBase, "pdf"],
    });
  }
}
|
|
|
/** Argument object for `clean`. */
interface CleanOptions {
  /** Path of the temporary file to delete. */
  temp: string;
}
|
|
|
/**
 * Deletes the temporary file and reports the removal on stdout.
 *
 * @param options Holds `temp`, the path handed to `Deno.removeSync`.
 */
function clean(options: CleanOptions): void {
  const { temp: tempPath } = options;
  Deno.removeSync(tempPath);
  console.log("Removed:", tempPath);
}
|
|
|
/**
 * Runs the OCR-and-compress pipeline: tesseract turns INFILE into a temporary
 * PDF, then Ghostscript rewrites that PDF to OUTFILE.
 *
 * @param args Exactly two entries: `[INFILE, OUTFILE]`.
 * @returns Process exit code: `1` on a usage error, tesseract's exit code if
 *   it fails, otherwise Ghostscript's exit code.
 */
function main(args: string[]): number {
  if (args.length !== 2) {
    // Usage errors belong on stderr so they never mix with regular output.
    console.error(
      "Usage:",
      import.meta.filename ?? import.meta.url,
      "INFILE",
      "OUTFILE",
    );
    return 1;
  }

  // `outputSync()` captures the child's stdout/stderr instead of inheriting
  // them, so a failure would otherwise be completely silent. Forward whatever
  // the tool printed to our stderr.
  const reportFailure = (tool: string, output: Deno.CommandOutput): void => {
    const decoder = new TextDecoder();
    console.error(`${tool} failed with exit code ${output.code}`);
    for (const captured of [output.stdout, output.stderr]) {
      const text = decoder.decode(captured).trimEnd();
      if (text !== "") {
        console.error(text);
      }
    }
  };

  const [infile, outfile] = args;
  const temp = Deno.makeTempFileSync({
    prefix: "ebook",
    suffix: ".pdf",
  });

  // `finally` guarantees the temporary file is removed on every path,
  // including when spawning a tool throws (e.g. binary not installed or
  // `--allow-run` permission denied).
  try {
    const tesseract = new TesseractCommand({
      infile,
      language: "fra",
      outfile: temp,
    });
    const tesseractOutput = tesseract.outputSync();
    if (!tesseractOutput.success) {
      reportFailure("tesseract", tesseractOutput);
      return tesseractOutput.code;
    }

    const gs = new GhostScriptCommand({
      infile: temp,
      outfile,
    });
    const gsOutput = gs.outputSync();
    if (!gsOutput.success) {
      reportFailure("gs", gsOutput);
    }
    return gsOutput.code;
  } finally {
    clean({ temp });
  }
}
|
|
|
// Entry point: run the pipeline only when this module is executed directly,
// passing a copy of the CLI arguments and exiting with the code `main` returns.
if (import.meta.main) {
  const exitCode = main([...Deno.args]);
  Deno.exit(exitCode);
}