Created
November 11, 2024 10:57
-
-
Save BaderSZ/ac7b28e2d77ddc2e68b84659447062ba to your computer and use it in GitHub Desktop.
OCR an input image using Google Vision API.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
""" | |
OCR an image using Google Vision API.

Requires:
 - google-cloud-vision = "^3.8.0"

usage: googleocr.py [-h] -i INPUT_FILE [-o OUTPUT_FILE]

Google Vision OCR script. You'll need an API key from Google Console.
Either set it in the environment via "GOOGLE_API_KEY=" or change the variable in the script.

options:
  -h, --help            show this help message and exit
  -i, --input INPUT_FILE
                        Path to the image to OCR.
  -o, --output OUTPUT_FILE
                        Output file to write to (Default is ./output/INPUT_BASENAME.txt,
                        must not exist)

(C) Bader Zaidan 2024 - GPL-2.0
""" | |
from argparse import ArgumentParser, Namespace
from os import getenv
from pathlib import Path
from typing import Optional, Sequence

from google.cloud.vision import AnnotateImageResponse, Image, ImageAnnotatorClient
# Fallback API key used when GOOGLE_API_KEY is not set in the environment.
# Get one from console.cloud.google.com.
G_API_KEY = "MY_API_KEY"

# Options handed to the Vision client: the endpoint to talk to and the API key
# (the environment variable takes precedence over the in-script fallback).
CLIENT_OPTS = dict(
    api_endpoint="eu-vision.googleapis.com",
    api_key=getenv("GOOGLE_API_KEY", G_API_KEY),
)
def init_argparse(argv: Optional[Sequence[str]] = None) -> Namespace:
    """Build the command-line parser and parse the arguments.

    Args:
        argv: Argument strings to parse. ``None`` (the default) lets argparse
            read them from ``sys.argv``.

    Returns:
        The parsed namespace with two attributes: ``input_file`` (a ``Path``,
        required) and ``output_file`` (a ``Path``, or ``None`` when ``-o`` was
        not given).
    """
    parser = ArgumentParser(
        # When __package__ is None argparse derives the program name from
        # sys.argv[0] instead.
        prog=__package__,
        epilog="(C) Bader Zaidan 2024 - GPL-2.0",
        description="""Google Vision OCR script. You'll need an API key from Google Console.
        Either set it in the environment via "GOOGLE_API_KEY=" or change the variable
        in the script.""",
    )
    parser.add_argument(
        "-i",
        "--input",
        dest="input_file",
        required=True,
        help="Path to the image to OCR.",
        type=Path,
    )
    parser.add_argument(
        "-o",
        "--output",
        dest="output_file",
        required=False,
        help="Output file to write to (Default is ./output/INPUT_BASENAME.txt, must not exist)",
        type=Path,
        default=None,
    )
    return parser.parse_args(argv)
def vision_parse(path_arg: Path) -> AnnotateImageResponse:
    """Use Google Vision API to detect text in an image file.

    Args:
        path_arg: Path of the image file; its raw bytes are sent as the
            request content.

    Returns:
        The response of ``document_text_detection`` for the image.

    Raises:
        RuntimeError: If the response carries an error message.
        OSError: If the image file cannot be read.
    """
    client = ImageAnnotatorClient(credentials=None, client_options=CLIENT_OPTS)
    image = Image(content=path_arg.read_bytes())
    # pylint: disable=no-member
    response = client.document_text_detection(image=image)
    if response.error.message:
        # The API reports failures inside the response body rather than by
        # raising, so surface them as an exception here.
        raise RuntimeError(response.error.message)
    return response
def get_output_path(img_path: Path, extension: str = ".pdf") -> Path:
    """Return the default output path ``output/<image stem><extension>``.

    The result is a relative path, so it resolves against the current working
    directory.

    Args:
        img_path: Path of the input image; only its file name is used.
        extension: Suffix (including the leading dot) for the output file.

    Returns:
        The output path inside the ``output`` directory.

    Raises:
        FileExistsError: If the computed output path already exists.
    """
    # Use the stem instead of str.replace on the name: replace() would touch
    # every occurrence of the suffix text, and with an empty suffix it would
    # insert the extension between all characters of the name.
    output_path = Path("output") / f"{img_path.stem}{extension}"
    if output_path.exists():
        raise FileExistsError(f"File {output_path} already exists.")
    return output_path
def main() -> None:
    """Parse args, OCR the input image and write the recognised text to a file.

    The output goes to the ``-o`` path when given, otherwise to the default
    path computed by ``get_output_path`` with a ``.txt`` extension. Any failure
    while calling the API or writing the result is reported on stdout instead
    of being raised.
    """
    args: Namespace = init_argparse()
    input_image_path: Path = args.input_file
    output_file_path: Path = args.output_file or get_output_path(
        args.input_file, extension=".txt"
    )
    try:
        parsed = vision_parse(input_image_path)
        # The default target lives in ./output, which may not exist yet.
        output_file_path.parent.mkdir(parents=True, exist_ok=True)
        # The response has no `text_annotation` field; the recognised text of
        # a document_text_detection call is in `full_text_annotation.text`.
        output_file_path.write_text(
            parsed.full_text_annotation.text, encoding="utf-8"
        )
    except Exception as exp:  # pylint: disable=broad-exception-caught
        print(f"Failed to parse file '{input_image_path}':", exp)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment