BaderSZ · November 11, 2024 10:57
diff --git a/googleocr.py b/googleocr.py
 #!/usr/bin/env python3
 """
 OCR an image using Google Vision API.

 Requires:
  - google-cloud-vision = "^3.8.0"

 usage: googleocr.py [-h] -i INPUT_FILE [-o OUTPUT_FILE]

 Google Vision OCR script. You'll need an API key from Google Console.
 Either set it in the environment via "GOOGLE_API_KEY=" or change the variable in the script.

 options:
  -h, --help            show this help message and exit
  -i, --input INPUT_FILE
                        Path to the image to OCR.
  -o, --output OUTPUT_FILE
                        Output file to write to (Default is ./output/INPUT_BASENAME.txt,
                        must not exist)

 (C) Bader Zaidan 2024 - GPL-2.0
 """

 from argparse import ArgumentParser, Namespace
 from os import getenv
 from pathlib import Path

 from google.cloud.vision import Image, AnnotateImageResponse, ImageAnnotatorClient


 # Fallback key used only when GOOGLE_API_KEY is absent from the environment.
 G_API_KEY = "MY_API_KEY"  # get one from console.cloud.google.com

 # Options passed to the Vision client: regional endpoint plus the API key,
 # with the environment variable taking precedence over the constant above.
 CLIENT_OPTS = dict(
     api_endpoint="eu-vision.googleapis.com",
     api_key=getenv("GOOGLE_API_KEY", G_API_KEY),
 )


 def init_argparse() -> Namespace:
     """Build the command-line parser and parse ``sys.argv``.

     Returns:
         The parsed arguments as a namespace with two attributes:
         ``input_file`` (a ``Path``, required) and ``output_file`` (a ``Path``,
         or ``None`` when ``-o`` is not given).
     """
     parser = ArgumentParser(
         # ``__package__`` is None for a plain script and "" for a top-level
         # module; fall back to argparse's default (the script name) in both
         # cases instead of showing an empty program name in the usage line.
         prog=__package__ or None,
         epilog="(C) Bader Zaidan 2024 - GPL-2.0",
         description=(
             "Google Vision OCR script. You'll need an API key from Google Console. "
             'Either set it in the environment via "GOOGLE_API_KEY=" or change the '
             "variable in the script."
         ),
     )

     parser.add_argument(
         "-i",
         "--input",
         dest="input_file",
         required=True,
         help="Path to the image to OCR.",
         type=Path,
     )

     parser.add_argument(
         "-o",
         "--output",
         dest="output_file",
         required=False,
         help="Output file to write to (Default is ./output/INPUT_BASENAME.txt, must not exist)",
         type=Path,
         default=None,
     )

     # parse_args() returns a Namespace, not the parser itself.
     return parser.parse_args()


 def vision_parse(path_arg: Path) -> AnnotateImageResponse:
     """Send an image file to Google Vision document text detection.

     Args:
         path_arg: Path of the image; its raw bytes are sent as the request
             content.

     Returns:
         The response returned by ``document_text_detection``.

     Raises:
         Exception: If the response carries a non-empty error message.
     """
     annotator: ImageAnnotatorClient = ImageAnnotatorClient(
         credentials=None,
         client_options=CLIENT_OPTS,
     )
     payload: Image = Image(content=path_arg.read_bytes())

     # pylint: disable=no-member
     result: AnnotateImageResponse = annotator.document_text_detection(image=payload)

     failure = result.error.message
     if not failure:
         return result

     # pylint: disable=broad-exception-raised
     raise Exception(f"{failure}")


 def get_output_path(img_path: Path, extension: str = ".pdf") -> Path:
     """Return the default output path ``output/<stem><extension>`` for an image.

     The result is relative to the current working directory. Only the final
     suffix of the input file name is swapped for ``extension``; a name
     without a suffix simply gets ``extension`` appended.

     Args:
         img_path: Path of the input image; only its file name is used.
         extension: Suffix for the output file, including the leading dot.

     Returns:
         The path ``output/<stem><extension>``.

     Raises:
         FileExistsError: If the computed output path already exists.
     """
     # str.replace() on the suffix would rewrite every occurrence of it inside
     # the name and, for a name with no suffix, insert the extension between
     # every character. stem + extension touches only the final suffix.
     output_path = Path("output") / f"{img_path.stem}{extension}"

     if output_path.exists():
         raise FileExistsError(f"File {output_path} already exists.")

     return output_path


 def main() -> None:
     """Parse the command line, OCR the input image and write the text to a file.

     The output path is the ``-o`` argument when given, otherwise
     ``output/<input stem>.txt``. Errors raised while calling the Vision API
     or writing the result are caught and reported with ``print``.

     Raises:
         FileExistsError: If no ``-o`` is given and the default output file
             already exists (raised by ``get_output_path``).
     """
     args: Namespace = init_argparse()

     input_image_path: Path = args.input_file
     output_file_path: Path = (
         args.output_file
         if args.output_file
         else get_output_path(args.input_file, extension=".txt")
     )

     try:
         parsed = vision_parse(input_image_path)

         # AnnotateImageResponse has no ``text_annotation`` field; the document
         # OCR result lives in ``full_text_annotation`` and its ``text``
         # attribute holds the recognised text.
         text = parsed.full_text_annotation.text

         # The default ``output`` directory may not exist yet.
         output_file_path.parent.mkdir(parents=True, exist_ok=True)
         # OCR output is frequently non-ASCII; do not rely on the locale encoding.
         output_file_path.write_text(text, encoding="utf-8")

     except Exception as exp:  # pylint: disable=broad-exception-caught
         print(f"Failed to parse file '{input_image_path}':", exp)

 # Run the command-line entry point only when executed as a script,
 # not when this module is imported.
 if __name__ == "__main__":
     main()
	#!/usr/bin/env python3
	"""
	OCR an image using Google Vision API.

	Requires:
	- google-cloud-vision = "^3.8.0"

	usage: googleocr.py [-h] -i INPUT_FILE [-o OUTPUT_FILE]

	Google Vision OCR script. You'll need an API key from Google Console.
	Either set it in the environment via "GOOGLE_API_KEY=" or change the variable in the script.

	options:
	  -h, --help            show this help message and exit
	  -i, --input INPUT_FILE
	                        Path to the image to OCR.
	  -o, --output OUTPUT_FILE
	                        Output file to write to (Default is ./output/INPUT_BASENAME.txt,
	                        must not exist)

	(C) Bader Zaidan 2024 - GPL-2.0
	"""

	from os import getenv

	from argparse import ArgumentParser
	from pathlib import Path

	from google.cloud.vision import Image, AnnotateImageResponse, ImageAnnotatorClient


	# Fallback key used only when GOOGLE_API_KEY is absent from the environment.
	G_API_KEY = "MY_API_KEY"  # get one from console.cloud.google.com

	# Options passed to the Vision client: regional endpoint plus the API key,
	# with the environment variable taking precedence over the constant above.
	CLIENT_OPTS = dict(
	    api_endpoint="eu-vision.googleapis.com",
	    api_key=getenv("GOOGLE_API_KEY", G_API_KEY),
	)


	def init_argparse() -> Namespace:
	    """Build the command-line parser and parse ``sys.argv``.

	    Returns:
	        The parsed arguments as a namespace with two attributes:
	        ``input_file`` (a ``Path``, required) and ``output_file`` (a ``Path``,
	        or ``None`` when ``-o`` is not given).
	    """
	    parser = ArgumentParser(
	        # ``__package__`` is None for a plain script and "" for a top-level
	        # module; fall back to argparse's default (the script name) in both
	        # cases instead of showing an empty program name in the usage line.
	        prog=__package__ or None,
	        epilog="(C) Bader Zaidan 2024 - GPL-2.0",
	        description=(
	            "Google Vision OCR script. You'll need an API key from Google Console. "
	            'Either set it in the environment via "GOOGLE_API_KEY=" or change the '
	            "variable in the script."
	        ),
	    )

	    parser.add_argument(
	        "-i",
	        "--input",
	        dest="input_file",
	        required=True,
	        help="Path to the image to OCR.",
	        type=Path,
	    )

	    parser.add_argument(
	        "-o",
	        "--output",
	        dest="output_file",
	        required=False,
	        help="Output file to write to (Default is ./output/INPUT_BASENAME.txt, must not exist)",
	        type=Path,
	        default=None,
	    )

	    # parse_args() returns a Namespace, not the parser itself.
	    return parser.parse_args()


	def vision_parse(path_arg: Path) -> AnnotateImageResponse:
	    """Send an image file to Google Vision document text detection.

	    Args:
	        path_arg: Path of the image; its raw bytes are sent as the request
	            content.

	    Returns:
	        The response returned by ``document_text_detection``.

	    Raises:
	        Exception: If the response carries a non-empty error message.
	    """
	    client: ImageAnnotatorClient = ImageAnnotatorClient(
	        credentials=None, client_options=CLIENT_OPTS
	    )

	    image: Image = Image(content=path_arg.read_bytes())

	    # pylint: disable=no-member
	    response: AnnotateImageResponse = client.document_text_detection(image=image)

	    if response.error.message:
	        # pylint: disable=broad-exception-raised
	        raise Exception(f"{response.error.message}")

	    return response


	def get_output_path(img_path: Path, extension: str = ".pdf") -> Path:
	    """Return the default output path ``output/<stem><extension>`` for an image.

	    The result is relative to the current working directory. Only the final
	    suffix of the input file name is swapped for ``extension``; a name
	    without a suffix simply gets ``extension`` appended.

	    Args:
	        img_path: Path of the input image; only its file name is used.
	        extension: Suffix for the output file, including the leading dot.

	    Returns:
	        The path ``output/<stem><extension>``.

	    Raises:
	        FileExistsError: If the computed output path already exists.
	    """
	    # str.replace() on the suffix would rewrite every occurrence of it inside
	    # the name and, for a name with no suffix, insert the extension between
	    # every character. stem + extension touches only the final suffix.
	    output_path = Path("output") / f"{img_path.stem}{extension}"

	    if output_path.exists():
	        raise FileExistsError(f"File {output_path} already exists.")

	    return output_path


	def main() -> None:
	    """Parse the command line, OCR the input image and write the text to a file.

	    The output path is the ``-o`` argument when given, otherwise
	    ``output/<input stem>.txt``. Errors raised while calling the Vision API
	    or writing the result are caught and reported with ``print``.

	    Raises:
	        FileExistsError: If no ``-o`` is given and the default output file
	            already exists (raised by ``get_output_path``).
	    """
	    args: Namespace = init_argparse()

	    input_image_path: Path = args.input_file
	    output_file_path: Path = (
	        args.output_file
	        if args.output_file
	        else get_output_path(args.input_file, extension=".txt")
	    )

	    try:
	        parsed = vision_parse(input_image_path)

	        # AnnotateImageResponse has no ``text_annotation`` field; the document
	        # OCR result lives in ``full_text_annotation`` and its ``text``
	        # attribute holds the recognised text.
	        text = parsed.full_text_annotation.text

	        # The default ``output`` directory may not exist yet.
	        output_file_path.parent.mkdir(parents=True, exist_ok=True)
	        # OCR output is frequently non-ASCII; do not rely on the locale encoding.
	        output_file_path.write_text(text, encoding="utf-8")

	    except Exception as exp:  # pylint: disable=broad-exception-caught
	        print(f"Failed to parse file '{input_image_path}':", exp)

	# Run the command-line entry point only when executed as a script,
	# not when this module is imported.
	if __name__ == "__main__":
	    main()