Skip to content

Instantly share code, notes, and snippets.

@cmin764
Created October 16, 2023 16:01
Show Gist options
  • Select an option

  • Save cmin764/80b0c5f376dc2ea216364659c1bcdf57 to your computer and use it in GitHub Desktop.

Select an option

Save cmin764/80b0c5f376dc2ea216364659c1bcdf57 to your computer and use it in GitHub Desktop.
`RPA.Desktop`: Enhance images for better OCR
import time
import cv2
import numpy
from PIL import ImageOps, Image
from RPA.core.geometry import Region
from RPA.Desktop import Desktop
from RPA.Desktop.keywords import keyword, screen, HAS_RECOGNITION
from RPA.Desktop.keywords.text import TextKeywords
if HAS_RECOGNITION:
from RPA.recognition import ocr # pylint: disable=no-name-in-module
def ensure_recognition():
if not HAS_RECOGNITION:
raise ValueError(
"Keyword requires OCR features, please install the "
"`rpaframework-recognition` package"
)
def pil2cv(image):
if isinstance(image, Image.Image):
return cv2.cvtColor(numpy.array(image), cv2.COLOR_RGB2BGR)
return image
def cv2pil(image):
if isinstance(image, Image.Image):
return image
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
return Image.fromarray(image)
class ExtendedTextKeywords(TextKeywords):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self._pre_processors = []
@keyword
def configure_pre_processing(
self,
enlarge: float = None,
contrast: int = None,
black_and_white: bool = False,
denoise: bool = False,
blur: int = None,
):
pre_processors = []
if enlarge:
def enlarger(image):
image = cv2pil(image)
width, height = image.size
factor = max(1.0, float(enlarge / width))
new_size = int(factor * width), int(factor * height)
return image.resize(new_size, Image.ANTIALIAS)
pre_processors.append(enlarger)
if contrast:
def contraster(image):
image = cv2pil(image)
factor = (259 * (contrast + 255)) / (255 * (259 - contrast))
def pixel_contrast(c):
return 128 + factor * (c - 128)
return image.point(pixel_contrast)
pre_processors.append(contraster)
if denoise:
def denoiser(image):
image = pil2cv(image)
return cv2.fastNlMeansDenoisingColored(image, None, 10, 10, 7, 15)
pre_processors.append(denoiser)
if black_and_white:
def grayscaler(image):
image = pil2cv(image)
image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
image = cv2.threshold(
image, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
)[1]
return image
pre_processors.append(grayscaler)
if blur:
def blurer(image):
image = pil2cv(image)
return cv2.medianBlur(image, blur)
pre_processors.append(blurer)
self._pre_processors = pre_processors
def _pre_process(self, image):
for pre_processor in self._pre_processors:
image = pre_processor(image)
return cv2pil(image)
@keyword
def read_text(self, locator: str = None, invert: bool = False):
ensure_recognition()
if locator is not None:
element = self.ctx.wait_for_element(locator)
if not isinstance(element, Region):
raise ValueError("Locator must resolve to a region")
self.logger.info("Reading text from element: %s", element)
image = screen.grab(element)
else:
self.logger.info("Reading text from screen")
image = screen.grab()
image = self._pre_process(image)
screen.log_image(image)
if invert:
image = ImageOps.invert(image)
start_time = time.time()
text = ocr.read(image)
self.logger.info("Read text in %.2f seconds", time.time() - start_time)
return text
class ExtendedDesktop(Desktop):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
override = [ExtendedTextKeywords(self)]
self.add_library_components(override)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment