Skip to content

Instantly share code, notes, and snippets.

@tsh-code
Created February 9, 2024 13:11
Show Gist options
  • Save tsh-code/28211910a2ef469e465e8edac13c0a6c to your computer and use it in GitHub Desktop.
Save tsh-code/28211910a2ef469e465e8edac13c0a6c to your computer and use it in GitHub Desktop.
# Maps each supported OCR library key (looked up via CONFIG["ocr_library"] in
# run_ocr) to the notebook file executed for it. Every notebook is named after
# its library key, so the mapping is derived from the list of keys.
ocr_notebook_map = {
    library: f"{library}.ipynb"
    for library in (
        "easyocr",
        "pytesseract",
        "kerasocr",
        "paddle",
        "tensorflow",
    )
}
def run_ocr(CONFIG):
    """Run the notebook for the configured OCR library and write its results.

    Args:
        CONFIG: Mapping with the run settings. Keys read here:
            ``skip_ocr_processing`` (optional, treated as ``False`` when
            absent), ``ocr_library``, ``directory`` and ``output_file``.

    Returns:
        None. Progress and problems are reported with ``print``.

    Raises:
        KeyError: With a plain ``dict`` CONFIG, if ``ocr_library`` is missing,
            or if ``directory`` / ``output_file`` is missing once the
            notebook has been run. No key is required when
            ``skip_ocr_processing`` is truthy.
    """
    if CONFIG.get("skip_ocr_processing", False):
        print("Skipping OCR.")
        return

    ocr_library = CONFIG["ocr_library"]
    notebook_name = ocr_notebook_map.get(ocr_library)
    if not notebook_name:
        print(f"Unknown OCR library: {ocr_library}")
        # No notebook was run for this library, so stop here instead of
        # collecting files and writing results for an OCR run that never
        # happened.
        return

    # Same operation as the IPython line magic `%run ocr_module/<notebook>`,
    # written as the call IPython itself makes so the function is valid plain
    # Python. `get_ipython` is provided by the IPython/Jupyter session.
    get_ipython().run_line_magic("run", f"ocr_module/{notebook_name}")

    # NOTE(review): get_file_list and write_results are not defined in this
    # block; presumably they come from the notebook that was just run or from
    # elsewhere in the session -- confirm.
    file_list = get_file_list(CONFIG["directory"])
    write_results(file_list, CONFIG["output_file"])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment