Created
February 9, 2024 13:11
-
-
Save tsh-code/28211910a2ef469e465e8edac13c0a6c to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Lookup table from the value of CONFIG["ocr_library"] to the notebook that
# run_ocr executes. Every supported library is implemented by a notebook
# named after it, so the table is derived from the list of library names.
ocr_notebook_map = {
    library: f"{library}.ipynb"
    for library in ("easyocr", "pytesseract", "kerasocr", "paddle", "tensorflow")
}
def run_ocr(CONFIG): | |
if CONFIG.get("skip_ocr_processing", False): | |
print("Skipping OCR.") | |
return | |
notebook_name = ocr_notebook_map.get(CONFIG["ocr_library"]) | |
if notebook_name: | |
%run ocr_module/{notebook_name} | |
else: | |
print(f"Unknown OCR library: {CONFIG['ocr_library']}") | |
file_list = get_file_list(CONFIG["directory"]) | |
write_results(file_list, CONFIG["output_file"]) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment