Skip to content

Instantly share code, notes, and snippets.

@tin2tin
Last active November 18, 2024 22:28
Show Gist options
  • Save tin2tin/f9fae59d5c6386b0e2a85cd287a9d966 to your computer and use it in GitHub Desktop.
Save tin2tin/f9fae59d5c6386b0e2a85cd287a9d966 to your computer and use it in GitHub Desktop.
OCR for the Text Editor
# Add-on metadata, following Blender's ``bl_info`` convention.
bl_info = {
    "name": "OCR Add-on",
    "author": "Your Name",
    "version": (1, 0, 0),
    "blender": (3, 0, 0),
    "location": "Text Editor > Sidebar",
    "description": "Add-on for OCR with specified models",
    "category": "Text Editor",
}
import bpy
import os
import subprocess
import sys
from bpy.props import StringProperty
from bpy.types import Operator, Panel
import platform
# Requirement specifiers handed to ``pip install`` by install_dependencies().
# The CUDA builds of torch/torchvision/torchaudio are installed separately
# there because they come from a different package index.
# NOTE(review): install_dependencies() pins torch 2.1.2 while this list asks
# for NumPy >= 2.1.3 -- confirm that torch build works with NumPy 2.x.
dependencies = [
    "transformers==4.46.3",
    "pandas==2.2.3",
    "tiktoken",
    "verovio",
    "numpy>=2.1.3",
    "accelerate",
]
def debug_print(message):
    """Write *message* to stdout, prefixed with ``DEBUG: ``."""
    print("DEBUG: {}".format(message))
def addon_script_path() -> str:
    """Return the directory containing this add-on's script file.

    The directory is derived from ``__file__`` and is also echoed through
    debug_print().
    """
    script_dir, _script_name = os.path.split(__file__)
    debug_print(f"Addon script path is: {script_dir}")
    return script_dir
def venv_path(env_name="virtual_dependencies") -> str:
    """Return the virtual environment directory inside the add-on's folder.

    Args:
        env_name: Name of the environment directory, joined onto the add-on
            script directory.

    Returns:
        The joined path. Nothing is created on disk here.
    """
    env_dir = os.path.join(addon_script_path(), env_name)
    debug_print(f"Virtual environment path is: {env_dir}")
    return env_dir
def create_venv():
    """Build a fresh virtual environment in the add-on folder.

    Any existing environment directory is deleted first so that the new one
    starts clean. The environment is created with ``sys.executable -m venv``.

    Returns:
        Path of the new environment's Python executable, or ``None`` when any
        step fails (the reason is reported through debug_print()).
    """
    env_dir = venv_path()
    base_dir = addon_script_path()

    # Make sure the folder that will hold the environment is there.
    if not os.path.exists(base_dir):
        try:
            os.makedirs(base_dir)
            debug_print(f"Created add-on directory: {base_dir}")
        except Exception as exc:
            debug_print(f"Failed to create add-on directory: {exc}")
            return None

    # Wipe a previous environment so stale packages cannot linger.
    if os.path.exists(env_dir):
        try:
            import shutil
            shutil.rmtree(env_dir)
            debug_print(f"Removed existing virtual environment at: {env_dir}")
        except Exception as exc:
            debug_print(f"Failed to remove existing virtual environment: {exc}")
            return None

    try:
        subprocess.check_call([sys.executable, "-m", "venv", env_dir])
        debug_print(f"Created virtual environment at: {env_dir}")
    except subprocess.CalledProcessError as exc:
        debug_print(f"Failed to create virtual environment: {exc}")
        return None
    except Exception as exc:
        debug_print(f"An error occurred while creating virtual environment: {exc}")
        return None

    # The interpreter lives in Scripts\ on Windows and bin/ everywhere else.
    if os.name == 'nt':
        interpreter_parts = ("Scripts", "python.exe")
    else:
        interpreter_parts = ("bin", "python")
    interpreter = os.path.join(env_dir, *interpreter_parts)

    if os.path.exists(interpreter):
        return interpreter
    debug_print(f"Virtual environment Python executable not found: {interpreter}")
    return None
def install_dependencies():
    """Recreate the virtual environment and pip-install everything into it.

    Runs, in order: one ``pip install --upgrade`` for the module-level
    ``dependencies`` list, one ``pip install`` each for the CUDA 11.8 builds of
    torch, torchvision and torchaudio, and finally ``pip list``.

    Returns:
        A human-readable status string describing success or the failure.
    """
    interpreter = create_venv()
    if interpreter is None:
        return "Failed to create virtual environment."

    pip = [interpreter, "-m", "pip"]
    cuda_index = "https://download.pytorch.org/whl/cu118"
    cuda_packages = (
        "torch==2.1.2+cu118",
        "torchvision==0.16.2+cu118",
        "torchaudio==2.1.2+cu118",
    )

    try:
        subprocess.check_call([*pip, "install", *dependencies, "--upgrade"])
        # Each CUDA wheel is installed with its own pip invocation.
        for package in cuda_packages:
            subprocess.check_call(
                [
                    *pip,
                    "install",
                    package,
                    "--index-url",
                    cuda_index,
                    "--no-warn-script-location",
                ]
            )
        # Print the final package set to the console.
        subprocess.check_call([*pip, "list"])
        debug_print(f"Dependencies installed successfully using: {interpreter}")
        return "Dependencies installed successfully!"
    except subprocess.CalledProcessError as exc:
        debug_print(f"Failed to install dependencies: {exc}")
        return f"Failed to install dependencies: {exc}"
    except Exception as exc:
        debug_print(f"An error occurred while installing dependencies: {exc}")
        return f"An error occurred while installing dependencies: {exc}"
def activate_virtualenv():
    """Expose the add-on's virtual environment to the running interpreter.

    Points ``sys.executable`` at the environment's Python, puts the
    environment's executable directory at the front of ``PATH`` and its
    ``site-packages`` directory at the front of ``sys.path``. Calling this
    more than once does not add duplicate entries.

    Returns:
        The virtual environment directory (a non-empty string) on success, or
        ``False`` when the environment or its Python executable is missing.
    """
    venv_p = venv_path()
    if not os.path.exists(venv_p):
        print(f"Virtual environment path not found: {venv_p}")
        return False

    # Resolve the platform-specific layout once, so every later step uses the
    # same names on every platform.
    if platform.system() == 'Windows':
        bin_path = os.path.join(venv_p, "Scripts")
        python_exe = os.path.join(bin_path, "python.exe")
        site_packages_path = os.path.join(venv_p, "Lib", "site-packages")
    else:
        bin_path = os.path.join(venv_p, "bin")
        python_exe = os.path.join(bin_path, "python")
        # create_venv() builds the environment with the running interpreter,
        # so this interpreter's version names the lib/pythonX.Y directory.
        version_dir = f"python{sys.version_info.major}.{sys.version_info.minor}"
        site_packages_path = os.path.join(venv_p, "lib", version_dir, "site-packages")

    if not os.path.exists(python_exe):
        print(f"Python executable not found at: {python_exe}")
        return False

    # Set the virtual environment's Python executable as the current Python
    sys.executable = python_exe

    # Prepend the environment's executables to PATH (tolerating an unset PATH).
    current_path = os.environ.get("PATH", "")
    if bin_path not in current_path.split(os.pathsep):
        os.environ["PATH"] = (
            bin_path + os.pathsep + current_path if current_path else bin_path
        )

    if not os.path.isdir(site_packages_path):
        # Still added below: imports simply will not find anything there.
        print(f"Virtual environment site-packages not found: {site_packages_path}")
    if site_packages_path not in sys.path:
        sys.path.insert(0, site_packages_path)

    print(f"Virtual environment activated: {venv_p}")
    return venv_p
def perform_single_ocr(image_file, tokenizer, model, ocr_type='ocr', ocr_box=None, ocr_color=None, render=False, save_render_file=None):
    """Run OCR on one image with a single call into the model.

    Args:
        image_file: Path of the image to read.
        tokenizer: Tokenizer passed through to the model call.
        model: Object exposing ``chat`` and ``chat_crop`` (the GOT-OCR2_0
            model loaded in perform_ocr_on_folder()).
        ocr_type: ``'ocr'`` or ``'format'`` use ``model.chat``;
            ``'ocr_crop'`` or ``'format_crop'`` use ``model.chat_crop``.
        ocr_box: Optional region restriction; only used with ``model.chat``
            and takes precedence over ``ocr_color``.
        ocr_color: Optional colour restriction; only used with ``model.chat``.
        render: Request a rendered output file; only honoured when
            ``save_render_file`` is also given.
        save_render_file: Destination path for the rendered output.

    Returns:
        Whatever the model call returns, or the string ``"Invalid OCR type"``
        for an unknown ``ocr_type`` (no model call is made in that case).
    """
    options = {'ocr_type': ocr_type}
    if ocr_type in ('ocr', 'format'):
        run_ocr = model.chat
        if ocr_box:
            options['ocr_box'] = ocr_box
        elif ocr_color:
            options['ocr_color'] = ocr_color
    elif ocr_type in ('ocr_crop', 'format_crop'):
        run_ocr = model.chat_crop
    else:
        return "Invalid OCR type"

    # Fold rendering into the same call: running inference a second time just
    # to render doubled the work and dropped the box/colour/crop selection.
    if render and save_render_file:
        options['render'] = True
        options['save_render_file'] = save_render_file

    return run_ocr(tokenizer, image_file, **options)
def perform_ocr_on_folder(folder, ocr_type='ocr', ocr_box=None, ocr_color=None, render=False, save_render_file=None):
    """OCR every image file found directly inside *folder*.

    Files are visited in sorted name order and selected by extension
    (.png, .jpg, .jpeg, .bmp, .gif, case-insensitive). The remaining
    arguments are forwarded unchanged to perform_single_ocr().

    Returns:
        A list of ``(filename, result)`` tuples on success, or an error
        message string when the folder is invalid, the virtual environment
        cannot be activated, or CUDA is unavailable.
    """
    if not os.path.isdir(folder):
        return "Selected folder is not valid."
    if not activate_virtualenv():
        return "Could not activate virtual environment!"

    # Imported only after activation, once the environment's packages have
    # been put on sys.path.
    import torch
    if not torch.cuda.is_available():
        return "CUDA is not available. Please ensure that CUDA is installed and configured correctly."
    from transformers import AutoModel, AutoTokenizer

    # NOTE(review): trust_remote_code=True executes Python code shipped with
    # the model repository -- confirm that source is trusted.
    model_id = 'ucaslcl/GOT-OCR2_0'
    tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
    model = AutoModel.from_pretrained(
        model_id,
        trust_remote_code=True,
        low_cpu_mem_usage=True,
        device_map='cuda',
        use_safetensors=True,
        pad_token_id=tokenizer.eos_token_id,
    )
    model = model.eval().cuda()

    image_suffixes = ('.png', '.jpg', '.jpeg', '.bmp', '.gif')
    image_names = [
        entry for entry in sorted(os.listdir(folder))
        if entry.lower().endswith(image_suffixes)
    ]

    all_results = []
    for image_name in image_names:
        text = perform_single_ocr(
            os.path.join(folder, image_name),
            tokenizer,
            model,
            ocr_type,
            ocr_box,
            ocr_color,
            render,
            save_render_file,
        )
        all_results.append((image_name, text))
        debug_print(f"OCR result for {image_name}: {text}")
    return all_results
# Operator that (re)creates the virtual environment and installs the OCR
# packages into it via install_dependencies().
class OCR_OT_InstallDependencies(Operator):
    bl_idname = "ocr.install_dependencies"
    bl_label = "Install Dependencies"
    bl_description = "Install required dependencies for OCR"

    # install_dependencies() only ever returns this exact text on success;
    # every other return value describes a failure.
    _SUCCESS_MESSAGE = "Dependencies installed successfully!"

    def execute(self, context):
        """Run the installation and report its outcome.

        Returns ``{'FINISHED'}`` on success. On failure the message is
        reported as an error and ``{'CANCELLED'}`` is returned, so a failed
        installation is no longer presented as an informational success.
        """
        result = install_dependencies()
        if result != self._SUCCESS_MESSAGE:
            self.report({'ERROR'}, result)
            return {'CANCELLED'}
        self.report({'INFO'}, result)
        return {'FINISHED'}
# Operator that lets the user pick the image folder and stores the choice on
# the scene as ``ocr_folder``.
class OCR_OT_SelectFolder(Operator):
    bl_idname = "ocr.select_folder"
    bl_label = "Select Folder"
    bl_description = "Select the folder containing the image files"

    # Filled in by the file selector opened from invoke().
    directory: StringProperty(subtype="DIR_PATH")

    def invoke(self, context, event):
        """Open the file selector for this operator and wait for the user."""
        context.window_manager.fileselect_add(self)
        return {'RUNNING_MODAL'}

    def execute(self, context):
        """Store the chosen directory on the scene and report it."""
        chosen_directory = self.directory
        context.scene.ocr_folder = chosen_directory
        self.report({'INFO'}, f"Selected folder: {chosen_directory}")
        return {'FINISHED'}
# Operator that runs OCR over every image in the folder stored on the scene
# and prints the collected text through debug_print().
class OCR_OT_PerformOCR(Operator):
    bl_idname = "ocr.perform_ocr"
    bl_label = "Perform OCR"
    bl_description = "Perform OCR on all images in the selected folder"

    def execute(self, context):
        """Activate the virtual environment, run OCR and report the outcome.

        Returns ``{'CANCELLED'}`` when no folder is selected, the virtual
        environment cannot be activated, or perform_ocr_on_folder() returns
        an error message; ``{'FINISHED'}`` otherwise.
        """
        folder = context.scene.ocr_folder
        if not folder:
            self.report({'ERROR'}, "Please select a folder first")
            return {'CANCELLED'}

        # activate_virtualenv() returns the environment directory on success
        # and False on failure, so test truthiness rather than ``is None``.
        venv_dir = activate_virtualenv()
        if not venv_dir:
            self.report({'ERROR'}, "Virtual environment not created.")
            return {'CANCELLED'}

        # Ensure the virtual environment's site-packages is in the Python path.
        # The directory is derived from the environment root itself.
        if os.name == 'nt':  # Windows
            venv_site_packages = os.path.join(venv_dir, "Lib", "site-packages")
        else:
            version_dir = f"python{sys.version_info.major}.{sys.version_info.minor}"
            venv_site_packages = os.path.join(venv_dir, "lib", version_dir, "site-packages")
        if venv_site_packages not in sys.path:
            sys.path.append(venv_site_packages)

        # Perform OCR using Blender's Python interpreter
        ocr_results = perform_ocr_on_folder(folder)
        if isinstance(ocr_results, str):
            # A string return value is an error message, not a result list.
            self.report({'ERROR'}, ocr_results)
            return {'CANCELLED'}

        collected_results = "\n".join(
            f"{filename}: {text}" for filename, text in ocr_results
        )
        self.report({'INFO'}, "OCR completed for all images")
        debug_print("Collected OCR results:")
        debug_print(collected_results)
        return {'FINISHED'}
# Text Editor sidebar panel (tab "OCR") holding one button per OCR operator.
class OCR_PT_Panel(Panel):
    bl_label = "OCR Panel"
    bl_idname = "TEXT_PT_ocr_panel"
    bl_space_type = 'TEXT_EDITOR'
    bl_region_type = 'UI'
    bl_category = 'OCR'

    def draw(self, context):
        """Add the install, select-folder and perform-OCR buttons, in that order."""
        button_operators = (
            "ocr.install_dependencies",
            "ocr.select_folder",
            "ocr.perform_ocr",
        )
        panel_layout = self.layout
        for operator_id in button_operators:
            panel_layout.operator(operator_id)
# Every Blender class this add-on provides, in the order register() and
# unregister() walk through them.
classes = (
    OCR_OT_InstallDependencies,
    OCR_OT_SelectFolder,
    OCR_OT_PerformOCR,
    OCR_PT_Panel,
)
def register():
    """Register all add-on classes, then add the ``ocr_folder`` scene property."""
    for addon_class in classes:
        bpy.utils.register_class(addon_class)
    # Holds the folder chosen through the "Select Folder" operator.
    bpy.types.Scene.ocr_folder = StringProperty(name="OCR Folder")
def unregister():
    """Unregister all add-on classes, then remove the ``ocr_folder`` scene property."""
    for addon_class in classes:
        bpy.utils.unregister_class(addon_class)
    del bpy.types.Scene.ocr_folder
# Register immediately when the file is executed as a script rather than
# imported as a module.
if __name__ == "__main__":
    register()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment