Created
November 30, 2023 22:10
-
-
Save nekiee13/2fdc9f404b194abb58f2246eb5891a62 to your computer and use it in GitHub Desktop.
Unexpected Layoutparser output
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import numpy as np | |
import pandas as pd | |
import json | |
from PIL import Image | |
from matplotlib import pyplot as plt | |
import pytesseract | |
from layoutparser.models.detectron2.layoutmodel import Detectron2LayoutModel | |
from layoutparser.elements import Layout, TextBlock, Rectangle | |
from layoutparser.file_utils import is_torch_cuda_available #, PathManager | |
import warnings | |
from typing import Union | |
# Initialize the layout model from local files: Detectron2LayoutModel is given
# a config file and the matching weights file. Keep exactly one
# config_path/model_path pair active; the others stay commented out as
# ready-made alternatives.
#
# The active paths are raw strings so every Windows backslash is literal and no
# path segment can be misread as an escape sequence.
#
# NOTE(review): no label_map is passed here, so block.type may come back as a
# raw class id instead of a name -- confirm against the layoutparser version
# in use.

# PubLayNet - mask_rcnn_R_50_FPN_3x
#config_path = "D:\\PDF\\vLayout\\xPrj\\models\\PubLayNet\\mask_rcnn_R_50_FPN_3x\\config.yml"
#model_path = "D:\\PDF\\vLayout\\xPrj\\models\\PubLayNet\\mask_rcnn_R_50_FPN_3x\\model_final.pth"

# PubLayNet - mask_rcnn_X_101_32x8d_FPN_3x
config_path = r"D:\PDF\vLayout\xPrj\models\PubLayNet\mask_rcnn_X_101_32x8d_FPN_3x\config.yaml"
model_path = r"D:\PDF\vLayout\xPrj\models\PubLayNet\mask_rcnn_X_101_32x8d_FPN_3x\model_final.pth"

# PrimaLayout - mask_rcnn_R_50_FPN_3x
#config_path = r"D:\PDF\vLayout\xPrj\models\PrimaLayout\mask_rcnn_R_50_FPN_3x\config.yaml"
#model_path = r"D:\PDF\vLayout\xPrj\models\PrimaLayout\mask_rcnn_R_50_FPN_3x\model_final.pth"

model = Detectron2LayoutModel(config_path=config_path, model_path=model_path)
# Directories | |
# Folder scanned for input images and folder that receives the OCR exports.
input_dir = "D:\\PDF\\vLayout\\xPrj\\DocsIn"
output_dir = "D:\\PDF\\vLayout\\xPrj\\DocsOut"
# exist_ok=True creates the folder (including missing parents) only when it is
# absent, without the race window of a separate "exists?" check. If the path
# exists but is not a directory this raises immediately instead of failing
# later on the first write.
os.makedirs(output_dir, exist_ok=True)
# Visualization function | |
def draw_box(image, layout, show_element_type=True, show_element_id=True, box_width=2, color_map=None):
    """Show *image* with one outlined rectangle per layout block.

    Args:
        image: Image data accepted by ``plt.imshow``.
        layout: Iterable of blocks exposing ``coordinates`` as
            ``(x1, y1, x2, y2)`` and a ``type`` attribute.
        show_element_type: Include the block's type in its label.
        show_element_id: Include the block's position in *layout* in its label.
        box_width: Line width of the rectangle outline.
        color_map: Optional mapping from block type to outline colour; types
            missing from the mapping are drawn in red.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    # A None default avoids sharing one dict object between calls.
    if color_map is None:
        color_map = {}

    plt.figure(figsize=(10, 10))
    plt.imshow(image)
    axes = plt.gca()

    for idx, block in enumerate(layout):
        x1, y1, x2, y2 = block.coordinates
        axes.add_patch(plt.Rectangle(
            (x1, y1),
            x2 - x1,
            y2 - y1,
            fill=False,
            edgecolor=color_map.get(block.type, 'red'),
            linewidth=box_width,
        ))

        # Build the label only from the parts that were requested, so that
        # show_element_type=False really hides the type even when the id is
        # still shown.
        label_parts = []
        if show_element_type:
            label_parts.append(str(block.type))
        if show_element_id:
            label_parts.append(str(idx))
        if label_parts:
            plt.text(x1, y1, " ".join(label_parts), fontsize=12, bbox=dict(facecolor='yellow', alpha=0.5))

    plt.axis('off')
    plt.show()
# OCR function | |
def perform_ocr(image, layout):
    """Run Tesseract OCR on the image region covered by each layout block.

    Args:
        image: Array-like image indexed as ``[row, column, ...]``.
        layout: Iterable of blocks exposing ``coordinates`` as
            ``(x1, y1, x2, y2)`` and a ``type`` attribute.

    Returns:
        A list with one dict per block, in layout order, of the form
        ``{'block_type': block.type, 'text': recognised_text}``. Blocks whose
        region lies entirely outside the image, or has no area, get an empty
        string as text.
    """
    image = np.asarray(image)
    height, width = image.shape[:2]

    ocr_data = []
    for block in layout:
        x1, y1, x2, y2 = map(int, block.coordinates)

        # Clamp the box to the image. Without this, a slightly negative
        # coordinate would be treated as an index counted from the far edge
        # and silently yield an empty or wrong crop.
        x1, x2 = max(0, min(x1, width)), max(0, min(x2, width))
        y1, y2 = max(0, min(y1, height)), max(0, min(y2, height))

        if x2 <= x1 or y2 <= y1:
            # Nothing to recognise; skip the OCR call for an empty region.
            text = ''
        else:
            text = pytesseract.image_to_string(image[y1:y2, x1:x2])

        ocr_data.append({'block_type': block.type, 'text': text})
    return ocr_data
# Save results | |
def save_results(ocr_data, output_dir, base_filename):
    """Export the OCR records as both CSV and JSON.

    Two files are written into *output_dir*:
    ``<base_filename>_OCRexport.csv`` and ``<base_filename>_OCRexport.json``.

    Args:
        ocr_data: List of per-block record dicts.
        output_dir: Directory that receives both files.
        base_filename: File name stem shared by the two exports.
    """
    export_stem = os.path.join(output_dir, f"{base_filename}_OCRexport")

    # CSV export: one row per record, without the DataFrame index column.
    table = pd.DataFrame(ocr_data)
    table.to_csv(export_stem + ".csv", index=False)

    # JSON export: the record list serialised as-is.
    with open(export_stem + ".json", 'w') as handle:
        json.dump(ocr_data, handle)
# Process each image | |
# Run layout detection and OCR on every supported image in input_dir, write
# the CSV/JSON exports to output_dir and display the detected boxes.
# Sorting makes the processing order reproducible (os.listdir order is
# arbitrary).
for filename in sorted(os.listdir(input_dir)):
    if not filename.lower().endswith(('.png', '.jpg', '.jpeg', '.tiff', '.bmp', '.gif')):
        continue
    try:
        image_path = os.path.join(input_dir, filename)
        # The context manager closes the file handle as soon as the pixels
        # are copied into the array. Converting to RGB gives every input
        # (palette GIF, RGBA PNG, greyscale, ...) the same 3-channel layout
        # before it is handed to the model and to OCR.
        with Image.open(image_path) as image:
            processed_image = np.array(image.convert("RGB"))
        layout = model.detect(processed_image)
        ocr_data = perform_ocr(processed_image, layout)
        base_filename = os.path.splitext(filename)[0]
        save_results(ocr_data, output_dir, base_filename)
        draw_box(processed_image, layout)
    except Exception as e:
        # Batch boundary: report the failing file and continue with the rest.
        print(f"Error processing {filename}: {e}")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment