dhruvilp · January 15, 2026 03:58
diff --git a/textract_to_images.py b/textract_to_images.py
 import boto3
 import io
 import base64
 from PIL import Image # use Pillow

 def get_base64_from_layout(image_path, textract_response):
     """Crop tables and layout figures out of an image as base64 PNG strings.

     Args:
         image_path: Path of the image file to crop from.
             NOTE(review): presumably the same image that was sent to
             Textract, since the boxes are scaled by this image's size.
         textract_response: Response dict. Its ``Blocks`` list is scanned for
             blocks whose ``BlockType`` is ``TABLE`` or ``LAYOUT_FIGURE``
             ('LAYOUT_FIGURE' is returned when using the LAYOUT feature).

     Returns:
         ``{"images": [...], "tables": [...]}``. Each entry is the base64
         text of one PNG crop, in the order the blocks appear. Blocks whose
         bounding box has no area inside the image are skipped.
     """
     # Load the original image to get pixel dimensions
     with open(image_path, 'rb') as f:
         img_bytes = f.read()
     image = Image.open(io.BytesIO(img_bytes))
     width, height = image.size

     def to_pixel(fraction, extent):
         # Scale a normalized coordinate to pixels and keep it on the image,
         # so a box that overshoots the page edge does not add padding.
         return min(max(round(fraction * extent), 0), extent)

     extracted_images = []
     extracted_tables = []

     # Iterate through blocks to find Tables and Layout Figures
     for block in textract_response.get('Blocks', []):
         block_type = block.get('BlockType')
         if block_type not in ('TABLE', 'LAYOUT_FIGURE'):
             continue

         bbox = block['Geometry']['BoundingBox']

         # Convert normalized coordinates to clamped integer pixel coordinates
         left = to_pixel(bbox['Left'], width)
         top = to_pixel(bbox['Top'], height)
         right = to_pixel(bbox['Left'] + bbox['Width'], width)
         bottom = to_pixel(bbox['Top'] + bbox['Height'], height)

         # A zero-area or inverted box has nothing to encode; skip it
         # instead of handing an empty region to crop()/save().
         if right <= left or bottom <= top:
             continue

         # Crop the image
         crop = image.crop((left, top, right, bottom))

         # Convert crop to base64 string
         buffered = io.BytesIO()
         crop.save(buffered, format="PNG")
         img_str = base64.b64encode(buffered.getvalue()).decode('utf-8')

         if block_type == 'TABLE':
             extracted_tables.append(img_str)
         else:
             extracted_images.append(img_str)

     return {"images": extracted_images, "tables": extracted_tables}

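 # Example usage (send the same image bytes to Textract that are cropped here)
 # textract_client = boto3.client('textract')
 # with open("my_document.jpg", 'rb') as f:
 #     img_bytes = f.read()
 # response = textract_client.analyze_document(Document={'Bytes': img_bytes}, FeatureTypes=['TABLES', 'LAYOUT'])
 # results = get_base64_from_layout("my_document.jpg", response)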
	import boto3
	import io
	import base64
	from PIL import Image # use Pillow

	def get_base64_from_layout(image_path, textract_response):
	    """Return base64 PNG crops of every table and layout figure in an image.

	    Args:
	        image_path: Path of the image file to crop from.
	            NOTE(review): presumably the same image that was sent to
	            Textract, since the boxes are scaled by this image's size.
	        textract_response: Response dict. Its ``Blocks`` list is scanned
	            for ``TABLE`` and ``LAYOUT_FIGURE`` blocks ('LAYOUT_FIGURE' is
	            returned when using the LAYOUT feature).

	    Returns:
	        ``{"images": [...], "tables": [...]}``. Each entry is the base64
	        text of one PNG crop, in block order. Blocks whose bounding box
	        has no area inside the image are skipped.
	    """
	    # Load the original image to get pixel dimensions
	    with open(image_path, 'rb') as f:
	        img_bytes = f.read()
	    image = Image.open(io.BytesIO(img_bytes))
	    width, height = image.size

	    extracted_images = []
	    extracted_tables = []
	    # Route each block type of interest to the list that collects it.
	    destinations = {
	        'TABLE': extracted_tables,
	        'LAYOUT_FIGURE': extracted_images,
	    }

	    for block in textract_response.get('Blocks', []):
	        destination = destinations.get(block.get('BlockType'))
	        if destination is None:
	            continue

	        bbox = block['Geometry']['BoundingBox']

	        # Convert normalized coordinates to integer pixels, clamped to the
	        # image so a box that overshoots the edge does not add padding.
	        left = min(max(round(bbox['Left'] * width), 0), width)
	        top = min(max(round(bbox['Top'] * height), 0), height)
	        right = min(
	            max(round((bbox['Left'] + bbox['Width']) * width), 0), width
	        )
	        bottom = min(
	            max(round((bbox['Top'] + bbox['Height']) * height), 0), height
	        )

	        # A zero-area or inverted box has nothing to encode; skip it.
	        if right <= left or bottom <= top:
	            continue

	        # Crop the region and serialize it as PNG in memory
	        crop = image.crop((left, top, right, bottom))
	        buffered = io.BytesIO()
	        crop.save(buffered, format="PNG")

	        destination.append(
	            base64.b64encode(buffered.getvalue()).decode('utf-8')
	        )

	    return {"images": extracted_images, "tables": extracted_tables}

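	# Example usage (send the same image bytes to Textract that are cropped here)
	# textract_client = boto3.client('textract')
	# with open("my_document.jpg", 'rb') as f:
	#     img_bytes = f.read()
	# response = textract_client.analyze_document(Document={'Bytes': img_bytes}, FeatureTypes=['TABLES', 'LAYOUT'])
	# results = get_base64_from_layout("my_document.jpg", response)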
No results found