Skip to content

Instantly share code, notes, and snippets.

@dhruvilp
Created January 15, 2026 03:58
Show Gist options
  • Select an option

  • Save dhruvilp/40f875ed68ad31570d8fa199aea329f4 to your computer and use it in GitHub Desktop.

Select an option

Save dhruvilp/40f875ed68ad31570d8fa199aea329f4 to your computer and use it in GitHub Desktop.
Textract response conversion experiments
import boto3
import io
import base64
from PIL import Image # use Pillow
def get_base64_from_layout(image_path, textract_response):
    """Crop table and figure regions out of an image as base64-encoded PNGs.

    Args:
        image_path: Path of the image file the response refers to.
        textract_response: Response dictionary whose ``'Blocks'`` list is
            scanned for blocks of type ``'TABLE'`` or ``'LAYOUT_FIGURE'``.
            Each such block must provide ``Geometry.BoundingBox`` with
            ``Left``, ``Top``, ``Width`` and ``Height`` given as fractions
            of the image width/height.

    Returns:
        A dict ``{"images": [...], "tables": [...]}``. ``"tables"`` holds one
        base64 string (UTF-8 decoded) per ``'TABLE'`` block and ``"images"``
        one per ``'LAYOUT_FIGURE'`` block, in the order the blocks appear.
        Blocks whose bounding box covers no pixels are skipped.

    Raises:
        KeyError: If a matching block lacks ``Geometry``/``BoundingBox`` or
            one of the bounding-box fields.
    """
    # Load the original image to get its pixel dimensions.
    with open(image_path, 'rb') as f:
        img_bytes = f.read()
    image = Image.open(io.BytesIO(img_bytes))
    width, height = image.size

    extracted_images = []
    extracted_tables = []

    for block in textract_response.get('Blocks', []):
        block_type = block.get('BlockType')
        # Only tables and layout figures are cropped; 'LAYOUT_FIGURE' is
        # returned when the LAYOUT feature is requested.
        if block_type not in ('TABLE', 'LAYOUT_FIGURE'):
            continue

        bbox = block['Geometry']['BoundingBox']

        # Convert normalized coordinates to whole pixels and clamp them to the
        # image, so a box that slightly overshoots the page does not produce a
        # crop padded with blank pixels.
        left = min(max(round(bbox['Left'] * width), 0), width)
        top = min(max(round(bbox['Top'] * height), 0), height)
        right = min(max(round((bbox['Left'] + bbox['Width']) * width), 0), width)
        bottom = min(max(round((bbox['Top'] + bbox['Height']) * height), 0), height)

        # A zero-area region cannot be encoded as an image; skip it instead of
        # letting the save call fail.
        if right <= left or bottom <= top:
            continue

        crop = image.crop((left, top, right, bottom))

        # The PNG writer rejects these modes (CMYK is common for JPEG input),
        # so convert to RGB before encoding.
        if crop.mode in ('CMYK', 'YCbCr'):
            crop = crop.convert('RGB')

        # Encode the crop as PNG and then as a base64 text string.
        buffered = io.BytesIO()
        crop.save(buffered, format="PNG")
        img_str = base64.b64encode(buffered.getvalue()).decode('utf-8')

        if block_type == 'TABLE':
            extracted_tables.append(img_str)
        else:
            extracted_images.append(img_str)

    return {"images": extracted_images, "tables": extracted_tables}
# Example Usage
# response = textract_client.analyze_document(Document={'Bytes': img_bytes}, FeatureTypes=['TABLES', 'LAYOUT'])
# results = get_base64_from_layout("my_document.jpg", response)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment