Created
April 18, 2025 15:30
-
-
Save bigsnarfdude/28fd30a8b80cf13012dc6eb690d2c092 to your computer and use it in GitHub Desktop.
image_segmentation_gemini_api.html
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<!DOCTYPE html> | |
<html lang="en"> | |
<head> | |
<meta charset="UTF-8"> | |
<meta name="viewport" content="width=device-width, initial-scale=1.0"> | |
<title>Gemini API Image Mask Visualization</title> | |
<script type="module"> | |
import { GoogleGenerativeAI } from "https://esm.run/@google/generative-ai"; | |
import { marked } from "https://esm.run/marked"; | |
/**
 * Returns the Gemini API key, asking the user for it on first use.
 *
 * The key is looked up in localStorage under "GEMINI_API_KEY". When nothing
 * usable is stored the user is prompted, and a non-empty answer is saved for
 * later visits. Surrounding whitespace (a common artefact of pasting) is
 * stripped so a key that merely looks right is never stored or returned.
 *
 * @returns {string|null} The trimmed key, or null when none is available
 *   (nothing stored and the prompt was cancelled or left blank).
 */
function getApiKey() {
  const STORAGE_KEY = "GEMINI_API_KEY";

  const stored = (localStorage.getItem(STORAGE_KEY) || "").trim();
  if (stored) {
    return stored;
  }

  // prompt() yields null when the dialog is cancelled.
  const entered = (prompt("Please enter your Gemini API key:") || "").trim();
  if (!entered) {
    return null;
  }

  localStorage.setItem(STORAGE_KEY, entered);
  return entered;
}
/**
 * Builds a Gemini model handle for the given parameters.
 *
 * @param {object} params - Passed unchanged to
 *   GoogleGenerativeAI#getGenerativeModel (the caller in this file passes
 *   `{ model: <name> }`).
 * @returns {Promise<object>} Resolves to the model object returned by the SDK.
 * @throws {Error} (as a rejection) when no API key is available, so the
 *   caller can show a clear message instead of an opaque request failure.
 */
async function getGenerativeModel(params) {
  const apiKey = getApiKey();
  if (!apiKey) {
    throw new Error("A Gemini API key is required to call the model.");
  }
  const genAI = new GoogleGenerativeAI(apiKey);
  return genAI.getGenerativeModel(params);
}
/**
 * Converts a File/Blob into the inline-data part passed to generateContent.
 *
 * The file is read as a data URL and only the portion after the first comma
 * (the base64 payload) is kept.
 *
 * @param {Blob} file - File to encode; its `type` becomes the part's mimeType.
 * @returns {Promise<{inlineData: {data: string, mimeType: string}}>}
 *   Rejects when the file cannot be read, instead of never settling.
 */
function fileToGenerativePart(file) {
  return new Promise((resolve, reject) => {
    const reader = new FileReader();

    // `load` fires only on success, so reader.result is a data URL string here.
    reader.onload = () => {
      resolve({
        inlineData: {
          data: reader.result.split(",")[1],
          mimeType: file.type
        }
      });
    };
    reader.onerror = () => {
      reject(reader.error || new Error("Failed to read the image file."));
    };
    reader.onabort = () => {
      reject(new Error("Reading the image file was aborted."));
    };

    reader.readAsDataURL(file);
  });
}
/**
 * Downscales an image file and re-encodes it as a JPEG.
 *
 * Images wider than `maxWidth` are scaled down to that width with the height
 * adjusted proportionally; narrower images keep their size. The result is
 * always named "compressed_image.jpg" with type "image/jpeg".
 *
 * @param {Blob} file - Source image file.
 * @param {number} [maxWidth=1000] - Largest output width in pixels.
 * @param {number} [quality=0.7] - JPEG quality passed to canvas.toBlob.
 * @returns {Promise<File>} Rejects when the file cannot be read, cannot be
 *   decoded as an image, or cannot be encoded (toBlob hands back no blob).
 */
function resizeAndCompressImage(file, maxWidth = 1000, quality = 0.7) {
  return new Promise((resolve, reject) => {
    const reader = new FileReader();

    reader.onerror = () => {
      reject(reader.error || new Error("Failed to read the image file."));
    };

    reader.onload = (event) => {
      const img = new Image();

      img.onerror = () => {
        reject(new Error("The selected file could not be decoded as an image."));
      };

      img.onload = () => {
        let width = img.width;
        let height = img.height;
        if (width > maxWidth) {
          height = Math.round((height * maxWidth) / width);
          width = maxWidth;
        }

        const canvas = document.createElement('canvas');
        canvas.width = width;
        canvas.height = height;
        canvas.getContext('2d').drawImage(img, 0, 0, width, height);

        canvas.toBlob((blob) => {
          if (!blob) {
            reject(new Error("Failed to encode the image as JPEG."));
            return;
          }
          resolve(new File([blob], "compressed_image.jpg", { type: "image/jpeg" }));
        }, 'image/jpeg', quality);
      };

      img.src = event.target.result;
    };

    reader.readAsDataURL(file);
  });
}
/**
 * Click handler for the "Process" button.
 *
 * Sends the prompt (plus the selected image, if any, after resizing and
 * re-encoding) to the selected model, renders the reply as Markdown in
 * #result, and, when an image was sent and the reply contains box/mask
 * entries, draws them over the image.
 *
 * Failures are reported in #result as plain text; nothing is thrown to the
 * caller.
 *
 * @returns {Promise<void>}
 */
async function processImageAndPrompt() {
  const fileInput = document.getElementById('imageInput');
  const promptInput = document.getElementById('promptInput');
  const resultDiv = document.getElementById('result');
  const modelSelect = document.getElementById('modelSelect');

  if (!promptInput.value) {
    alert('Please enter a prompt.');
    return;
  }

  // Snapshot the selection once: the request below is asynchronous, and the
  // overlay must be drawn on the same image that was sent to the model.
  const imageFile = fileInput.files[0];

  resultDiv.textContent = 'Processing...';
  // Clear previous bounding box images
  document.getElementById('boundingBoxImages').innerHTML = '';

  try {
    const model = await getGenerativeModel({ model: modelSelect.value });

    const content = [promptInput.value];
    if (imageFile) {
      const compressedImage = await resizeAndCompressImage(imageFile);
      const imagePart = await fileToGenerativePart(compressedImage);
      content.push(imagePart);
    }

    const result = await model.generateContent(content);
    const response = await result.response;
    const text = response.text();

    // NOTE(review): the rendered Markdown is inserted as HTML. If model output
    // must be treated as untrusted, run it through an HTML sanitizer first.
    resultDiv.innerHTML = marked.parse(text);

    if (imageFile) {
      // Extract box_2d + mask data only
      const boxAndMaskData = extractBoxAndMask(text);
      if (boxAndMaskData.length > 0) {
        displayImageWithBoundingBoxesAndMasks(imageFile, boxAndMaskData);
      }
    }
  } catch (error) {
    console.error('Processing failed:', error);
    // textContent, not innerHTML: error messages may echo arbitrary text.
    resultDiv.textContent = `Error: ${error.message}`;
  }
}
/**
 * Pulls every `"box_2d": [a, b, c, d], "mask": "data:image/png;base64,..."`
 * pair out of the model's reply.
 *
 * Only entries where the "mask" key directly follows the "box_2d" array are
 * recognised. The four coordinates are captured by the same expression that
 * finds the pair, so arbitrarily spaced or pretty-printed arrays are handled.
 *
 * @param {string} text - Raw response text.
 * @returns {Array<{box: number[], mask: string}>} One entry per match, in
 *   order of appearance; `box` holds the four integers as written and `mask`
 *   is the full PNG data URL. Empty when nothing matches.
 */
function extractBoxAndMask(text) {
  // Look for patterns like "box_2d": [233, 603, 425, 861], "mask": "data:image/png;base64,..."
  const pattern = /"box_2d"\s*:\s*\[\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*\]\s*,\s*"mask"\s*:\s*"(data:image\/png;base64,[^"]+)"/g;

  const results = [];
  for (const match of text.matchAll(pattern)) {
    const [, first, second, third, fourth, mask] = match;
    results.push({
      box: [first, second, third, fourth].map(Number),
      mask
    });
  }
  return results;
}
// Canvas-space position of the image's top-left corner; the margin leaves
// room for the axis labels drawn to the left of and below the image.
const PLOT_LEFT = 80;
const PLOT_TOP = 20;
// Stroke/border colours, cycled by box index.
const BOX_COLORS = ['#FF0000', '#00FF00', '#0000FF', '#FFFF00', '#FF00FF', '#00FFFF'];

/**
 * Draws the image on #canvas with a 0-1000 grid and axes, overlays every
 * box and mask, and fills #boundingBoxImages with one crop-vs-mask preview
 * row per box.
 *
 * Boxes are read as [ymin, xmin, ymax, xmax]. Values are treated as 0-1000
 * normalised coordinates unless any of them exceeds 1000, in which case the
 * box is treated as absolute pixels of the image. Boxes with a zero,
 * negative or non-numeric width/height are skipped.
 *
 * Rendering happens asynchronously once the file and image have loaded; the
 * function itself returns immediately.
 *
 * @param {Blob} file - Image to draw.
 * @param {Array<{box: number[], mask: string}>} boxAndMaskData - Boxes with
 *   their mask image URLs.
 * @returns {void}
 */
function displayImageWithBoundingBoxesAndMasks(file, boxAndMaskData) {
  const reader = new FileReader();
  reader.onload = (event) => {
    const image = new Image();
    image.onload = () => renderAnnotatedImage(image, boxAndMaskData);
    image.src = event.target.result;
  };
  reader.readAsDataURL(file);
}

/** Paints the whole annotated view for an already-loaded image. */
function renderAnnotatedImage(image, boxAndMaskData) {
  const canvas = document.getElementById('canvas');
  canvas.width = image.width + 100;
  canvas.height = image.height + 100;
  const ctx = canvas.getContext('2d');

  ctx.drawImage(image, PLOT_LEFT, PLOT_TOP);
  drawPlotGrid(ctx, image.width, image.height);

  const boundingBoxImages = document.getElementById('boundingBoxImages');
  boundingBoxImages.innerHTML = ''; // Clear previous bounding box images

  boxAndMaskData.forEach((data, index) => {
    const rect = boxToPixelRect(data.box, image.width, image.height);
    if (!rect) return; // skip degenerate boxes
    const color = BOX_COLORS[index % BOX_COLORS.length];
    drawBoxOverlay(ctx, rect, data.mask, color);
    boundingBoxImages.appendChild(buildPreviewRow(image, rect, data, color));
  });

  // Axes go on last so the box strokes drawn above do not cover them.
  drawPlotAxes(ctx, image.width, image.height);
}

/**
 * Converts a [ymin, xmin, ymax, xmax] box into an image-pixel rectangle.
 * Returns null when the rectangle has no positive area (zero-size, inverted
 * or NaN), since such a box cannot be cropped or previewed.
 */
function boxToPixelRect(box, imageWidth, imageHeight) {
  const [ymin, xmin, ymax, xmax] = box;
  // Any value above 1000 means the coordinates are absolute pixels.
  const isAbsolute = ymin > 1000 || xmin > 1000 || ymax > 1000 || xmax > 1000;
  const yDivisor = isAbsolute ? imageHeight : 1000;
  const xDivisor = isAbsolute ? imageWidth : 1000;

  const x = (xmin / xDivisor) * imageWidth;
  const y = (ymin / yDivisor) * imageHeight;
  const width = (xmax / xDivisor - xmin / xDivisor) * imageWidth;
  const height = (ymax / yDivisor - ymin / yDivisor) * imageHeight;

  if (!(width > 0) || !(height > 0)) {
    return null;
  }
  return { x, y, width, height };
}

/** Draws the translucent red grid, one line every 100 units on each axis. */
function drawPlotGrid(ctx, imageWidth, imageHeight) {
  ctx.strokeStyle = 'rgba(255, 0, 0, 0.5)'; // Red with 50% opacity
  ctx.lineWidth = 1;
  for (let i = 0; i <= 1000; i += 100) {
    // Vertical line
    const x = PLOT_LEFT + (i / 1000) * imageWidth;
    ctx.beginPath();
    ctx.moveTo(x, PLOT_TOP);
    ctx.lineTo(x, imageHeight + PLOT_TOP);
    ctx.stroke();
    // Horizontal line
    const y = PLOT_TOP + (i / 1000) * imageHeight;
    ctx.beginPath();
    ctx.moveTo(PLOT_LEFT, y);
    ctx.lineTo(imageWidth + PLOT_LEFT, y);
    ctx.stroke();
  }
}

/** Strokes the box outline now and paints its mask once the mask has loaded. */
function drawBoxOverlay(ctx, rect, maskSrc, color) {
  const left = rect.x + PLOT_LEFT;
  const top = rect.y + PLOT_TOP;

  ctx.strokeStyle = color;
  ctx.lineWidth = 5;
  ctx.strokeRect(left, top, rect.width, rect.height);

  const maskPaint = new Image();
  maskPaint.onload = () => {
    ctx.globalAlpha = 0.7;
    ctx.drawImage(maskPaint, left, top, rect.width, rect.height);
    ctx.globalAlpha = 1;
  };
  maskPaint.src = maskSrc;
}

/**
 * Builds one preview row: a label, the cropped region of the original image
 * on the left and the mask on the right, both stretched to the same size.
 */
function buildPreviewRow(image, rect, data, color) {
  const MAX_H = 200; // max row height in px
  const scale = MAX_H / rect.height;
  const scaledW = Math.round(rect.width * scale); // keeps the box's aspect ratio

  const row = document.createElement('div');
  row.className = 'bounding-box-container';

  const label = document.createElement('p');
  label.textContent = `Box with Mask: [${data.box.join(', ')}]`;
  row.appendChild(label);

  const compare = document.createElement('div');
  compare.className = 'image-comparison'; // flex container
  compare.style.gap = '20px';

  const makeCol = () => {
    const col = document.createElement('div');
    col.style.flex = '0 0 auto'; // size from content
    col.style.width = `${scaledW}px`;
    col.style.height = `${MAX_H}px`;
    col.style.overflow = 'hidden';
    return col;
  };
  // Both pictures are stretched to fill their column so they line up exactly.
  const stretchToColumn = (img) => {
    Object.assign(img.style, {
      width: '100%',
      height: '100%',
      objectFit: 'fill'
    });
  };

  /* ---- left : cropped original ---- */
  const leftCol = makeCol();
  const cropCanvas = document.createElement('canvas');
  cropCanvas.width = rect.width;
  cropCanvas.height = rect.height;
  cropCanvas.getContext('2d').drawImage(
    image,
    rect.x, rect.y, rect.width, rect.height,
    0, 0, rect.width, rect.height
  );
  const cropImg = document.createElement('img');
  cropImg.src = cropCanvas.toDataURL('image/jpeg');
  stretchToColumn(cropImg);
  leftCol.appendChild(cropImg);

  /* ---- right : mask ---- */
  const rightCol = makeCol();
  rightCol.style.border = `2px solid ${color}`;
  const maskPrev = document.createElement('img');
  maskPrev.src = data.mask;
  stretchToColumn(maskPrev);
  rightCol.appendChild(maskPrev);

  compare.appendChild(leftCol);
  compare.appendChild(rightCol);
  row.appendChild(compare);
  return row;
}

/** Draws the black X/Y axes with ticks and 0-1000 labels every 100 units. */
function drawPlotAxes(ctx, imageWidth, imageHeight) {
  const bottom = imageHeight + PLOT_TOP;

  ctx.strokeStyle = '#000000';
  ctx.lineWidth = 1;
  ctx.font = '26px Arial';

  // Y-axis
  ctx.textAlign = 'right';
  ctx.beginPath();
  ctx.moveTo(PLOT_LEFT, PLOT_TOP);
  ctx.lineTo(PLOT_LEFT, bottom);
  ctx.stroke();
  for (let i = 0; i <= 1000; i += 100) {
    const y = PLOT_TOP + (i / 1000) * imageHeight;
    ctx.fillText(i.toString(), PLOT_LEFT - 5, y + 5);
    ctx.beginPath();
    ctx.moveTo(PLOT_LEFT - 5, y);
    ctx.lineTo(PLOT_LEFT, y);
    ctx.stroke();
  }

  // X-axis
  ctx.beginPath();
  ctx.moveTo(PLOT_LEFT, bottom);
  ctx.lineTo(imageWidth + PLOT_LEFT, bottom);
  ctx.stroke();
  ctx.textAlign = 'center';
  for (let i = 0; i <= 1000; i += 100) {
    const x = PLOT_LEFT + (i / 1000) * imageWidth;
    ctx.fillText(i.toString(), x, bottom + 20);
    ctx.beginPath();
    ctx.moveTo(x, bottom);
    ctx.lineTo(x, bottom + 5);
    ctx.stroke();
  }
}
/**
 * Click handler for "Clear Image": resets the file input, wipes the canvas,
 * hides the preview image and removes all box preview rows.
 *
 * The canvas element is looked up explicitly rather than relying on the
 * browser exposing elements with an id as same-named globals.
 *
 * @returns {void}
 */
function clearImage() {
  const canvas = document.getElementById('canvas');
  const preview = document.getElementById('imagePreview');

  document.getElementById('imageInput').value = '';
  canvas.getContext('2d').clearRect(0, 0, canvas.width, canvas.height);
  preview.src = '';
  preview.style.display = 'none';
  document.getElementById('boundingBoxImages').innerHTML = '';
}
/**
 * Change handler for the file input: shows the newly chosen image in
 * #imagePreview. Does nothing when the event carries no file.
 *
 * @param {Event} event - Change event whose target is the file input.
 * @returns {void}
 */
function previewImage(event) {
  const chosen = event.target.files[0];
  if (!chosen) {
    return;
  }

  const loader = new FileReader();
  loader.onload = (loadEvent) => {
    const previewEl = document.getElementById('imagePreview');
    previewEl.src = loadEvent.target.result;
    previewEl.style.display = 'block';
  };
  loader.readAsDataURL(chosen);
}
// Wire each control to its handler: [element id, event name, handler].
const uiBindings = [
  ['submitBtn', 'click', processImageAndPrompt],
  ['clearImageBtn', 'click', clearImage],
  ['imageInput', 'change', previewImage],
];
for (const [elementId, eventName, handler] of uiBindings) {
  document.getElementById(elementId).addEventListener(eventName, handler);
}
</script> | |
<style> | |
/* ---------- Page shell: a centred 800px column ---------- */
body {
  margin: 0 auto;
  padding: 20px;
  max-width: 800px;
  font-family: Arial, sans-serif;
}

/* Prompt editor spans the full column width. */
textarea {
  height: 100px;
  width: 100%;
}

/* ---------- Model response panel ---------- */
/* Capped at 500px tall; scrolls in either direction when content overflows.
   pre-wrap keeps the whitespace and line breaks of the response. */
#result {
  margin-top: 20px;
  padding: 10px;
  border: 1px solid #ccc;
  max-height: 500px;
  overflow: auto;
  white-space: pre-wrap;
}

/* Code blocks inside the response keep their formatting and scroll sideways. */
#result pre {
  padding: 10px;
  border-radius: 4px;
  background-color: #f5f5f5;
  white-space: pre;
  overflow-x: auto;
}

#result code {
  font-family: monospace;
}

/* ---------- Image preview + annotated canvas ---------- */
#imageContainer {
  margin-top: 20px;
  padding: 10px;
  border: 1px solid #ccc;
}

/* The canvas shrinks with the column while keeping its aspect ratio. */
#canvas {
  height: auto;
  max-width: 100%;
}

/* Hidden until a file is chosen; the script switches it to display: block. */
#imagePreview {
  display: none;
  margin-top: 10px;
  max-width: 100%;
}

/* ---------- Per-box preview rows ---------- */
#boundingBoxImages {
  margin-top: 20px;
}

#boundingBoxImages img {
  max-width: 100%;
}

.bounding-box-container {
  display: block;
  margin-bottom: 30px;
  padding-bottom: 20px;
  border-bottom: 1px solid #eee;
}

.bounding-box-container p {
  margin: 0 0 10px;
  font-weight: bold;
}

/* Flex row holding the cropped original and its mask side by side. */
.image-comparison {
  display: flex;
  justify-content: space-between;
  width: 100%;
}

.image-column {
  width: 48%;
  max-width: 48%;
  border: 1px solid #ccc;
  overflow: hidden;
}

.image-column > div {
  position: relative;
  width: 100%;
}
</style> | |
</head> | |
<body> | |
<h1>Gemini API Image Mask Visualization</h1>
<!-- Controls: element ids are looked up by the module script; do not rename. -->
<select id="modelSelect" aria-label="Model">
  <option value="gemini-2.5-flash-preview-04-17">gemini-2.5-flash-preview-04-17</option>
  <option value="gemini-2.5-pro-exp-03-25">gemini-2.5-pro-exp-03-25</option>
</select>
<input type="file" id="imageInput" accept="image/*" aria-label="Image to segment">
<button type="button" id="clearImageBtn">Clear Image</button>
<textarea id="promptInput" aria-label="Prompt">Give the segmentation masks for the objects. Output a JSON list of segmentation masks where each entry contains the 2D bounding box in the key "box_2d" and the segmentation mask in key "mask".</textarea>
<button type="button" id="submitBtn">Process</button>
<!-- Model response (rendered Markdown) or status/error text; announced politely to assistive tech. -->
<div id="result" aria-live="polite"></div>
<div id="imageContainer">
  <img id="imagePreview" alt="Image preview" />
  <!-- Annotated view: image with grid, axes, boxes and masks. -->
  <canvas id="canvas" role="img" aria-label="Image with bounding boxes and segmentation masks"></canvas>
</div>
<!-- One crop-vs-mask preview row per detected box is appended here. -->
<div id="boundingBoxImages"></div>
</body> | |
</html> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment