Skip to content

Instantly share code, notes, and snippets.

@bigsnarfdude
Created April 18, 2025 15:30
Show Gist options
  • Save bigsnarfdude/28fd30a8b80cf13012dc6eb690d2c092 to your computer and use it in GitHub Desktop.
Save bigsnarfdude/28fd30a8b80cf13012dc6eb690d2c092 to your computer and use it in GitHub Desktop.
image_segmentation_gemini_api.html
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Gemini API Image Mask Visualization</title>
<script type="module">
import { GoogleGenerativeAI } from "https://esm.run/@google/generative-ai";
import { marked } from "https://esm.run/marked";
/**
 * Returns the Gemini API key, asking the user for it the first time.
 *
 * The key is cached in `localStorage` under `GEMINI_API_KEY`. Surrounding
 * whitespace is stripped before the key is stored or returned, because keys
 * are usually pasted and a stray space or newline would otherwise be persisted.
 *
 * @returns {string|null} The trimmed API key, or `null` when nothing usable is
 *   stored and the prompt was cancelled or left blank.
 */
function getApiKey() {
  const storageKey = "GEMINI_API_KEY";

  const stored = localStorage.getItem(storageKey);
  const storedKey = stored ? stored.trim() : "";
  if (storedKey) {
    return storedKey;
  }

  // prompt() yields null when the dialog is cancelled.
  const answer = prompt("Please enter your Gemini API key:");
  const enteredKey = answer ? answer.trim() : "";
  if (!enteredKey) {
    return null;
  }

  localStorage.setItem(storageKey, enteredKey);
  return enteredKey;
}
/**
 * Creates a Gemini model handle using the user's API key.
 *
 * @param {object} params - Passed through unchanged to
 *   `GoogleGenerativeAI#getGenerativeModel` (the caller supplies `{ model }`).
 * @returns {Promise<object>} Whatever `getGenerativeModel` returns.
 * @throws {Error} When no API key is available (the key prompt was cancelled),
 *   so the caller can show a clear message instead of sending a keyless request.
 */
async function getGenerativeModel(params) {
  const apiKey = getApiKey();
  if (!apiKey) {
    throw new Error("A Gemini API key is required to call the model.");
  }
  const client = new GoogleGenerativeAI(apiKey);
  return client.getGenerativeModel(params);
}
/**
 * Reads a file and wraps it as an inline-data part for `generateContent`.
 *
 * @param {Blob} file - The file to encode; its `type` becomes `mimeType`.
 * @returns {Promise<{inlineData: {data: string, mimeType: string}}>} Resolves
 *   with the text that follows the first comma of the file's data URL
 *   (the base64 payload) plus the MIME type.
 *   Rejects when the file cannot be read or the read is aborted.
 */
function fileToGenerativePart(file) {
  return new Promise((resolve, reject) => {
    const reader = new FileReader();

    // `load` only fires on success, so `reader.result` is a data URL here.
    reader.onload = () => {
      resolve({
        inlineData: {
          data: reader.result.split(",")[1],
          mimeType: file.type
        }
      });
    };
    // Without these the promise would stay pending forever on a failed read.
    reader.onerror = () => {
      reject(reader.error || new Error("Failed to read the image file."));
    };
    reader.onabort = () => {
      reject(new Error("Reading the image file was aborted."));
    };

    reader.readAsDataURL(file);
  });
}
/**
 * Downscales an image so it is at most `maxWidth` pixels wide (keeping the
 * aspect ratio) and re-encodes it as a JPEG.
 *
 * Images that are already narrow enough keep their dimensions but are still
 * re-encoded.
 *
 * @param {Blob} file - The source image file.
 * @param {number} [maxWidth=1000] - Width limit in pixels.
 * @param {number} [quality=0.7] - JPEG quality passed to `canvas.toBlob`.
 * @returns {Promise<File>} Resolves with a `File` named `compressed_image.jpg`
 *   of type `image/jpeg`. Rejects when the file cannot be read, cannot be
 *   decoded as an image, or the canvas fails to produce a blob.
 */
function resizeAndCompressImage(file, maxWidth = 1000, quality = 0.7) {
  return new Promise((resolve, reject) => {
    const reader = new FileReader();

    reader.onerror = () => {
      reject(reader.error || new Error("Failed to read the image file."));
    };

    reader.onload = () => {
      const img = new Image();

      img.onerror = () => {
        reject(new Error("The selected file could not be decoded as an image."));
      };

      img.onload = () => {
        let width = img.width;
        let height = img.height;
        if (width > maxWidth) {
          height = Math.round((height * maxWidth) / width);
          width = maxWidth;
        }

        const canvas = document.createElement('canvas');
        canvas.width = width;
        canvas.height = height;
        canvas.getContext('2d').drawImage(img, 0, 0, width, height);

        canvas.toBlob((blob) => {
          // toBlob hands back null when the image cannot be encoded.
          if (!blob) {
            reject(new Error("Failed to encode the resized image."));
            return;
          }
          resolve(new File([blob], "compressed_image.jpg", { type: "image/jpeg" }));
        }, 'image/jpeg', quality);
      };

      img.src = reader.result;
    };

    reader.readAsDataURL(file);
  });
}
/**
 * Click handler for the "Process" button.
 *
 * Sends the prompt (and the selected image, if any, after resizing) to the
 * chosen model, renders the Markdown reply into `#result`, and, when an image
 * was supplied and the reply contains box/mask pairs, draws them.
 *
 * Errors from any step are shown in `#result` as plain text.
 *
 * @returns {Promise<void>}
 */
async function processImageAndPrompt() {
  const fileInput = document.getElementById('imageInput');
  const promptInput = document.getElementById('promptInput');
  const resultDiv = document.getElementById('result');
  const modelSelect = document.getElementById('modelSelect');

  // A prompt made only of whitespace is as useless as an empty one.
  if (!promptInput.value.trim()) {
    alert('Please enter a prompt.');
    return;
  }

  // Snapshot the selection now: the user may change or clear the file input
  // while the request is in flight, and the boxes must be drawn on the image
  // that was actually sent.
  const imageFile = fileInput.files[0];

  resultDiv.textContent = 'Processing...';
  // Clear previous bounding box images
  document.getElementById('boundingBoxImages').innerHTML = '';

  try {
    const model = await getGenerativeModel({ model: modelSelect.value });
    const content = [promptInput.value];
    if (imageFile) {
      const compressedImage = await resizeAndCompressImage(imageFile);
      content.push(await fileToGenerativePart(compressedImage));
    }

    const result = await model.generateContent(content);
    const response = await result.response;
    const text = response.text();

    // NOTE(security): the model reply is rendered as HTML. marked does not
    // sanitize its output, so a reply containing raw HTML/script would run in
    // this page. Pipe this through an HTML sanitizer before assigning if the
    // page is ever used with untrusted prompts or images.
    resultDiv.innerHTML = marked.parse(text);

    if (imageFile) {
      // Extract box_2d + mask data only
      const boxAndMaskData = extractBoxAndMask(text);
      if (boxAndMaskData.length > 0) {
        displayImageWithBoundingBoxesAndMasks(imageFile, boxAndMaskData);
      }
    }
  } catch (error) {
    console.error('Processing failed:', error);
    // textContent, not innerHTML: error messages may echo arbitrary text.
    resultDiv.textContent = `Error: ${error.message}`;
  }
}
/**
 * Pulls every `"box_2d": [a, b, c, d], "mask": "data:image/png;base64,..."`
 * pair out of the model's reply.
 *
 * The two keys must appear in that order, directly after one another; any
 * amount of whitespace between tokens is accepted. Coordinates must be
 * non-negative integers.
 *
 * @param {string} text - Raw text returned by the model.
 * @returns {Array<{box: number[], mask: string}>} One entry per match, in the
 *   order found. `box` holds the four numbers as written; `mask` is the full
 *   data URL. Empty when nothing matches.
 */
function extractBoxAndMask(text) {
  // Groups 1-4: the four coordinates; group 5: the mask data URL.
  // A fresh literal per call keeps the /g regex's lastIndex from leaking
  // between calls.
  const pairPattern = /"box_2d"\s*:\s*\[\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*\]\s*,\s*"mask"\s*:\s*"(data:image\/png;base64,[^"]+)"/g;
  const results = [];
  let match;
  while ((match = pairPattern.exec(text)) !== null) {
    // Read the coordinates straight from the capture groups rather than
    // re-scanning a fixed-size slice of the text, which would miss boxes that
    // are pretty-printed with a lot of whitespace.
    const box = [match[1], match[2], match[3], match[4]].map(Number);
    results.push({ box, mask: match[5] });
  }
  return results;
}
/**
 * Draws the image on `#canvas` with a 0-1000 reference grid, axes, and every
 * bounding box and mask, and fills `#boundingBoxImages` with one preview row
 * per box (cropped original on the left, mask on the right).
 *
 * Boxes are read as `[ymin, xmin, ymax, xmax]`. Values are treated as being on
 * a 0-1000 scale relative to the image; if any value exceeds 1000 the box is
 * assumed to be in absolute pixels and is rescaled first. Boxes with a
 * non-positive (or non-numeric) width or height are skipped.
 *
 * Work happens asynchronously once the file and image have loaded; the
 * function itself returns immediately. Load failures are logged to the console.
 *
 * @param {Blob} file - The image file to draw.
 * @param {Array<{box: number[], mask: string}>} boxAndMaskData - Boxes with
 *   their mask image URLs.
 * @returns {void}
 */
function displayImageWithBoundingBoxesAndMasks(file, boxAndMaskData) {
  const MARGIN_LEFT = 80; // room for the Y-axis labels left of the image
  const MARGIN_TOP = 20; // room above the image
  const PREVIEW_HEIGHT = 200; // height of each preview row, in px
  const colors = ['#FF0000', '#00FF00', '#0000FF', '#FFFF00', '#FF00FF', '#00FFFF'];

  const reader = new FileReader();
  reader.onerror = function() {
    console.error('Failed to read the image for visualization:', reader.error);
  };
  reader.onload = function(event) {
    const image = new Image();
    image.onerror = function() {
      console.error('Failed to decode the image for visualization.');
    };
    image.onload = function() {
      const canvas = document.getElementById('canvas');
      canvas.width = image.width + 100;
      canvas.height = image.height + 100;
      const ctx = canvas.getContext('2d');

      const imageRight = image.width + MARGIN_LEFT;
      const imageBottom = image.height + MARGIN_TOP;

      /* Strokes a single straight segment with the current stroke style. */
      const strokeLine = (x1, y1, x2, y2) => {
        ctx.beginPath();
        ctx.moveTo(x1, y1);
        ctx.lineTo(x2, y2);
        ctx.stroke();
      };

      // Draw the image
      ctx.drawImage(image, MARGIN_LEFT, MARGIN_TOP);

      // Draw grid lines every 100 units of the 0-1000 scale
      ctx.strokeStyle = 'rgba(255, 0, 0, 0.5)'; // Red with 50% opacity
      ctx.lineWidth = 1;
      for (let i = 0; i <= 1000; i += 100) {
        const x = MARGIN_LEFT + i / 1000 * image.width;
        strokeLine(x, MARGIN_TOP, x, imageBottom);
      }
      for (let i = 0; i <= 1000; i += 100) {
        const y = MARGIN_TOP + (1000 - i) / 1000 * image.height;
        strokeLine(MARGIN_LEFT, y, imageRight, y);
      }

      const boundingBoxImages = document.getElementById('boundingBoxImages');
      boundingBoxImages.innerHTML = ''; // Clear previous bounding box images

      // Draw box_2d and mask data
      boxAndMaskData.forEach((data, index) => {
        const [ymin, xmin, ymax, xmax] = data.box;

        /* ── normalise if the coordinates are absolute pixels ── */
        const isAbsolute = ymin > 1000 || xmin > 1000 || ymax > 1000 || xmax > 1000;
        const norm = isAbsolute
          ? [
              (ymin / image.height) * 1000,
              (xmin / image.width) * 1000,
              (ymax / image.height) * 1000,
              (xmax / image.width) * 1000
            ]
          : data.box;
        const [nYmin, nXmin, nYmax, nXmax] = norm.map(v => v / 1000);

        const leftPx = nXmin * image.width;
        const topPx = nYmin * image.height;
        const wPx = (nXmax - nXmin) * image.width;
        const hPx = (nYmax - nYmin) * image.height;

        // Skip degenerate boxes. Written as a positive test so that inverted
        // boxes (negative size) and NaN are rejected too; they would otherwise
        // produce negative canvas sizes and preview scales below.
        if (!(wPx > 0 && hPx > 0)) return;

        const color = colors[index % colors.length];

        /* ── overlay bounding‑box + mask on the main canvas ── */
        ctx.strokeStyle = color;
        ctx.lineWidth = 5;
        ctx.strokeRect(leftPx + MARGIN_LEFT, topPx + MARGIN_TOP, wPx, hPx);

        // The mask loads asynchronously and is painted when ready.
        const maskPaint = new Image();
        maskPaint.onload = () => {
          ctx.globalAlpha = 0.7;
          ctx.drawImage(maskPaint, leftPx + MARGIN_LEFT, topPx + MARGIN_TOP, wPx, hPx);
          ctx.globalAlpha = 1;
        };
        maskPaint.src = data.mask;

        /* ───────────── preview row ───────────── */
        const scale = PREVIEW_HEIGHT / hPx;
        const scaledW = Math.round(wPx * scale); // keep the box's aspect ratio

        const row = document.createElement('div');
        row.className = 'bounding-box-container';

        const label = document.createElement('p');
        label.textContent = `Box with Mask: [${data.box.join(', ')}]`;
        row.appendChild(label);

        const compare = document.createElement('div');
        compare.className = 'image-comparison'; // flex container
        compare.style.gap = '20px'; // some breathing room

        /* ---- helper: fixed-size column, both columns share one size ---- */
        const makeCol = () => {
          const col = document.createElement('div');
          col.style.flex = '0 0 auto'; // size from content
          col.style.width = `${scaledW}px`;
          col.style.height = `${PREVIEW_HEIGHT}px`;
          col.style.overflow = 'hidden';
          return col;
        };

        /* ---- helper: image stretched to fill its column exactly ---- */
        const makeFilledImg = (src) => {
          const img = document.createElement('img');
          img.src = src;
          Object.assign(img.style, {
            width: '100%',
            height: '100%',
            objectFit: 'fill'
          });
          return img;
        };

        /* ---- left : cropped original ---- */
        const leftCol = makeCol();
        const cropC = document.createElement('canvas');
        cropC.width = wPx;
        cropC.height = hPx;
        cropC.getContext('2d').drawImage(image, leftPx, topPx, wPx, hPx, 0, 0, wPx, hPx);
        leftCol.appendChild(makeFilledImg(cropC.toDataURL('image/jpeg')));

        /* ---- right : mask ---- */
        const rightCol = makeCol();
        rightCol.style.border = `2px solid ${color}`;
        rightCol.appendChild(makeFilledImg(data.mask));

        /* ---- assemble ---- */
        compare.appendChild(leftCol);
        compare.appendChild(rightCol);
        row.appendChild(compare);
        boundingBoxImages.appendChild(row);
      });

      // Draw axes and labels
      ctx.strokeStyle = '#000000';
      ctx.lineWidth = 1;
      ctx.font = '26px Arial';
      ctx.textAlign = 'right';

      // Y-axis with labels and ticks
      strokeLine(MARGIN_LEFT, MARGIN_TOP, MARGIN_LEFT, imageBottom);
      for (let i = 0; i <= 1000; i += 100) {
        const y = MARGIN_TOP + i / 1000 * image.height;
        ctx.fillText(i.toString(), 75, y + 5);
        strokeLine(75, y, MARGIN_LEFT, y);
      }

      // X-axis with labels and ticks
      strokeLine(MARGIN_LEFT, imageBottom, imageRight, imageBottom);
      ctx.textAlign = 'center';
      for (let i = 0; i <= 1000; i += 100) {
        const x = MARGIN_LEFT + i / 1000 * image.width;
        ctx.fillText(i.toString(), x, image.height + 40);
        strokeLine(x, imageBottom, x, image.height + 25);
      }
    };
    image.src = event.target.result;
  };
  reader.readAsDataURL(file);
}
/**
 * Click handler for "Clear Image": resets the file input, wipes the canvas,
 * hides the preview, and removes all bounding-box preview rows.
 *
 * @returns {void}
 */
function clearImage() {
  // Look the canvas up explicitly instead of relying on the browser exposing
  // the element with id "canvas" as an implicit global.
  const canvas = document.getElementById('canvas');
  const preview = document.getElementById('imagePreview');

  document.getElementById('imageInput').value = '';
  canvas.getContext('2d').clearRect(0, 0, canvas.width, canvas.height);
  preview.src = '';
  preview.style.display = 'none';
  document.getElementById('boundingBoxImages').innerHTML = '';
}
/**
 * `change` handler for the file input: shows the chosen image in
 * `#imagePreview`, or hides the preview when the selection was cleared.
 *
 * @param {Event} event - Change event whose target is the file input.
 * @returns {void}
 */
function previewImage(event) {
  const preview = document.getElementById('imagePreview');
  const file = event.target.files[0];

  if (!file) {
    // No file selected any more: don't leave the previous image on screen,
    // since it would no longer be sent with the prompt.
    preview.src = '';
    preview.style.display = 'none';
    return;
  }

  const reader = new FileReader();
  reader.onload = function() {
    preview.src = reader.result;
    preview.style.display = 'block';
  };
  reader.onerror = function() {
    console.error('Failed to read the image for preview:', reader.error);
  };
  reader.readAsDataURL(file);
}
// Attach event listeners
// Wire each control to its handler: [element id, event name, handler].
const listenerBindings = [
  ['submitBtn', 'click', processImageAndPrompt],
  ['clearImageBtn', 'click', clearImage],
  ['imageInput', 'change', previewImage],
];
for (const [elementId, eventName, handler] of listenerBindings) {
  document.getElementById(elementId).addEventListener(eventName, handler);
}
</script>
<style>
/* Page layout: a single centred column. */
body {
font-family: Arial, sans-serif;
max-width: 800px;
margin: 0 auto;
padding: 20px;
}
/* Prompt box. */
textarea {
width: 100%;
height: 100px;
}
/* Panel the script fills with the status text, the rendered reply, or an error. */
#result {
margin-top: 20px;
border: 1px solid #ccc;
padding: 10px;
overflow-x: auto; /* Add horizontal scrollbar when needed */
white-space: pre-wrap; /* Preserve whitespace and line breaks */
max-height: 500px; /* Set a maximum height */
overflow-y: auto; /* Add vertical scrollbar when needed */
}
/* Wrapper around the preview image and the drawing canvas. */
#imageContainer {
margin-top: 20px;
border: 1px solid #ccc;
padding: 10px;
}
/* The canvas is sized by the script to the image plus margins; this scales it down to fit. */
#canvas {
max-width: 100%;
height: auto;
}
/* Hidden until the script shows it after a file is chosen. */
#imagePreview {
max-width: 100%;
display: none;
margin-top: 10px;
}
/* Container for the per-box preview rows the script appends. */
#boundingBoxImages {
margin-top: 20px;
}
#boundingBoxImages img {
max-width: 100%;
}
/* One preview row (label + crop/mask pair); class is set by the script. */
.bounding-box-container {
display: block;
margin-bottom: 30px;
border-bottom: 1px solid #eee;
padding-bottom: 20px;
}
.bounding-box-container p {
margin: 0 0 10px 0;
font-weight: bold;
}
/* Flex row holding the crop and the mask side by side; class is set by the script. */
.image-comparison {
display: flex;
justify-content: space-between;
width: 100%;
}
/* NOTE(review): the script in this file never assigns the "image-column" class
   (columns are sized with inline styles), so these two rules appear unused. */
.image-column {
width: 48%;
max-width: 48%;
overflow: hidden;
border: 1px solid #ccc;
}
.image-column > div {
width: 100%;
position: relative;
}
/* Style for code blocks inside the result div */
#result pre {
overflow-x: auto;
white-space: pre;
background-color: #f5f5f5;
padding: 10px;
border-radius: 4px;
}
#result code {
font-family: monospace;
}
</style>
</head>
<body>
<h1>Gemini API Image Mask Visualization</h1>
<!-- Model picker: the selected value is passed as the model name when the script requests a model. -->
<select id="modelSelect">
<option value="gemini-2.5-flash-preview-04-17">gemini-2.5-flash-preview-04-17</option>
<option value="gemini-2.5-pro-exp-03-25">gemini-2.5-pro-exp-03-25</option>
</select>
<!-- Optional image: when no file is chosen the script sends the prompt on its own. -->
<input type="file" id="imageInput" accept="image/*">
<button id="clearImageBtn">Clear Image</button>
<!-- Default prompt: the script's extractor looks for a "box_2d" array immediately followed by a "mask" data URL in the reply. -->
<textarea id="promptInput">Give the segmentation masks for the objects. Output a JSON list of segmentation masks where each entry contains the 2D bounding box in the key "box_2d" and the segmentation mask in key "mask".</textarea>
<button id="submitBtn">Process</button>
<!-- Status text, the rendered reply, or an error message is written here. -->
<div id="result"></div>
<div id="imageContainer">
<!-- Preview of the chosen file; hidden by CSS until the script shows it. -->
<img id="imagePreview" alt="Image preview" />
<!-- Annotated image: grid, axes, bounding boxes and masks are drawn here. -->
<canvas id="canvas"></canvas>
</div>
<!-- One row per detected box (cropped original next to its mask) is appended here. -->
<div id="boundingBoxImages"></div>
</body>
</html>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment