tarruda · November 2, 2025 00:07
diff --git a/llama-cpp-annotate.html b/llama-cpp-annotate.html
 <!DOCTYPE html>
 <html lang="en">
 <head>
  <meta charset="UTF-8" />
  <meta name="viewport" content="width=device-width, initial-scale=1.0"/>
  <title>Image OCR and Bounding Box Drawer</title>
  <style>
    :root {
      /* Shared design tokens. No rule in this <style> block reads them through
         var(), so changing them currently has no visible effect. */
      --pad: 12px;
      --accent: #e53935;
    }
    body {
      font-family: Arial, sans-serif;
      margin: 20px;
      line-height: 1.4;
    }
    .controls {
      display: flex;
      gap: 12px;
      align-items: center;
      flex-wrap: wrap;
      margin-bottom: 10px;
    }
    .controls label {
      display: inline-flex;
      align-items: center;
      gap: 6px;
      font-size: 14px;
    }
    #apiUrl {
      padding: 6px 10px;
      font-size: 14px;
      border: 1px solid #ccc;
      border-radius: 4px;
      width: 400px;
      font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace;
    }
    #saveApiBtn {
      padding: 6px 12px;
      background: #4CAF50;
      color: white;
      border: none;
      border-radius: 4px;
      cursor: pointer;
      font-size: 14px;
    }
    #saveApiBtn:hover {
      background: #45a049;
    }
    .format-selector {
      margin: 10px 0;
      padding: 12px;
      background: #f5f5f5;
      border-radius: 6px;
    }
    .format-selector h3 {
      margin-top: 0;
      font-size: 16px;
      color: #333;
    }
    .format-option {
      margin: 8px 0;
      padding: 8px;
      background: white;
      border-radius: 4px;
      cursor: pointer;
      transition: background 0.2s;
    }
    .format-option:hover {
      background: #e8f4f8;
    }
    .format-option input[type="radio"] {
      margin-right: 8px;
    }
    .format-option label {
      cursor: pointer;
      display: block;
    }
    .format-description {
      margin-left: 24px;
      font-size: 12px;
      color: #666;
      margin-top: 4px;
    }
    .prompt-container {
      margin: 10px 0;
    }
    .prompt-container label {
      display: block;
      font-size: 14px;
      margin-bottom: 6px;
      font-weight: 600;
    }
    #promptTextarea {
      width: 100%;
      max-width: 800px;
      min-height: 100px;
      padding: 8px;
      font-size: 14px;
      font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace;
      border: 1px solid #ccc;
      border-radius: 4px;
      resize: vertical;
    }
    #submitBtn {
      padding: 8px 10px;
      font-size: 14px;
      cursor: pointer;
    }
    #canvas-container {
      position: relative;
      display: inline-block;
      margin-top: 12px;
    }
    #imageCanvas {
      border: 1px solid #ccc;
      display: block;
    }
    /* Second canvas pinned to the top-left corner of #canvas-container so it
       sits on top of #imageCanvas. It has to receive pointer events because the
       script attaches its mousemove/mouseleave hover handlers to this canvas. */
    #overlayCanvas {
      position: absolute;
      left: 0;
      top: 0;
      pointer-events: auto;
    }
    #summary {
      margin-top: 20px;
      white-space: pre-wrap;
    }
    #progress {
      margin-top: 20px;
      white-space: pre-wrap;
      border: 1px solid #ddd;
      padding: 10px;
      max-height: 220px;
      overflow-y: auto;
      font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace;
      font-size: 12px;
      background: #fafafa;
    }
    .pill {
      display: inline-block;
      padding: 2px 6px;
      border-radius: 999px;
      background: #efefef;
      font-size: 12px;
      margin-left: 8px;
    }
    /* Hover tooltip for box text. Hidden by default; the script switches
       `display` and sets `left`/`top` from the mouse page coordinates. */
    #tooltip {
      position: absolute;
      background: rgba(0, 0, 0, 0.85);
      color: white;
      padding: 6px 10px;
      border-radius: 4px;
      font-size: 13px;
      /* Never intercept the mouse, otherwise the canvas would stop receiving
         mousemove events while the tooltip is under the cursor. */
      pointer-events: none;
      z-index: 1000;
      max-width: 300px;
      word-wrap: break-word;
      white-space: pre-wrap;
      display: none;
    }
    .preset-buttons {
      margin-top: 8px;
      display: flex;
      gap: 8px;
      flex-wrap: wrap;
    }
    .preset-btn {
      padding: 4px 10px;
      background: #f0f0f0;
      border: 1px solid #ccc;
      border-radius: 4px;
      cursor: pointer;
      font-size: 12px;
    }
    .preset-btn:hover {
      background: #e0e0e0;
    }
  </style>
 </head>
 <body>
  <h1>Upload Image for OCR and Bounding Boxes</h1>

  <!-- Toolbar: endpoint configuration, image picker, run button and overlay
       options. Every control has a visible label so it has an accessible
       name; buttons declare type="button" because they only trigger script
       actions. Element ids are unchanged because the script looks them up. -->
  <div class="controls">
    <label for="apiUrl">API URL</label>
    <input type="url" id="apiUrl" value="http://localhost:8080/v1/chat/completions" spellcheck="false">
    <button type="button" id="saveApiBtn">Save</button>
    <label for="imageUpload">Image</label>
    <input type="file" id="imageUpload" accept="image/*">
    <button type="button" id="submitBtn">Process Image</button>
    <label><input type="checkbox" id="toggleLabels"> Show labels</label>
    <span id="streamStatus" class="pill" hidden>streaming…</span>
  </div>

  <!-- Response-format chooser. role="group" + aria-labelledby names the radio
       set after its heading (the ARIA equivalent of fieldset/legend, used here
       so the existing .format-selector styling is untouched), and each radio
       points at its description through aria-describedby. -->
  <div class="format-selector" role="group" aria-labelledby="formatHeading">
    <h3 id="formatHeading">Response Format</h3>
    <div class="format-option">
      <label>
        <input type="radio" name="format" value="format1" aria-describedby="format1Description" checked>
        <strong>Format 1: Object-based</strong>
      </label>
      <div class="format-description" id="format1Description">
        Each bounding box is an object with bbox_2d array and text_content string
      </div>
    </div>
    <div class="format-option">
      <label>
        <input type="radio" name="format" value="format2" aria-describedby="format2Description">
        <strong>Format 2: Tuple-based</strong>
      </label>
      <div class="format-description" id="format2Description">
        Each bounding box is a 6-element array: [text_label, x_min, y_min, x_max, y_max, color_name]
      </div>
    </div>
  </div>

  <div class="prompt-container">
    <label for="promptTextarea">Prompt:</label>
    <textarea id="promptTextarea">Spotting all the objects and all the text in the image with line-level, Output in JSON format as { "summary": "summary of the whole image", "bounding_boxes": [ { "bbox_2d": [x1,y1,x2,y2], "text_content": "text_or_description" } ] }.</textarea>
  </div>

  <div id="canvas-container">
    <canvas id="imageCanvas"></canvas>
    <canvas id="overlayCanvas"></canvas>
  </div>

  <div id="tooltip"></div>

  <div id="summary"></div>
  <div id="progress">Idle. Upload an image to preview, then click "Process Image".</div>

  <script>
    // ----------------------- DOM references -----------------------
    // Each element the script works with is resolved once, at startup.
    const byId = (id) => document.getElementById(id);

    const fileInput = byId('imageUpload');
    const submitBtn = byId('submitBtn');
    const progressDiv = byId('progress');
    const summaryDiv = byId('summary');
    const imageCanvas = byId('imageCanvas');
    const overlayCanvas = byId('overlayCanvas');
    const overlayCtx = overlayCanvas.getContext('2d');
    const toggleLabels = byId('toggleLabels');
    const promptTextarea = byId('promptTextarea');
    const streamStatusEl = byId('streamStatus');
    const tooltip = byId('tooltip');
    const apiUrlInput = byId('apiUrl');
    const saveApiBtn = byId('saveApiBtn');

    // ----------------------- State -----------------------
    // Uploaded image: the full data URL plus its two parsed halves.
    let uploadedDataUrl = null;
    let base64Image = null;
    let mimeType = null;
    // Image element that gets painted onto imageCanvas.
    let imgEl = new Image();
    // Boxes parsed while the response streams vs. boxes from the final JSON.
    let interimBoxes = [];
    let finalBoxes = [];
    // True while an overlay redraw is already queued for the next frame.
    let rafPending = false;
    // Chooses which of the two box lists is drawn and hit-tested.
    let streamingActive = false;
    // Box object currently under the mouse pointer, if any.
    let hoveredBox = null;
    // Endpoint used for requests; starts as the input's default value.
    let apiUrl = apiUrlInput.value;

    // ----------------------- API Configuration -----------------------
    // Restore the endpoint stored by a previous visit, if there is one.
    // Reading localStorage can throw (for example a SecurityError when the
    // browser blocks storage for this origin). An uncaught exception here
    // would abort the rest of the script, so fall back to the default URL.
    let savedApiUrl = null;
    try {
      savedApiUrl = localStorage.getItem('ocrApiUrl');
    } catch (err) {
      console.warn('Could not read the saved API URL:', err);
    }
    if (savedApiUrl) {
      apiUrlInput.value = savedApiUrl;
      apiUrl = savedApiUrl;
    }

    // Save API URL: apply the trimmed input value to this session and try to
    // persist it for later visits.
    saveApiBtn.addEventListener('click', () => {
      const url = apiUrlInput.value.trim();
      if (!url) {
        alert('Please enter an API URL');
        return;
      }

      // The in-memory value is what requests use, so set it first; it stays
      // in effect even when persisting fails.
      apiUrl = url;
      try {
        localStorage.setItem('ocrApiUrl', url);
      } catch (err) {
        // setItem can throw when storage is blocked or full. Report that
        // instead of letting the handler die without any feedback.
        console.warn('Could not persist the API URL:', err);
        alert('API URL set for this session, but it could not be saved: ' + err.message);
        return;
      }
      alert('API URL saved!');
    });

    // ----------------------- Prompt Templates -----------------------
    // Default prompt text per response format. The keys are the `value`s of
    // the "format" radio inputs: the radio change handler writes
    // `prompts[radio.value]` into the prompt textarea.
    const prompts = {
      // Asks for objects of the form { bbox_2d: [...], text_content: "..." }.
      format1: `Spotting all the text in the image with line-level, Output in JSON format as { "summary": "summary of the whole image", "bounding_boxes": [ { "bbox_2d": [x1,y1,x2,y2], "text_content": "text" } ] }.`,
      
      // Asks for 6-element tuples; the line breaks and leading spaces inside
      // this template literal are part of the prompt that gets sent.
      format2: `Perform a comprehensive analysis of the provided image to detect and extract all **text** objects present. Your task is to identify every instance of text objects within the image, regardless of its size and orientation.

 Bounding boxes:

 - text_label: The label of the bounding box
 - x_min: x-coordinate of the top-left corner (in pixels)
 - y_min: y-coordinate of the top-left corner (in pixels)
 - x_max: x-coordinate of the bottom-right corner (in pixels)
 - y_max: y-coordinate of the bottom-right corner (in pixels)
 - color_name: The color name

 JSON format:

 \`\`\`json
 {
  "has": <true|false>,
  "bounding_boxes": [
    [ <text_label>, <x_min>, <y_min>, <x_max>, <y_max>, <color_name> ],
  ],
  "summary": "Summary of the image"
 }
 \`\`\``
    };

    // ----------------------- Format Selection -----------------------
    // Switching the response format swaps the textarea content for that
    // format's default prompt.
    const formatRadios = document.querySelectorAll('input[name="format"]');

    for (const radio of formatRadios) {
      radio.addEventListener('change', () => {
        promptTextarea.value = prompts[radio.value];
      });
    }

    /**
     * Returns the value of the checked "format" radio input.
     * @returns {string} The checked radio's value, or 'format1' when none is checked.
     */
    function getSelectedFormat() {
      const checkedRadio = document.querySelector('input[name="format"]:checked');
      if (!checkedRadio) {
        return 'format1';
      }
      return checkedRadio.value;
    }

    // ----------------------- JSON Schemas -----------------------
    // Schema sent as `response_format.json_schema` when format1 is selected.
    // Shape: { bounding_boxes: [{ bbox_2d, text_content }], summary }.
    const jsonSchema1 = {
      name: 'image_description_with_boxes',
      strict: true,
      schema: {
        type: 'object',
        additionalProperties: false,
        properties: {
          bounding_boxes: {
            type: 'array',
            description: 'List of detected regions.',
            items: {
              type: 'object',
              additionalProperties: false,
              properties: {
                // Exactly four numbers, positionally x1, y1, x2, y2.
                bbox_2d: {
                  type: 'array',
                  prefixItems: [
                    { type: 'number', description: 'x1' },
                    { type: 'number', description: 'y1' },
                    { type: 'number', description: 'x2' },
                    { type: 'number', description: 'y2' }
                  ],
                  minItems: 4,
                  maxItems: 4
                },
                text_content: {
                  type: 'string',
                  description: 'either text content or a description of the object delimited by the bounding box'
                }
              },
              required: ['bbox_2d', 'text_content']
            }
          },
          summary: { type: 'string', description: 'Summary of the image.' },
        },
        required: ['bounding_boxes', 'summary']
      }
    };

    // Schema sent as `response_format.json_schema` when format2 is selected.
    // Shape: { has, bounding_boxes: [tuple], summary }, where each tuple is
    // [text_label, x_min, y_min, x_max, y_max, color_name].
    const jsonSchema2 = {
      name: 'image_regions_v1',
      strict: true,
      schema: {
        type: 'object',
        additionalProperties: false,
        properties: {
          has: { type: 'boolean', description: 'Whether the target object(s) are present.' },
          bounding_boxes: {
            type: 'array',
            description: 'List of detected regions as fixed-length tuples.',
            // Fixed-length tuple: exactly six positional items.
            items: {
              type: 'array',
              prefixItems: [
                { type: 'string',  description: 'text_label' },
                { type: 'number',  description: 'x_min' },
                { type: 'number',  description: 'y_min' },
                { type: 'number',  description: 'x_max' },
                { type: 'number',  description: 'y_max' },
                { type: 'string',  description: 'color_name' }
              ],
              minItems: 6,
              maxItems: 6
            }
          },
          summary: { type: 'string', description: 'Summary of the image.' }
        },
        required: ['has', 'bounding_boxes', 'summary']
      }
    };

    // ----------------------- Helpers: canvas setup & drawing -----------------------
    /**
     * Sizes both canvases (backing store and CSS box) to w x h pixels, then
     * clears the overlay.
     * @param {number} w Width in pixels.
     * @param {number} h Height in pixels.
     */
    function setupCanvases(w, h) {
      for (const canvas of [imageCanvas, overlayCanvas]) {
        canvas.width = w;
        canvas.height = h;
        canvas.style.width = `${w}px`;
        canvas.style.height = `${h}px`;
      }
      clearOverlay();
    }

    /**
     * Repaints imageCanvas with the current image, stretched to the canvas size.
     */
    function drawBaseImage() {
      const { width, height } = imageCanvas;
      const baseCtx = imageCanvas.getContext('2d');
      baseCtx.clearRect(0, 0, width, height);
      baseCtx.drawImage(imgEl, 0, 0, width, height);
    }

    /**
     * Erases everything drawn on the overlay canvas.
     */
    function clearOverlay() {
      const { width, height } = overlayCanvas;
      overlayCtx.clearRect(0, 0, width, height);
    }

    /**
     * Reports whether the "Show labels" checkbox is ticked.
     * @returns {boolean} The checkbox's `checked` state.
     */
    function labelsEnabled() {
      const { checked } = toggleLabels;
      return checked;
    }

    /**
     * Converts a model-supplied box to canvas pixel coordinates.
     *
     * The coordinate system is guessed from the largest absolute value among
     * the four entries: up to 1 means fractions of the canvas, up to 1000
     * means a 0-1000 grid, anything larger is taken as pixels already.
     * Corners are then ordered (min first) and clamped to the canvas.
     *
     * @param {Array} bbox Four values [x1, y1, x2, y2]; each goes through Number().
     * @param {number} W Canvas width in pixels.
     * @param {number} H Canvas height in pixels.
     * @returns {number[]} [xMin, yMin, xMax, yMax] in canvas pixels.
     */
    function scaleBbox(bbox, W, H) {
      let [xa, ya, xb, yb] = bbox.map(Number);
      const TOLERANCE = 1e-6;
      const largest = Math.max(...[xa, ya, xb, yb].map((v) => Math.abs(v)));

      if (largest <= 1 + TOLERANCE) {
        xa *= W;
        ya *= H;
        xb *= W;
        yb *= H;
      } else if (largest <= 1000 + TOLERANCE) {
        xa = xa / 1000 * W;
        ya = ya / 1000 * H;
        xb = xb / 1000 * W;
        yb = yb / 1000 * H;
      }

      const clamp = (value, limit) => Math.max(0, Math.min(limit, value));
      return [
        clamp(Math.min(xa, xb), W),
        clamp(Math.min(ya, yb), H),
        clamp(Math.max(xa, xb), W),
        clamp(Math.max(ya, yb), H)
      ];
    }

    /**
     * Strokes every drawable box onto `ctx`, optionally with its text label.
     *
     * Entries that are missing, lack a `bbox_2d` array, or have zero area
     * after scaling are skipped. The hovered box gets a lighter, thicker
     * outline. Labels are drawn just above the box (clamped to the top edge).
     *
     * @param {CanvasRenderingContext2D} ctx Context to draw on.
     * @param {Array} boxes Objects with `bbox_2d` and `text_content`.
     * @param {boolean} showLabels Whether to draw text labels.
     */
    function drawBoxes(ctx, boxes, showLabels) {
      const canvasW = imageCanvas.width;
      const canvasH = imageCanvas.height;
      const LABEL_PAD_X = 4;
      const LABEL_PAD_Y = 3;
      const LABEL_TEXT_H = 12;

      ctx.save();
      ctx.lineWidth = 2;
      ctx.font = '12px Arial';

      for (const box of boxes) {
        if (!box || !Array.isArray(box.bbox_2d)) continue;

        const [bx1, by1, bx2, by2] = scaleBbox(box.bbox_2d, canvasW, canvasH);
        const boxW = Math.max(0, bx2 - bx1);
        const boxH = Math.max(0, by2 - by1);
        if (boxW <= 0 || boxH <= 0) continue;

        const isHovered = hoveredBox === box;
        ctx.strokeStyle = isHovered ? '#ff6b6b' : '#ff0000';
        ctx.lineWidth = isHovered ? 3 : 2;
        ctx.strokeRect(bx1, by1, boxW, boxH);

        if (!showLabels || typeof box.text_content !== 'string') continue;
        const label = box.text_content.trim();
        if (!label) continue;

        const labelW = ctx.measureText(label).width;
        const labelX = bx1;
        const labelY = Math.max(0, by1 - LABEL_TEXT_H - 6);

        // Translucent backdrop first, then the text on top of it.
        ctx.fillStyle = 'rgba(255,0,0,0.18)';
        ctx.fillRect(labelX, labelY, labelW + LABEL_PAD_X * 2, LABEL_TEXT_H + LABEL_PAD_Y * 2);
        ctx.fillStyle = '#ff0000';
        ctx.fillText(label, labelX + LABEL_PAD_X, labelY + LABEL_TEXT_H + 1);
      }

      ctx.restore();
    }

    /**
     * Queues one overlay redraw for the next animation frame. Calls made while
     * a redraw is already queued are ignored. The box list (interim vs. final)
     * is picked when the frame runs, not when it is scheduled.
     */
    function scheduleOverlayRender() {
      if (!rafPending) {
        rafPending = true;
        requestAnimationFrame(function renderOverlayFrame() {
          rafPending = false;
          clearOverlay();
          const visibleBoxes = streamingActive ? interimBoxes : finalBoxes;
          drawBoxes(overlayCtx, visibleBoxes, labelsEnabled());
        });
      }
    }

    // ----------------------- Hover detection -----------------------
    /**
     * Finds the box under a canvas-space point.
     *
     * The active list (interim while streaming, final otherwise) is searched
     * from last to first, so where boxes overlap the later entry wins.
     *
     * @param {number} x Canvas-space x coordinate.
     * @param {number} y Canvas-space y coordinate.
     * @returns {Object|null} The matching box object, or null when none matches.
     */
    function getBoxAtPoint(x, y) {
      const { width: canvasW, height: canvasH } = imageCanvas;
      const candidates = streamingActive ? interimBoxes : finalBoxes;

      let idx = candidates.length;
      while (idx-- > 0) {
        const candidate = candidates[idx];
        if (!candidate || !Array.isArray(candidate.bbox_2d)) continue;

        const [bx1, by1, bx2, by2] = scaleBbox(candidate.bbox_2d, canvasW, canvasH);
        const insideX = bx1 <= x && x <= bx2;
        const insideY = by1 <= y && y <= by2;
        if (insideX && insideY) return candidate;
      }
      return null;
    }

    // Hover tracking: highlight the box under the pointer and show its text in
    // the tooltip.
    overlayCanvas.addEventListener('mousemove', (event) => {
      // Map viewport coordinates to canvas pixels; the ratio accounts for the
      // canvas being displayed at a size other than its backing store.
      const bounds = overlayCanvas.getBoundingClientRect();
      const canvasX = (event.clientX - bounds.left) * (overlayCanvas.width / bounds.width);
      const canvasY = (event.clientY - bounds.top) * (overlayCanvas.height / bounds.height);
      const hit = getBoxAtPoint(canvasX, canvasY);

      // Redraw only when the hovered box actually changes.
      if (hit !== hoveredBox) {
        hoveredBox = hit;
        scheduleOverlayRender();
      }

      if (!hit || !hit.text_content) {
        tooltip.style.display = 'none';
        overlayCanvas.style.cursor = 'default';
        return;
      }

      tooltip.textContent = hit.text_content;
      tooltip.style.display = 'block';
      tooltip.style.left = `${event.pageX + 10}px`;
      tooltip.style.top = `${event.pageY - 30}px`;
      overlayCanvas.style.cursor = 'pointer';
    });

    // Pointer left the canvas: hide the tooltip, drop the hover highlight and
    // redraw.
    overlayCanvas.addEventListener('mouseleave', function resetHover() {
      tooltip.style.display = 'none';
      hoveredBox = null;
      scheduleOverlayRender();
    });

    // ----------------------- Helpers: streaming JSON parsing -----------------------
    /**
     * Pulls the JSON text out of the complete model output.
     *
     * A closed ```json fence wins: its trimmed content is returned as-is.
     * Otherwise the first balanced {...} object is returned, with braces
     * inside string literals (including escaped quotes) ignored.
     *
     * @param {string} accumulated Full text received from the model.
     * @returns {string|null} The JSON text, or null when no complete object is found.
     */
    function extractFinalJsonStringFrom(accumulated) {
      const FENCE_OPEN = '```json';
      const FENCE_CLOSE = '```';

      const fenceAt = accumulated.indexOf(FENCE_OPEN);
      if (fenceAt !== -1) {
        const fenced = accumulated.slice(fenceAt + FENCE_OPEN.length);
        const closeAt = fenced.indexOf(FENCE_CLOSE);
        if (closeAt !== -1) {
          return fenced.slice(0, closeAt).trim();
        }
      }

      const start = accumulated.indexOf('{');
      if (start === -1) return null;

      let depth = 0;
      let insideString = false;
      let escaped = false;

      for (let pos = start; pos < accumulated.length; pos += 1) {
        const c = accumulated[pos];

        if (insideString) {
          if (escaped) escaped = false;
          else if (c === '\\') escaped = true;
          else if (c === '"') insideString = false;
          continue;
        }

        if (c === '"') {
          insideString = true;
        } else if (c === '{') {
          depth += 1;
        } else if (c === '}') {
          depth -= 1;
          if (depth === 0) {
            return accumulated.slice(start, pos + 1).trim();
          }
        }
      }
      return null;
    }

    /**
     * Routes partial-box extraction to the parser for the selected format.
     * @param {string} accumulated Text received so far.
     * @param {string} format 'format2' selects the tuple parser; anything else
     *   uses the object parser.
     * @returns {Array} Boxes parsed so far, in { bbox_2d, text_content } form.
     */
    function extractPartialBoxes(accumulated, format) {
      const parse = format === 'format2'
        ? extractPartialBoxesFormat2
        : extractPartialBoxesFormat1;
      return parse(accumulated);
    }

    /**
     * Extracts the box objects that are already complete in a partially
     * streamed format1 response.
     *
     * Scans the text after the first '[' that follows "bounding_boxes",
     * tracking string literals and brace depth. Each top-level {...} that
     * parses as JSON and has a `bbox_2d` array plus a `text_content` key is
     * kept. Scanning stops at the ']' that closes the array.
     *
     * @param {string} accumulated Text received so far.
     * @returns {Array<Object>} De-duplicated boxes found so far.
     */
    function extractPartialBoxesFormat1(accumulated) {
      const keyPos = accumulated.indexOf('"bounding_boxes"');
      if (keyPos === -1) return [];

      const arrayOpen = accumulated.indexOf('[', keyPos);
      if (arrayOpen === -1) return [];

      const tail = accumulated.slice(arrayOpen + 1);
      const found = [];
      let insideString = false;
      let escaped = false;
      let depth = 0;
      let objectStart = -1;

      scan:
      for (let pos = 0; pos < tail.length; pos += 1) {
        const c = tail[pos];

        if (insideString) {
          if (escaped) escaped = false;
          else if (c === '\\') escaped = true;
          else if (c === '"') insideString = false;
          continue;
        }

        switch (c) {
          case '"':
            insideString = true;
            break;
          case '{':
            if (depth === 0) objectStart = pos;
            depth += 1;
            break;
          case '}':
            depth -= 1;
            if (depth === 0 && objectStart !== -1) {
              try {
                const candidate = JSON.parse(tail.slice(objectStart, pos + 1));
                if (candidate && Array.isArray(candidate.bbox_2d) && 'text_content' in candidate) {
                  found.push(candidate);
                }
              } catch {
                // Not valid JSON yet; skip this span.
              }
              objectStart = -1;
            }
            break;
          case ']':
            // A bracket outside any object closes the bounding_boxes array.
            if (depth === 0) break scan;
            break;
          default:
            break;
        }
      }
      return dedupeBoxes(found);
    }

    /**
     * Extracts the tuples that are already complete in a partially streamed
     * format2 response and converts them to { bbox_2d, text_content } objects.
     *
     * Scans the text after the first '[' that follows "bounding_boxes",
     * tracking string literals and bracket depth. Each top-level [...] that
     * parses as JSON with at least five items
     * ([text_label, x_min, y_min, x_max, y_max, ...]) becomes a box. Scanning
     * stops at the ']' that closes the outer array.
     *
     * @param {string} accumulated Text received so far.
     * @returns {Array<Object>} De-duplicated boxes found so far.
     */
    function extractPartialBoxesFormat2(accumulated) {
      const keyIdx = accumulated.indexOf('"bounding_boxes"');
      if (keyIdx === -1) return [];

      const startBracket = accumulated.indexOf('[', keyIdx);
      if (startBracket === -1) return [];

      const s = accumulated.slice(startBracket + 1);
      const boxes = [];
      let inStr = false, esc = false, bracketDepth = 0, arrayStart = -1;

      for (let i = 0; i < s.length; i++) {
        const ch = s[i];
        if (inStr) {
          if (esc) { esc = false; }
          else if (ch === '\\') { esc = true; }
          else if (ch === '"') { inStr = false; }
          continue;
        }
        if (ch === '"') { inStr = true; continue; }
        if (ch === '[') {
          if (bracketDepth === 0) arrayStart = i;
          bracketDepth++;
        } else if (ch === ']') {
          bracketDepth--;
          if (bracketDepth === 0 && arrayStart !== -1) {
            const arrStr = s.slice(arrayStart, i + 1);
            try {
              const arr = JSON.parse(arrStr);
              // Indices 1..4 are read below, so five items are required.
              // Accepting four would yield a bbox whose last coordinate is
              // undefined. convertFormat2ToFormat1 applies the same minimum.
              if (Array.isArray(arr) && arr.length >= 5) {
                // Convert format2 tuple to format1 object
                const box = {
                  bbox_2d: [arr[1], arr[2], arr[3], arr[4]],
                  text_content: arr[0]
                };
                boxes.push(box);
              }
            } catch {
              // ignore incomplete arrays
            }
            arrayStart = -1;
          } else if (bracketDepth === -1) {
            break; // End of outer array
          }
        }
      }
      return dedupeBoxes(boxes);
    }

    /**
     * Removes repeated boxes while keeping first-seen order.
     * Two boxes count as the same when their text_content and bbox_2d values
     * serialize to the same JSON.
     *
     * @param {Array<Object>} boxes Boxes in { bbox_2d, text_content } form.
     * @returns {Array<Object>} New array holding the first occurrence of each box.
     */
    function dedupeBoxes(boxes) {
      const seenKeys = new Set();
      return boxes.filter((box) => {
        const coords = box.bbox_2d || [];
        const key = JSON.stringify([box.text_content, ...coords]);
        if (seenKeys.has(key)) return false;
        seenKeys.add(key);
        return true;
      });
    }

    /**
     * Rewrites a parsed format2 result so its boxes use the format1 shape.
     *
     * Tuples with at least five items become
     * { bbox_2d: [t[1], t[2], t[3], t[4]], text_content: t[0] }; any other
     * entry is passed through untouched. When `bounding_boxes` is missing or
     * not an array, the input object itself is returned.
     *
     * @param {Object} result Parsed response object.
     * @returns {Object} A shallow copy with converted boxes, or `result` itself.
     */
    function convertFormat2ToFormat1(result) {
      const tuples = result.bounding_boxes;
      if (!tuples || !Array.isArray(tuples)) {
        return result;
      }

      const toObjectBox = (entry) => {
        const isTuple = Array.isArray(entry) && entry.length >= 5;
        if (!isTuple) return entry;
        const [label, xMin, yMin, xMax, yMax] = entry;
        return { bbox_2d: [xMin, yMin, xMax, yMax], text_content: label };
      };

      return { ...result, bounding_boxes: tuples.map(toObjectBox) };
    }

    // ----------------------- Image upload -----------------------
    // New file chosen: cache its data URL, MIME type and base64 payload, then
    // paint it on the canvas and reset all box/result state.
    fileInput.addEventListener('change', () => {
      const file = fileInput.files[0];
      if (!file) return;

      const showLoadedImage = () => {
        setupCanvases(imgEl.naturalWidth, imgEl.naturalHeight);
        drawBaseImage();
        interimBoxes = [];
        finalBoxes = [];
        streamingActive = false;
        hoveredBox = null;
        progressDiv.textContent = 'Image loaded. Click "Process Image" to start.';
        summaryDiv.textContent = '';
        scheduleOverlayRender();
      };

      const reader = new FileReader();
      reader.onload = (loadEvent) => {
        uploadedDataUrl = loadEvent.target.result;

        // A data URL looks like "data:<mime>;base64,<payload>".
        const [header, payload] = uploadedDataUrl.split(',');
        mimeType = header.split(':')[1].split(';')[0];
        base64Image = payload;

        imgEl = new Image();
        imgEl.onload = showLoadedImage;
        imgEl.src = uploadedDataUrl;
      };
      reader.readAsDataURL(file);
    });

    // Redraw the overlay whenever the "Show labels" checkbox changes;
    // the redraw reads the checkbox state through labelsEnabled().
    toggleLabels.addEventListener('change', scheduleOverlayRender);

    // ----------------------- Process Image (streaming) -----------------------
    // Click handler for "Process Image".
    //
    // Posts the prompt and the uploaded image to `apiUrl` as a streaming
    // chat-completion request, draws boxes as soon as they can be parsed from
    // the partial output, then parses the complete JSON for the final render.
    // Any failure is reported in the progress area; boxes parsed before the
    // failure stay on screen.
    submitBtn.addEventListener('click', async () => {
      if (!apiUrl) {
        alert('Please configure the API URL first.');
        return;
      }

      clearOverlay();
      const file = fileInput.files[0];
      if (!file) { alert('Please upload an image.'); return; }

      // Ignore further clicks until this request settles: two overlapping
      // streams would both write to interimBoxes/finalBoxes.
      submitBtn.disabled = true;
      let reader = null;
      let requestStarted = false;

      try {
        // The change handler normally caches the data URL already; read the
        // file here only when that cache is missing.
        if (!uploadedDataUrl || !base64Image || !mimeType) {
          const dataUrl = await new Promise((resolve, reject) => {
            const r = new FileReader();
            r.onload = e => resolve(e.target.result);
            // Reject with a real Error so the failure message is readable.
            r.onerror = () => reject(r.error || new Error('Could not read the image file.'));
            r.readAsDataURL(file);
          });
          uploadedDataUrl = dataUrl;
          const parts = uploadedDataUrl.split(',');
          mimeType = parts[0].split(':')[1].split(';')[0];
          base64Image = parts[1];

          if (!imgEl.src) {
            imgEl = new Image();
            imgEl.onload = () => {
              setupCanvases(imgEl.naturalWidth, imgEl.naturalHeight);
              drawBaseImage();
            };
            imgEl.src = uploadedDataUrl;
          }
        }

        const prompt = promptTextarea.value.trim();
        const selectedFormat = getSelectedFormat();
        const jsonSchema = selectedFormat === 'format1' ? jsonSchema1 : jsonSchema2;

        progressDiv.textContent = `Uploading and processing (streaming with ${selectedFormat})…`;
        streamStatusEl.hidden = false;
        streamingActive = true;
        interimBoxes = [];
        finalBoxes = [];
        hoveredBox = null;
        requestStarted = true;

        const payload = {
          model: 'gpt-4o',
          messages: [
            {
              role: 'user',
              content: [
                { type: 'text', text: prompt },
                { type: 'image_url', image_url: { url: `data:${mimeType};base64,${base64Image}`, detail: 'high' } }
              ]
            }
          ],
          max_tokens: 24000,
          temperature: 0,
          stream: true,
          response_format: {
            type: "json_schema",
            json_schema: jsonSchema
          }
        };

        const resp = await fetch(apiUrl, {
          method: 'POST',
          headers: { 'Content-Type': 'application/json' },
          body: JSON.stringify(payload)
        });
        if (!resp.ok) throw new Error(`API request failed (${resp.status})`);
        if (!resp.body) throw new Error('API response has no readable body.');

        reader = resp.body.getReader();
        const decoder = new TextDecoder();
        let accumulated = '';
        // Tail of the last read that does not end in a newline yet. Network
        // reads do not align with SSE lines, so an event may arrive in pieces.
        let pendingLine = '';
        let isDone = false;
        let totalParsedInterim = 0;

        while (!isDone) {
          const { done, value } = await reader.read();
          isDone = done;

          // `stream: true` keeps a multi-byte character that is split across
          // two reads intact; the argument-less call flushes the decoder.
          pendingLine += done ? decoder.decode() : decoder.decode(value, { stream: true });

          const lines = pendingLine.split('\n');
          // The last piece is an unfinished line unless the stream has ended.
          pendingLine = done ? '' : lines.pop();

          for (const rawLine of lines) {
            const line = rawLine.endsWith('\r') ? rawLine.slice(0, -1) : rawLine;
            if (!line.startsWith('data:')) continue;
            const data = line.slice(5).trimStart();
            if (data === '[DONE]') { isDone = true; break; }

            let parsed;
            try {
              parsed = JSON.parse(data);
            } catch (e) {
              // Ignore non-JSON SSE lines
              continue;
            }

            const delta = parsed?.choices?.[0]?.delta?.content;
            if (typeof delta === 'string') {
              accumulated += delta;
              progressDiv.textContent = accumulated.slice(-4000);
              const partial = extractPartialBoxes(accumulated, selectedFormat);
              if (partial.length !== totalParsedInterim) {
                interimBoxes = partial;
                totalParsedInterim = partial.length;
                scheduleOverlayRender();
              }
            }
            if (parsed?.choices?.[0]?.finish_reason) {
              isDone = true;
            }
          }
        }

        streamStatusEl.hidden = true;
        progressDiv.textContent += '\n\nProcessing complete. Parsing final JSON…';

        const jsonStr = extractFinalJsonStringFrom(accumulated);
        if (!jsonStr) throw new Error('Could not extract a complete JSON object from the stream.');

        let result;
        try {
          result = JSON.parse(jsonStr);
        } catch (e) {
          throw new Error('Final JSON parse failed.');
        }

        // Convert format2 to format1 for consistent handling
        if (selectedFormat === 'format2') {
          result = convertFormat2ToFormat1(result);
        }

        summaryDiv.textContent = result.summary || 'No summary provided.';
        finalBoxes = Array.isArray(result.bounding_boxes) ? result.bounding_boxes : [];
        streamingActive = false;
        hoveredBox = null;
        scheduleOverlayRender();

        progressDiv.textContent += `\nRendering complete. Final boxes: ${finalBoxes.length}.`;

      } catch (error) {
        console.error(error);
        streamStatusEl.hidden = true;
        streamingActive = false;
        hoveredBox = null;
        progressDiv.textContent = 'Failed to process image: ' + error.message;
        if (requestStarted) {
          // Keep whatever was parsed before the failure (for example when the
          // output was cut off mid-JSON) and make it the hit-tested list, so
          // the drawn boxes and the hover state agree.
          finalBoxes = interimBoxes;
          if (finalBoxes.length) {
            progressDiv.textContent += `\nShowing ${finalBoxes.length} box(es) parsed before the failure.`;
          }
        }
        scheduleOverlayRender();
      } finally {
        // Release the connection if the loop ended before the stream did.
        if (reader) reader.cancel().catch(() => {});
        submitBtn.disabled = false;
      }
    });
  </script>
 </body>
 </html>
No results found