Created
August 14, 2025 23:44
-
-
Save ehzawad/ed8e8de2db696e7008e7b873bbc50a52 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import React, { useMemo, useRef, useState, useEffect } from "react"; | |
// Simple FAISS-like toy: IndexFlatL2
/**
 * Brute-force nearest-neighbour container modelled on FAISS's IndexFlatL2:
 * it stores every vector and linearly scans them on each search.
 */
class IndexFlatL2 {
  /**
   * @param {number} d - Dimensionality every stored and query vector must have.
   */
  constructor(d) {
    this.d = d; // dimensionality
    this.vectors = []; // each item: {id, x: number[]}
    this._nextId = 1;
  }

  /**
   * Append a batch of vectors, assigning consecutive ids starting at 1.
   *
   * @param {Iterable<number[]>} vecs - Vectors of length `d`.
   * @throws {Error} "dimension mismatch" if any entry is not an array of length `d`;
   *   in that case nothing from the batch is stored.
   */
  add(vecs) {
    const batch = Array.from(vecs);
    // Validate the whole batch up front so a bad entry cannot leave the index half-updated.
    for (const v of batch) {
      if (!Array.isArray(v) || v.length !== this.d) throw new Error("dimension mismatch");
    }
    for (const v of batch) {
      // Store a copy: later mutation of the caller's array must not change indexed data.
      this.vectors.push({ id: this._nextId++, x: [...v] });
    }
  }

  /**
   * Return the `k` stored vectors closest to `query`, nearest first.
   *
   * @param {number[]} query - Vector of length `d`.
   * @param {number} [k=1] - Maximum number of neighbours; values <= 0 or NaN yield [].
   * @returns {{id: number, dist: number, x: number[]}[]} At most `k` hits sorted by distance.
   * @throws {Error} "dimension mismatch" if `query` is not an array of length `d`.
   */
  search(query, k = 1) {
    if (!Array.isArray(query) || query.length !== this.d) throw new Error("dimension mismatch");
    const hits = this.vectors.map((v) => ({ id: v.id, dist: l2(query, v.x), x: v.x }));
    hits.sort((a, b) => a.dist - b.dist);
    // Clamp to [0, n]: a negative end index passed to slice() would count from the
    // end of the array and return "all but the last |k|" instead of nothing.
    const limit = Math.max(0, Math.min(Math.trunc(k) || 0, hits.length));
    return hits.slice(0, limit);
  }

  /** Remove every stored vector and restart id numbering at 1. */
  reset() {
    this.vectors = [];
    this._nextId = 1;
  }
}
/**
 * Euclidean (L2) distance between two numeric vectors.
 *
 * Iterates over the indices of `a`; callers are expected to pass vectors of
 * equal length (a shorter `b` produces NaN because of the undefined entries).
 *
 * @param {number[]} a
 * @param {number[]} b
 * @returns {number} Square root of the summed squared component differences.
 */
function l2(a, b) {
  let sumSquares = 0;
  for (let i = 0; i < a.length; i++) {
    const delta = a[i] - b[i];
    sumSquares += delta * delta;
  }
  return Math.sqrt(sumSquares);
}
// Utility: map between data space [0,1]x[0,1] and SVG pixels
/**
 * Build converters between unit data space and pixel space for a padded plot area.
 *
 * Data (0,0) maps to the bottom-left corner of the padded box and (1,1) to the
 * top-right, i.e. the y axis is flipped relative to pixel coordinates.
 *
 * @param {number} width - Total drawing width in pixels.
 * @param {number} height - Total drawing height in pixels.
 * @param {number} [padding=24] - Margin in pixels kept on every side.
 * @returns {{sx: Function, sy: Function, ix: Function, iy: Function,
 *   x0: number, y0: number, x1: number, y1: number}}
 *   `sx`/`sy` map data -> pixels, `ix`/`iy` map pixels -> data, and
 *   `x0..y1` are the pixel bounds of the padded box.
 */
function makeScaler(width, height, padding = 24) {
  const x0 = padding;
  const y0 = padding;
  const x1 = width - padding;
  const y1 = height - padding;
  const spanX = x1 - x0;
  const spanY = y1 - y0;
  return {
    sx: (x) => x0 + x * spanX,
    sy: (y) => y1 - y * spanY,
    ix: (X) => (X - x0) / spanX,
    iy: (Y) => 1 - (Y - y0) / spanY,
    x0,
    y0,
    x1,
    y1,
  };
}
function FaissIndexPlayground() { | |
const W = 720, H = 480; | |
const scaler = useMemo(() => makeScaler(W, H, 28), [W, H]); | |
const [index] = useState(() => new IndexFlatL2(2)); | |
const [points, setPoints] = useState([]); // {id, x:[u,v]} | |
const [query, setQuery] = useState([0.5, 0.5]); | |
const [k, setK] = useState(1); | |
const [dragging, setDragging] = useState(false); | |
const [seed, setSeed] = useState(42); | |
// recompute search results when needed | |
const result = useMemo(() => index.search(query, k), [index, query, k, points]); | |
// keep points in sync with the index object | |
useEffect(() => { index.reset(); index.add(points.map((p) => p.x)); }, [points, index]); | |
function addPointAt(u, v) { | |
const id = points.length + 1; | |
const clamped = [Math.max(0, Math.min(1, u)), Math.max(0, Math.min(1, v))]; | |
setPoints((ps) => [...ps, { id, x: clamped }]); | |
} | |
function addRandom(n = 100) { | |
let s = mulberry32(seed); | |
const fresh = []; | |
for (let i = 0; i < n; i++) { | |
fresh.push({ id: points.length + i + 1, x: [s(), s()] }); | |
} | |
setSeed(seed + 1); | |
setPoints((ps) => [...ps, ...fresh]); | |
} | |
function clearAll() { setPoints([]); } | |
function onSvgClick(e) { | |
const rect = e.currentTarget.getBoundingClientRect(); | |
const u = scaler.ix(e.clientX - rect.left); | |
const v = scaler.iy(e.clientY - rect.top); | |
addPointAt(u, v); | |
} | |
function onMouseDown(e) { | |
setDragging(true); | |
onMouseMove(e); | |
} | |
function onMouseMove(e) { | |
if (!dragging) return; | |
const rect = e.currentTarget.getBoundingClientRect(); | |
const u = scaler.ix(e.clientX - rect.left); | |
const v = scaler.iy(e.clientY - rect.top); | |
setQuery([Math.max(0, Math.min(1, u)), Math.max(0, Math.min(1, v))]); | |
} | |
function onMouseUp() { setDragging(false); } | |
const nearest = result[0]; | |
const qpx = { x: scaler.sx(query[0]), y: scaler.sy(query[1]) }; | |
const npx = nearest ? { x: scaler.sx(nearest.x[0]), y: scaler.sy(nearest.x[1]) } : null; | |
return ( | |
<div className="w-full h-full p-6 flex flex-col gap-4"> | |
<div className="text-xl font-semibold">FAISS "index" as a toy: add vectors, then search the nearest one.</div> | |
<div className="text-sm text-gray-600 leading-relaxed"> | |
An index here is just a smart container. You can <span className="font-semibold">add</span> vectors to it; later you can <span className="font-semibold">search</span> the container with a query vector. The index returns the ids of the closest stored vectors by Euclidean distance (L2). This playground shows the simplest style, analogous to <code>IndexFlatL2</code>: it stores all points and linearly scans them when you search. | |
</div> | |
<div className="flex items-center gap-3 flex-wrap"> | |
<button className="px-3 py-1.5 rounded-2xl bg-black text-white shadow" onClick={() => addRandom(200)}>Add 200 random points</button> | |
<button className="px-3 py-1.5 rounded-2xl bg-gray-900 text-white/90 shadow" onClick={() => addRandom(1000)}>Add 1,000 random</button> | |
<button className="px-3 py-1.5 rounded-2xl bg-white border shadow" onClick={clearAll}>Clear</button> | |
<div className="ml-2 text-sm">k-neighbors:</div> | |
<input className="border rounded px-2 py-1 w-16" type="number" min={1} max={20} value={k} onChange={(e) => setK(Math.max(1, Math.min(20, parseInt(e.target.value || "1"))))} /> | |
<div className="text-sm text-gray-600">points in index: <span className="font-semibold">{points.length}</span></div> | |
</div> | |
<div className="rounded-2xl border bg-white shadow overflow-hidden"> | |
<svg | |
width={W} | |
height={H} | |
onClick={onSvgClick} | |
onMouseDown={onMouseDown} | |
onMouseMove={onMouseMove} | |
onMouseUp={onMouseUp} | |
onMouseLeave={onMouseUp} | |
className="cursor-crosshair block select-none" | |
> | |
{/* axes box */} | |
<rect x={scaler.x0} y={scaler.y0} width={scaler.x1 - scaler.x0} height={scaler.y1 - scaler.y0} fill="#fafafa" stroke="#e5e7eb" /> | |
{/* grid */} | |
{Array.from({ length: 9 }).map((_, i) => { | |
const t = (i + 1) / 10; | |
return ( | |
<g key={i}> | |
<line x1={scaler.sx(t)} y1={scaler.y0} x2={scaler.sx(t)} y2={scaler.y1} stroke="#f0f0f0" /> | |
<line x1={scaler.x0} y1={scaler.sy(t)} x2={scaler.x1} y2={scaler.sy(t)} stroke="#f0f0f0" /> | |
</g> | |
); | |
})} | |
{/* stored points */} | |
{points.map((p) => { | |
const X = scaler.sx(p.x[0]); | |
const Y = scaler.sy(p.x[1]); | |
const isNN = result.find((r) => r.id === p.id); | |
return ( | |
<g key={p.id}> | |
<circle cx={X} cy={Y} r={isNN ? 5 : 3} fill={isNN ? "#1f2937" : "#6b7280"} opacity={isNN ? 1 : 0.8} /> | |
</g> | |
); | |
})} | |
{/* query point */} | |
<g> | |
<circle cx={qpx.x} cy={qpx.y} r={7} fill="#2563eb" /> | |
<circle cx={qpx.x} cy={qpx.y} r={14} fill="none" stroke="#2563eb" strokeDasharray="4 4" opacity={0.7} /> | |
</g> | |
{/* link to nearest neighbor(s) */} | |
{result.map((r, idx) => { | |
const x2 = scaler.sx(r.x[0]); | |
const y2 = scaler.sy(r.x[1]); | |
return ( | |
<g key={r.id}> | |
<line x1={qpx.x} y1={qpx.y} x2={x2} y2={y2} stroke={idx === 0 ? "#ef4444" : "#94a3b8"} strokeWidth={idx === 0 ? 2.5 : 1.5} opacity={0.9} /> | |
</g> | |
); | |
})} | |
</svg> | |
</div> | |
<div className="text-sm text-gray-700"> | |
Drag the blue dot to move the query vector. Click anywhere inside the chart to add a new vector to the index. The nearest neighbors are highlighted in dark and connected by lines. The metric is Euclidean distance in 2D; the length of the red line is exactly the L2 distance to the closest stored vector. Increasing k shows the k-nearest neighbors. | |
</div> | |
<StatsPanel query={query} result={result} /> | |
</div> | |
); | |
} | |
function StatsPanel({ query, result }) { | |
return ( | |
<div className="grid grid-cols-1 md:grid-cols-2 gap-3"> | |
<div className="rounded-xl border p-3 bg-white shadow-sm"> | |
<div className="text-sm font-semibold mb-1">Query</div> | |
<div className="text-sm text-gray-700">q = [ {query[0].toFixed(3)}, {query[1].toFixed(3)} ]</div> | |
</div> | |
<div className="rounded-xl border p-3 bg-white shadow-sm"> | |
<div className="text-sm font-semibold mb-1">Top neighbors</div> | |
{result.length === 0 ? ( | |
<div className="text-sm text-gray-500">No points yet—click the plot to add some.</div> | |
) : ( | |
<ul className="text-sm text-gray-700 space-y-1"> | |
{result.map((r, i) => ( | |
<li key={r.id}>#{i + 1} → id {r.id} at [ {r.x[0].toFixed(3)}, {r.x[1].toFixed(3)} ] · L2 = {r.dist.toFixed(4)}</li> | |
))} | |
</ul> | |
)} | |
</div> | |
</div> | |
); | |
} | |
// tiny reproducible RNG
/**
 * Create a small seeded pseudo-random generator.
 *
 * The same seed always yields the same sequence. Not suitable for anything
 * security-sensitive.
 *
 * @param {number} a - Seed; coerced to a 32-bit integer on first use.
 * @returns {() => number} Function returning the next value in [0, 1).
 */
function mulberry32(a) {
  return function () {
    a |= 0;
    a = (a + 0x6D2B79F5) | 0;
    let t = Math.imul(a ^ (a >>> 15), 1 | a);
    t = (t + Math.imul(t ^ (t >>> 7), 61 | t)) ^ t;
    // >>> 0 reinterprets the result as unsigned before scaling by 2^32.
    return ((t ^ (t >>> 14)) >>> 0) / 4294967296;
  };
}
// ========= New panel: sentence-embedding landing map =========
function EmbeddingLandingMap() { | |
const W = 720, H = 520; | |
const scaler = React.useMemo(() => makeScaler(W, H, 28), []); | |
const seed0 = 1337; | |
const topics = React.useMemo(() => ([ | |
{ name: "Tech", center: [0.82, 0.70], color: "#1f77b4", kw: ["ai","ml","model","neural","python","cuda","gpu","faiss","embedding","vector","index","bert","llm"] }, | |
{ name: "Sports", center: [0.20, 0.82], color: "#2ca02c", kw: ["football","soccer","goal","match","league","player","tournament","score","ball"] }, | |
{ name: "Food", center: [0.28, 0.22], color: "#ff7f0e", kw: ["food","pizza","recipe","cook","spice","flavor","taste","bake","grill"] }, | |
{ name: "Finance", center: [0.82, 0.22], color: "#9467bd", kw: ["finance","stock","market","bond","bank","investment","crypto","cash","equity"] }, | |
{ name: "Travel", center: [0.16, 0.45], color: "#d62728", kw: ["travel","flight","airport","hotel","city","beach","mountain","tour","trip"] } | |
]), []); | |
const [corpus] = React.useState(() => { | |
// 1000 points clustered around topic centers | |
let rng = mulberry32(seed0); | |
const pts = []; | |
const per = 1000 / topics.length; | |
for (let t = 0; t < topics.length; t++) { | |
for (let i = 0; i < per; i++) { | |
const [cx, cy] = topics[t].center; | |
// small gaussian-ish jitter via Box–Muller like transform | |
const r1 = Math.sqrt(-2 * Math.log(1 - rng())); | |
const r2 = Math.sqrt(-2 * Math.log(1 - rng())); | |
const ang1 = 2 * Math.PI * rng(); | |
const ang2 = 2 * Math.PI * rng(); | |
const dx = 0.06 * r1 * Math.cos(ang1); | |
const dy = 0.06 * r2 * Math.sin(ang2); | |
const x = clamp01(cx + dx); | |
const y = clamp01(cy + dy); | |
pts.push({ id: pts.length + 1, x: [x, y], topic: topics[t].name, color: topics[t].color }); | |
} | |
} | |
return pts; | |
}); | |
const [k, setK] = React.useState(5); | |
const [text, setText] = React.useState("Find me a good CUDA tutorial for GPUs"); | |
const [query, setQuery] = React.useState(() => embedText2D(text, topics)); | |
const [dragging, setDragging] = React.useState(false); | |
React.useEffect(() => { setQuery(embedText2D(text, topics)); }, [text, topics]); | |
const result = React.useMemo(() => { | |
const dists = corpus.map((v) => ({ id: v.id, x: v.x, topic: v.topic, color: v.color, dist: l2(query, v.x) })); | |
dists.sort((a, b) => a.dist - b.dist); | |
return dists.slice(0, Math.min(k, dists.length)); | |
}, [corpus, query, k]); | |
function onSvgMouseDown(e) { | |
setDragging(true); onSvgMouseMove(e); | |
} | |
function onSvgMouseMove(e) { | |
if (!dragging) return; | |
const rect = e.currentTarget.getBoundingClientRect(); | |
const u = scaler.ix(e.clientX - rect.left); | |
const v = scaler.iy(e.clientY - rect.top); | |
setQuery([clamp01(u), clamp01(v)]); | |
} | |
function onSvgMouseUp() { setDragging(false); } | |
const qpx = { x: scaler.sx(query[0]), y: scaler.sy(query[1]) }; | |
return ( | |
<div className="w-full h-full p-6 flex flex-col gap-4"> | |
<div className="text-xl font-semibold">Where does a new sentence "land"? — embedding map</div> | |
<div className="text-sm text-gray-600 leading-relaxed"> | |
Imagine you already embedded 1,000 sentences. Those live as points in a space. A new sentence is run through the same encoder to get a new point, which then lands somewhere in that same space. Here we show a toy 2D embedding space with clustered topics. Type a sentence to place the blue star. Drag the star if you want to move it manually. The highlighted ringed dots are the k nearest neighbors by L2 distance. | |
</div> | |
<div className="flex items-center gap-3 flex-wrap"> | |
<input value={text} onChange={(e) => setText(e.target.value)} className="border rounded px-3 py-2 min-w-[320px] flex-1" placeholder="Type a test sentence here" /> | |
<div className="text-sm">k:</div> | |
<input className="border rounded px-2 py-1 w-16" type="number" min={1} max={30} value={k} onChange={(e) => setK(Math.max(1, Math.min(30, parseInt(e.target.value || "5"))))} /> | |
<div className="text-sm text-gray-600">corpus size: <span className="font-semibold">{corpus.length}</span></div> | |
</div> | |
<div className="rounded-2xl border bg-white shadow overflow-hidden"> | |
<svg | |
width={W} | |
height={H} | |
onMouseDown={onSvgMouseDown} | |
onMouseMove={onSvgMouseMove} | |
onMouseUp={onSvgMouseUp} | |
onMouseLeave={onSvgMouseUp} | |
className="cursor-crosshair block select-none" | |
> | |
<rect x={scaler.x0} y={scaler.y0} width={scaler.x1 - scaler.x0} height={scaler.y1 - scaler.y0} fill="#fafafa" stroke="#e5e7eb" /> | |
{Array.from({ length: 9 }).map((_, i) => { | |
const t = (i + 1) / 10; | |
return ( | |
<g key={i}> | |
<line x1={scaler.sx(t)} y1={scaler.y0} x2={scaler.sx(t)} y2={scaler.y1} stroke="#f0f0f0" /> | |
<line x1={scaler.x0} y1={scaler.sy(t)} x2={scaler.x1} y2={scaler.sy(t)} stroke="#f0f0f0" /> | |
</g> | |
); | |
})} | |
{/* corpus points */} | |
{corpus.map((p) => ( | |
<g key={p.id}> | |
<circle cx={scaler.sx(p.x[0])} cy={scaler.sy(p.x[1])} r={3} fill={p.color} opacity={0.65} /> | |
</g> | |
))} | |
{/* nearest neighbors with halo */} | |
{result.map((r) => ( | |
<g key={r.id}> | |
<circle cx={scaler.sx(r.x[0])} cy={scaler.sy(r.x[1])} r={6} fill="none" stroke="#111827" strokeWidth={1.5} /> | |
</g> | |
))} | |
{/* query star */} | |
<g> | |
<polygon points={starPoints(qpx.x, qpx.y, 10, 4)} fill="#2563eb" /> | |
<circle cx={qpx.x} cy={qpx.y} r={16} fill="none" stroke="#2563eb" strokeDasharray="4 4" opacity={0.8} /> | |
</g> | |
</svg> | |
</div> | |
<div className="text-sm text-gray-700"> | |
The star is your new sentence’s embedding. Its neighbors hint at semantic context. If you see it straddling two clusters, that reflects mixed content—e.g., “GPU performance for football analytics” will drift between Tech and Sports. Because this is a 2D toy, distances are only illustrative; real models use hundreds to thousands of dimensions. | |
</div> | |
<div className="text-sm text-gray-700 grid grid-cols-1 md:grid-cols-2 gap-3"> | |
<div className="rounded-xl border p-3 bg-white shadow-sm"> | |
<div className="text-sm font-semibold mb-1">Top neighbors</div> | |
{result.map((r, i) => ( | |
<div key={r.id} className="text-sm">#{i + 1} · {r.topic} · L2 = {r.dist.toFixed(4)}</div> | |
))} | |
</div> | |
<div className="rounded-xl border p-3 bg-white shadow-sm"> | |
<div className="text-sm font-semibold mb-1">Legend</div> | |
<div className="text-sm text-gray-700 space-y-1"> | |
{topics.map((t) => ( | |
<div key={t.name} className="flex items-center gap-2"><span className="inline-block w-3 h-3 rounded" style={{background:t.color}}></span>{t.name}</div> | |
))} | |
</div> | |
</div> | |
</div> | |
</div> | |
); | |
} | |
/**
 * Clamp a number to the closed interval [0, 1].
 *
 * @param {number} x
 * @returns {number} 0 if x < 0, 1 if x > 1, otherwise x (NaN stays NaN).
 */
function clamp01(x) {
  return Math.max(0, Math.min(1, x));
}
/**
 * Build the `points` attribute string for an SVG star polygon.
 *
 * Vertices alternate between the outer and inner radius, starting with an
 * outer vertex pointing straight up (angle -90 degrees in SVG coordinates).
 *
 * @param {number} cx - Centre x in pixels.
 * @param {number} cy - Centre y in pixels.
 * @param {number} outer - Radius of the tips.
 * @param {number} inner - Radius of the notches between tips.
 * @param {number} [spikes=5] - Number of tips; the polygon has 2 * spikes vertices.
 * @returns {string} Space-separated "x,y" pairs.
 */
function starPoints(cx, cy, outer, inner, spikes = 5) {
  const step = Math.PI / spikes;
  const vertices = [];
  for (let i = 0; i < 2 * spikes; i++) {
    const angle = step * i - Math.PI / 2;
    const radius = i % 2 === 0 ? outer : inner;
    vertices.push(`${cx + radius * Math.cos(angle)},${cy + radius * Math.sin(angle)}`);
  }
  return vertices.join(" ");
}
/**
 * Toy "encoder": place a piece of text in the unit square.
 *
 * Lower-cased runs of a-z are matched exactly against each topic's keyword
 * list. With at least one match the result is the match-count-weighted average
 * of the topic centres; otherwise the position is derived from a hash of the
 * text, so identical strings always land on the same spot.
 *
 * @param {string} text - Input text; null/undefined are treated as "".
 * @param {{center: number[], kw: string[]}[]} topics - Topic centres and keywords.
 * @returns {number[]} `[x, y]`, each clamped to [0, 1].
 */
function embedText2D(text, topics) {
  // Tolerate a missing value instead of throwing on .toLowerCase().
  const source = text == null ? "" : String(text);
  const tokens = source.toLowerCase().match(/[a-z]+/g) || [];

  // weights[i] = number of tokens that are keywords of topics[i].
  const weights = topics.map(({ kw }) => {
    const keywords = new Set(kw);
    return tokens.filter((tok) => keywords.has(tok)).length;
  });
  const total = weights.reduce((a, b) => a + b, 0);

  let x = 0.5;
  let y = 0.5;
  if (total > 0) {
    topics.forEach(({ center }, i) => {
      const share = weights[i] / total;
      x += (center[0] - 0.5) * share;
      y += (center[1] - 0.5) * share;
    });
  } else {
    // no keywords: fall back to deterministic hashing so the same string
    // always lands in the same place (different strings are unrelated)
    const h = hash32(source);
    x = (h % 1000) / 1000;
    y = (Math.floor(h / 1000) % 1000) / 1000;
  }

  // small jitter by length to avoid exact overlaps
  const jitter = (Math.min(source.length, 80) / 80) * 0.015;
  return [clamp01(x + jitter / 2), clamp01(y + jitter / 3)];
}
/**
 * 32-bit FNV-1a hash of a string, computed over its UTF-16 code units.
 *
 * @param {string} str
 * @returns {number} Unsigned 32-bit integer.
 */
function hash32(str) {
  let h = 2166136261 >>> 0; // FNV offset basis
  for (let i = 0; i < str.length; i++) {
    h ^= str.charCodeAt(i);
    h = Math.imul(h, 16777619); // FNV prime; imul keeps the product within 32 bits
  }
  return h >>> 0;
}
// ========= Wrapper: switch between panels =========
export default function PlaygroundSuite(){ | |
const [tab, setTab] = React.useState("map"); | |
const btn = (active) => `px-3 py-1.5 rounded-2xl border shadow ${active?"bg-black text-white":"bg-white text-black"}`; | |
return ( | |
<div className="w-full h-full p-4"> | |
<div className="flex gap-2 mb-4"> | |
<button className={btn(tab==='map')} onClick={()=>setTab('map')}>Sentence embedding map</button> | |
<button className={btn(tab==='nn')} onClick={()=>setTab('nn')}>Nearest-neighbor index</button> | |
</div> | |
{tab==='map' ? <EmbeddingLandingMap/> : <FaissIndexPlayground/>} | |
</div> | |
); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment