Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Select an option

  • Save AnteaterKit/47fd39b7280889ffaa1144457e6f1e47 to your computer and use it in GitHub Desktop.

Select an option

Save AnteaterKit/47fd39b7280889ffaa1144457e6f1e47 to your computer and use it in GitHub Desktop.
qdrant index
/**
* Индексация RAG-примеров КП в Qdrant Cloud.
* По аналогии с https://qdrant.tech/documentation/cloud-quickstart/
*/
import { QdrantClient } from "@qdrant/js-client-rest";
import { RAG_EXAMPLES } from "./docling/output/rag-examples.js";
/** Name of the Qdrant collection this script creates, fills and searches. */
const COLLECTION_NAME = "kp_search";

/**
 * Environment variables of the current runtime. Read through `globalThis`
 * so that a missing `process` global yields an empty map instead of throwing.
 */
const runtimeEnv: Record<string, string | undefined> =
  (globalThis as { process?: { env?: Record<string, string | undefined> } }).process?.env ?? {};

/** Base URL of the embedding service; taken from EMBED_URL, otherwise the literal fallback. */
const EMBED_URL = runtimeEnv["EMBED_URL"] || " ";

/** Embedding model name sent with every embed request; taken from EMBED_MODEL when set. */
const EMBED_MODEL = runtimeEnv["EMBED_MODEL"] || "qwen3-embedding:8b";

/** Enables printing of the query "plan": filter + params (Qdrant does not expose an internal EXPLAIN). */
const DEBUG_SEARCH_PLAN = runtimeEnv["DEBUG_SEARCH_PLAN"] === "1";
/**
 * Shared Qdrant REST client used by every function in this file.
 *
 * The endpoint and API key are read from the QDRANT_URL and QDRANT_API_KEY
 * environment variables when they are set, so credentials do not have to be
 * committed to source. When a variable is unset the literal below is used,
 * exactly as before.
 */
const client = new QdrantClient({
  url:
    (globalThis as { process?: { env?: { QDRANT_URL?: string } } }).process?.env?.QDRANT_URL ||
    " ",
  apiKey:
    (globalThis as { process?: { env?: { QDRANT_API_KEY?: string } } }).process?.env?.QDRANT_API_KEY ||
    " ",
});
/**
 * Verifies that Qdrant is reachable by listing its collections.
 *
 * Logs a success line when the call resolves. On failure the error is
 * logged and then rethrown to the caller unchanged.
 */
async function checkQdrantConnection(): Promise<void> {
  try {
    await client.getCollections();
  } catch (cause) {
    console.error("Qdrant: ошибка соединения", cause);
    throw cause;
  }
  console.log("Qdrant: соединение OK");
}
/**
 * Requests embeddings for a batch of texts from the embedding service
 * (an Ollama API endpoint).
 *
 * POSTs `{ model: EMBED_MODEL, input: texts }` as JSON to
 * `<EMBED_URL>/api/embed` (a single trailing slash on EMBED_URL is dropped)
 * and returns the `embeddings` array of the JSON response.
 *
 * @param texts Strings to embed.
 * @returns One vector per input text, in the order returned by the service.
 * @throws Error when the HTTP status is not OK, when the response carries no
 *   embeddings, or when the number of embeddings differs from the number of
 *   input texts.
 */
async function embed(texts: string[]): Promise<number[][]> {
  const endpoint = `${EMBED_URL.replace(/\/$/, "")}/api/embed`;
  const response = await fetch(endpoint, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ model: EMBED_MODEL, input: texts }),
  });
  if (!response.ok) {
    throw new Error(`Embed API error! status: ${response.status}`);
  }

  // The field is typed as optional: the body is external input and may lack it.
  const data = (await response.json()) as { embeddings?: number[][] };
  const embeddings = data.embeddings;
  if (!embeddings?.length) {
    throw new Error("Embed API: empty embeddings");
  }
  // Callers pair vectors with inputs by index, so a count mismatch would
  // silently attach the wrong (or an undefined) vector to a text.
  if (embeddings.length !== texts.length) {
    throw new Error(
      `Embed API: expected ${texts.length} embeddings, got ${embeddings.length}`
    );
  }
  return embeddings;
}
/** Number of examples embedded and upserted per iteration of the indexing loop. */
const BATCH_SIZE = 1;
/** Filter parameters: payload key (snake_case, e.g. gearbox_type) → desired value */
export type SearchFilter = Record<string, string | number | boolean>;
/** Optional search settings accepted by searchTest. */
export interface SearchOptions {
/** Hard filter: intended to keep only points whose payload matches all of the fields */
filter?: SearchFilter;
/** Soft filter: prefer points whose payload matches; the others are not excluded */
prefer?: SearchFilter;
}
/**
 * Converts a flat key → value map into a filter object whose `must` list
 * holds one `{ key, match: { value } }` condition per entry, in entry order.
 *
 * @param prefs Payload keys and the values they have to equal.
 * @returns An object with a single `must` array (empty for an empty map).
 */
function buildQdrantFilter(prefs: SearchFilter): { must: Array<{ key: string; match: { value: string | number | boolean } }> } {
  const must: Array<{ key: string; match: { value: string | number | boolean } }> = [];
  for (const [field, wanted] of Object.entries(prefs)) {
    must.push({ key: field, match: { value: wanted } });
  }
  return { must };
}
/**
 * Counts how many fields of `prefer` are strictly equal (`===`) to the
 * value stored under the same key in `payload`.
 *
 * @param payload Point payload; a missing payload counts as zero matches.
 * @param prefer Preferred payload values.
 * @returns Number of matching fields.
 */
function countPreferMatches(
  payload: Record<string, unknown> | undefined,
  prefer: SearchFilter
): number {
  if (!payload) return 0;
  const attributes = payload;
  return Object.entries(prefer).filter(
    ([field, wanted]) => attributes[field] === wanted
  ).length;
}
/**
 * Runs a test search for `query` and prints the hits to the console.
 *
 * The query is embedded, the collection is searched, and each hit is printed
 * with its doc_id, score and stored query text. When `options.prefer` is
 * given, more candidates are requested (`max(limit * 5, 20)`), re-ranked by
 * the number of matching preferred fields (ties broken by score), cut back
 * to `limit`, and a per-field match report is printed for every hit.
 *
 * When `options.filter` is given and non-empty, its fields are sent to Qdrant
 * as `must` conditions. Previously this filter was built but never sent.
 *
 * @param query Free-text search query.
 * @param limit Maximum number of hits to print.
 * @param options Optional hard filter and soft preferences.
 * @throws Error when the embedding service returns no vector for the query;
 *   errors from the embed and search calls propagate unchanged.
 */
async function searchTest(
  query: string,
  limit = 20,
  options?: SearchOptions
): Promise<void> {
  const queryVector = (await embed([query]))[0];
  if (!queryVector) throw new Error("Embed API: query vector empty");

  const prefer = options?.prefer;
  // With soft preferences, over-fetch so re-ranking has candidates to promote.
  const requestLimit = prefer ? Math.max(limit * 5, 20) : limit;

  const hard = options?.filter;
  const hardFilter =
    hard && Object.keys(hard).length > 0 ? buildQdrantFilter(hard) : undefined;
  const filter = {
    // Caller-supplied hard filter (adds `must` only when one was provided).
    ...hardFilter,
    // NOTE(review): these conditions are hard-coded and applied to every
    // search regardless of `options` — confirm whether they should instead
    // come from the caller.
    should: [
      { key: "cab_sleeping", match: { value: false } },
      { key: "air_conditioning", match: { value: false } },
      { key: "tachograph", match: { value: true } },
    ],
  };
  const params = { hnsw_ef: 128, exact: false };

  if (DEBUG_SEARCH_PLAN) {
    console.log("\n📋 План запроса (filter + params):");
    console.log(JSON.stringify({ filter, params, limit: requestLimit }, null, 2));
  }

  let results = await client.search(COLLECTION_NAME, {
    vector: queryVector,
    with_payload: true, // return the payload so attributes can be inspected
    limit: requestLimit,
    filter,
    params,
  });

  if (prefer && results.length > 0) {
    // Sort a copy: more preferred-field matches first, then higher score.
    results = [...results]
      .sort((a, b) => {
        const matchA = countPreferMatches(a.payload as Record<string, unknown>, prefer);
        const matchB = countPreferMatches(b.payload as Record<string, unknown>, prefer);
        if (matchB !== matchA) return matchB - matchA;
        return (b.score ?? 0) - (a.score ?? 0);
      })
      .slice(0, limit);
  }

  console.log(`\nПоиск по запросу: "${query}"${prefer ? ` (prefer: ${JSON.stringify(prefer)})` : ""}`);
  for (const r of results) {
    console.log(
      ` - doc_id: ${r.payload?.doc_id}, score: ${r.score?.toFixed(4)}, query: ${r.payload?.query_user}`
    );
    if (prefer && Object.keys(prefer).length > 0) {
      const payload = (r.payload ?? {}) as Record<string, unknown>;
      // One report line per preferred field: matched, or expected vs. actual.
      const lines = Object.entries(prefer).map(([key, want]) => {
        const has = key in payload;
        const val = payload[key];
        const match = has && val === want;
        const icon = match ? "✅" : "❌";
        const wantStr = String(want);
        const valStr = has ? String(val) : "—";
        return ` ${icon} ${key}: ${match ? wantStr : `ожидали «${wantStr}», есть «${valStr}»`}`;
      });
      console.log(" 🎯 soft-атрибуты:\n" + lines.join("\n"));
    }
  }
}
/**
 * Makes sure the collection exists with the given vector size.
 *
 * Creates it when missing, and deletes and recreates it when the stored
 * vector size differs from `vectorSize`. Otherwise it is left untouched.
 */
async function ensureCollection(vectorSize: number): Promise<void> {
  const vectors = { size: vectorSize, distance: "Cosine" as const };

  const collections = await client.getCollections();
  const exists = collections.collections.some((c) => c.name === COLLECTION_NAME);
  if (!exists) {
    await client.createCollection(COLLECTION_NAME, { vectors });
    console.log(`Коллекция "${COLLECTION_NAME}" создана`);
    return;
  }

  const info = (await client.getCollection(COLLECTION_NAME)) as {
    config?: { params?: { vectors?: { size?: number } } };
  };
  const configSize = info.config?.params?.vectors?.size;
  if (configSize === vectorSize) {
    console.log(`Коллекция "${COLLECTION_NAME}" уже существует`);
    return;
  }

  console.log(
    `Коллекция "${COLLECTION_NAME}": размер ${configSize} != ${vectorSize}, пересоздаём`
  );
  await client.deleteCollection(COLLECTION_NAME);
  await client.createCollection(COLLECTION_NAME, { vectors });
  console.log(`Коллекция "${COLLECTION_NAME}" пересоздана с dimension=${vectorSize}`);
}

/**
 * Indexes all RAG examples into the Qdrant collection, then runs a sample search.
 *
 * Steps: check the Qdrant connection, detect the vector size by embedding a
 * probe string, create or recreate the collection, embed and upsert the
 * examples in batches of BATCH_SIZE, create boolean payload indexes for the
 * filterable fields, and finally run one test search.
 *
 * @throws Error when the probe or any example receives no embedding; errors
 *   from the connection check, collection setup, embedding and upsert calls
 *   propagate. Payload-index failures are only logged.
 */
async function index(): Promise<void> {
  // 1. Check the Qdrant connection
  await checkQdrantConnection();

  // 2. Vector dimension, taken from a probe embedding
  const probe = (await embed(["probe"]))[0];
  if (!probe) throw new Error("Embed API: probe returned empty");
  const vectorSize = probe.length;
  console.log(`Embed API: dimension=${vectorSize}`);

  // 3. Create or recreate the collection
  try {
    await ensureCollection(vectorSize);
  } catch (err) {
    console.error("Ошибка создания коллекции:", err);
    throw err;
  }

  // 4. Build points and upsert them after every batch
  for (let i = 0; i < RAG_EXAMPLES.length; i += BATCH_SIZE) {
    const batch = RAG_EXAMPLES.slice(i, i + BATCH_SIZE);
    const texts = batch.map((ex) => ex.queryUser);
    const vectors = await embed(texts);
    const points = batch.map((ex, j) => {
      // Fail loudly instead of upserting a point with an undefined vector.
      const vector = vectors[j];
      if (!vector) {
        throw new Error(`Embed API: no embedding returned for example ${i + j}`);
      }
      return {
        id: i + j, // point id = position of the example in RAG_EXAMPLES
        vector,
        payload: {
          doc_id: ex.docId,
          query_user: ex.queryUser,
          ...ex.attributes,
        },
      };
    });
    await client.upsert(COLLECTION_NAME, {
      wait: true,
      points,
    });
    console.log(` эмбеддингов: ${i + batch.length}/${RAG_EXAMPLES.length}`);
  }

  // 5. Payload indexes for filtering (cab_sleeping, air_conditioning, tachograph)
  const PAYLOAD_INDEX_FIELDS: { field_name: string; field_schema: "bool" }[] = [
    { field_name: "cab_sleeping", field_schema: "bool" },
    { field_name: "air_conditioning", field_schema: "bool" },
    { field_name: "tachograph", field_schema: "bool" },
  ];
  for (const { field_name, field_schema } of PAYLOAD_INDEX_FIELDS) {
    try {
      await client.createPayloadIndex(COLLECTION_NAME, { field_name, field_schema, wait: true });
      console.log(` индекс payload: ${field_name}`);
    } catch (e) {
      // Best effort: an existing index is fine, anything else is only a warning.
      const msg = e instanceof Error ? e.message : String(e);
      if (msg.includes("already exists") || msg.includes("AlreadyExists")) {
        console.log(` индекс payload: ${field_name} (уже есть)`);
      } else {
        console.warn(` индекс payload ${field_name}:`, msg);
      }
    }
  }
  console.log("Индексация завершена");

  // 6. Sample search against the freshly indexed data
  await searchTest("грузовик с подогревом сидений", 3, {
    prefer: { gearbox_type: "механическая" },
  });
}
// Command-line entry point.
//   <script> search [query]  – run a test search (a default query is used when omitted)
//   <script> [anything else] – index the examples
const args = process.argv.slice(2);
const [cmd, queryArg] = args;
const query = queryArg ?? "грузовик с подогревом сидений";

/** Prints the error and terminates the process with exit code 1. */
const exitWithError = (err: unknown): void => {
  console.error(err);
  process.exit(1);
};

const task =
  cmd === "search"
    ? searchTest(query, 20, {
        prefer: { cab_sleeping: false, air_conditioning: false, tachograph: true },
      })
    : index();
task.catch(exitWithError);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment