Created
March 2, 2026 11:06
-
-
Save AnteaterKit/47fd39b7280889ffaa1144457e6f1e47 to your computer and use it in GitHub Desktop.
qdrant index
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Indexes the RAG examples of KP ("КП") documents into Qdrant Cloud.
 * Modeled on https://qdrant.tech/documentation/cloud-quickstart/
 */
| import { QdrantClient } from "@qdrant/js-client-rest"; | |
| import { RAG_EXAMPLES } from "./docling/output/rag-examples.js"; | |
/**
 * Process environment variables, or an empty object when `process` is not
 * defined on `globalThis`.
 */
const ENV: Record<string, string | undefined> =
  (globalThis as { process?: { env?: Record<string, string | undefined> } }).process?.env ?? {};

/** Name of the Qdrant collection used for indexing and searching. */
const COLLECTION_NAME = "kp_search";

/**
 * Base URL of the embedding service (`/api/embed` is appended by `embed`).
 * `||` is deliberate: an empty EMBED_URL variable also falls back.
 * NOTE(review): the fallback literal is a blank placeholder — set EMBED_URL.
 */
const EMBED_URL = ENV.EMBED_URL || " ";

/** Embedding model name sent to the embedding service. */
const EMBED_MODEL = ENV.EMBED_MODEL || "qwen3-embedding:8b";

/** Enables printing of the query "plan": filter + params (Qdrant does not expose an internal EXPLAIN). */
const DEBUG_SEARCH_PLAN = ENV.DEBUG_SEARCH_PLAN === "1";

/**
 * Shared Qdrant client. Connection settings are taken from QDRANT_URL and
 * QDRANT_API_KEY so credentials do not have to live in the source file; the
 * original literals remain as fallbacks when the variables are unset.
 */
const client = new QdrantClient({
  url: ENV.QDRANT_URL || " ",
  apiKey: ENV.QDRANT_API_KEY || " ",
});
/**
 * Checks the connection to Qdrant by requesting the list of collections.
 * Logs the outcome and rethrows the original error when the request fails.
 */
async function checkQdrantConnection(): Promise<void> {
  try {
    await client.getCollections();
  } catch (cause) {
    console.error("Qdrant: ошибка соединения", cause);
    throw cause;
  }
  console.log("Qdrant: соединение OK");
}
/**
 * Computes embeddings through the Ollama `/api/embed` endpoint.
 *
 * @param texts Strings to embed; all of them are sent in one request.
 * @returns The `embeddings` array from the response, one vector per input.
 * @throws Error when the response status is not OK, when no embeddings are
 *   returned, or when the number of embeddings differs from `texts.length`.
 */
async function embed(texts: string[]): Promise<number[][]> {
  // Drop one trailing slash from the base URL so the path is not doubled.
  const endpoint = `${EMBED_URL.replace(/\/$/, "")}/api/embed`;
  const response = await fetch(endpoint, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ model: EMBED_MODEL, input: texts }),
  });
  if (!response.ok) {
    throw new Error(`Embed API error! status: ${response.status}`);
  }

  const data = (await response.json()) as { embeddings?: unknown } | null;
  const embeddings = data?.embeddings;
  if (!Array.isArray(embeddings) || embeddings.length === 0) {
    throw new Error("Embed API: empty embeddings");
  }
  // Callers pair vectors with inputs by position, so a short or long reply
  // must fail loudly instead of yielding missing vectors.
  if (embeddings.length !== texts.length) {
    throw new Error(
      `Embed API: expected ${texts.length} embeddings, got ${embeddings.length}`
    );
  }
  return embeddings as number[][];
}
/** Number of examples taken per embed + upsert iteration. */
const BATCH_SIZE = 1;

/**
 * Filter parameters: payload key (snake_case, e.g. `gearbox_type`) mapped to
 * the desired value.
 */
export type SearchFilter = { [payloadKey: string]: string | number | boolean };

/** Optional attribute constraints for a search. */
export interface SearchOptions {
  /** Hard filter: only points whose payload matches all of the fields. */
  filter?: SearchFilter;
  /** Soft filter: points whose payload matches are preferred; the rest are not excluded. */
  prefer?: SearchFilter;
}
/**
 * Converts a flat key → value map into a Qdrant filter object in which every
 * entry becomes a `{ key, match: { value } }` condition under `must`.
 *
 * @param prefs Payload keys and the values they are required to equal.
 * @returns A filter with one `must` condition per entry (empty for `{}`).
 */
function buildQdrantFilter(prefs: SearchFilter): { must: Array<{ key: string; match: { value: string | number | boolean } }> } {
  const must: Array<{ key: string; match: { value: string | number | boolean } }> = [];
  for (const [key, value] of Object.entries(prefs)) {
    must.push({ key, match: { value } });
  }
  return { must };
}
/**
 * Counts how many fields of `prefer` are strictly equal (`===`) to the
 * same-named field of `payload`.
 *
 * @param payload Point payload; `null` and `undefined` are both accepted and
 *   count as zero matches, so callers do not need to cast a nullable payload.
 * @param prefer  Preferred payload values keyed by payload field name.
 * @returns Number of matching fields, `0` when there is no payload.
 */
function countPreferMatches(
  payload: Record<string, unknown> | null | undefined,
  prefer: SearchFilter
): number {
  if (!payload) return 0;
  return Object.entries(prefer).filter(([key, value]) => payload[key] === value).length;
}
/**
 * Runs a test search for `query` and prints the hits to the console.
 *
 * @param query   Query text; embedded with `embed` before searching.
 * @param limit   Maximum number of hits printed (default 20).
 * @param options Optional hard `filter` (sent to Qdrant as `must` conditions)
 *   and soft `prefer` (used to re-rank the hits locally).
 * @throws Error when the embed API returns no vector for the query.
 */
async function searchTest(
  query: string,
  limit = 20,
  options?: SearchOptions
): Promise<void> {
  const queryVector = (await embed([query]))[0];
  if (!queryVector) throw new Error("Embed API: query vector empty");

  const prefer = options?.prefer;
  const hardFilter = options?.filter ? buildQdrantFilter(options.filter) : undefined;
  // With `prefer`, fetch more candidates than requested so that the local
  // re-ranking below has something to choose from.
  const requestLimit = prefer ? Math.max(limit * 5, 20) : limit;

  const filter = {
    // `options.filter` used to be built but never sent; it is now applied as
    // `must` conditions. Without a hard filter no `must` key is added.
    ...hardFilter,
    // NOTE(review): these conditions are hard-coded and independent of
    // `prefer`. In Qdrant a `should` clause presumably requires at least one
    // condition to match, which also restricts the result set — confirm.
    should: [
      { key: "cab_sleeping", match: { value: false } },
      { key: "air_conditioning", match: { value: false } },
      { key: "tachograph", match: { value: true } },
    ],
  };
  const params = { hnsw_ef: 128, exact: false };

  if (DEBUG_SEARCH_PLAN) {
    console.log("\n📋 План запроса (filter + params):");
    console.log(JSON.stringify({ filter, params, limit: requestLimit }, null, 2));
  }

  let results = await client.search(COLLECTION_NAME, {
    vector: queryVector,
    with_payload: true, // return the payload with its attributes
    limit: requestLimit,
    filter,
    params,
  });

  if (prefer && results.length > 0) {
    // Most preferred-attribute matches first, ties broken by descending score.
    results = [...results]
      .sort((a, b) => {
        const matchA = countPreferMatches(a.payload as Record<string, unknown>, prefer);
        const matchB = countPreferMatches(b.payload as Record<string, unknown>, prefer);
        if (matchB !== matchA) return matchB - matchA;
        return (b.score ?? 0) - (a.score ?? 0);
      })
      .slice(0, limit);
  }

  console.log(`\nПоиск по запросу: "${query}"${prefer ? ` (prefer: ${JSON.stringify(prefer)})` : ""}`);
  for (const r of results) {
    console.log(
      ` - doc_id: ${r.payload?.doc_id}, score: ${r.score?.toFixed(4)}, query: ${r.payload?.query_user}`
    );
    if (prefer && Object.keys(prefer).length > 0) {
      // Per-attribute report: wanted value versus the value found in the payload.
      const payload = (r.payload ?? {}) as Record<string, unknown>;
      const lines = Object.entries(prefer).map(([key, want]) => {
        const has = key in payload;
        const val = payload[key];
        const match = has && val === want;
        const icon = match ? "✅" : "❌";
        const wantStr = String(want);
        const valStr = has ? String(val) : "—";
        return ` ${icon} ${key}: ${match ? wantStr : `ожидали «${wantStr}», есть «${valStr}»`}`;
      });
      console.log(" 🎯 soft-атрибуты:\n" + lines.join("\n"));
    }
  }
}
/**
 * Builds the search index: checks the Qdrant connection, ensures the
 * collection exists with the embedding dimension, embeds and upserts every
 * entry of `RAG_EXAMPLES`, creates the payload indexes and finally runs one
 * test search.
 *
 * @throws Error when the embed API yields no probe vector or no vector for an
 *   example; errors from collection setup are logged and rethrown.
 */
async function index() {
  // 1. Check the connection to Qdrant.
  await checkQdrantConnection();

  // 2. Take the vector dimension from the embed API.
  const probe = (await embed(["probe"]))[0];
  if (!probe) throw new Error("Embed API: probe returned empty");
  const vectorSize = probe.length;
  console.log(`Embed API: dimension=${vectorSize}`);

  // Single place for the collection config used by "create" and "recreate".
  const createCollection = () =>
    client.createCollection(COLLECTION_NAME, {
      vectors: { size: vectorSize, distance: "Cosine" },
    });

  // 3. Create the collection, or recreate it when the dimension differs.
  try {
    const collections = await client.getCollections();
    const exists = collections.collections.some((c) => c.name === COLLECTION_NAME);
    if (exists) {
      const info = (await client.getCollection(COLLECTION_NAME)) as {
        config?: { params?: { vectors?: { size?: number } } };
      };
      const configSize = info.config?.params?.vectors?.size;
      if (configSize !== vectorSize) {
        console.log(
          `Коллекция "${COLLECTION_NAME}": размер ${configSize} != ${vectorSize}, пересоздаём`
        );
        await client.deleteCollection(COLLECTION_NAME);
        await createCollection();
        console.log(`Коллекция "${COLLECTION_NAME}" пересоздана с dimension=${vectorSize}`);
      } else {
        console.log(`Коллекция "${COLLECTION_NAME}" уже существует`);
      }
    } else {
      await createCollection();
      console.log(`Коллекция "${COLLECTION_NAME}" создана`);
    }
  } catch (err) {
    console.error("Ошибка создания коллекции:", err);
    throw err;
  }

  // 4. Build the points and upsert them after every batch.
  for (let i = 0; i < RAG_EXAMPLES.length; i += BATCH_SIZE) {
    const batch = RAG_EXAMPLES.slice(i, i + BATCH_SIZE);
    const vectors = await embed(batch.map((ex) => ex.queryUser));
    const points = batch.map((ex, j) => {
      const vector = vectors[j];
      // Fail before upserting instead of sending a point without a vector.
      if (!vector) {
        throw new Error(`Embed API: missing embedding for example #${i + j}`);
      }
      return {
        id: i + j, // position of the example in RAG_EXAMPLES
        vector,
        payload: {
          doc_id: ex.docId,
          query_user: ex.queryUser,
          ...ex.attributes,
        },
      };
    });
    await client.upsert(COLLECTION_NAME, {
      wait: true,
      points,
    });
    console.log(` эмбеддингов: ${i + batch.length}/${RAG_EXAMPLES.length}`);
  }

  // 5. Payload indexes for filtering (cab_sleeping, air_conditioning, tachograph).
  const PAYLOAD_INDEX_FIELDS: { field_name: string; field_schema: "bool" }[] = [
    { field_name: "cab_sleeping", field_schema: "bool" },
    { field_name: "air_conditioning", field_schema: "bool" },
    { field_name: "tachograph", field_schema: "bool" },
  ];
  for (const { field_name, field_schema } of PAYLOAD_INDEX_FIELDS) {
    try {
      await client.createPayloadIndex(COLLECTION_NAME, { field_name, field_schema, wait: true });
      console.log(` индекс payload: ${field_name}`);
    } catch (e) {
      // Best effort: a failed index is reported but does not abort indexing.
      const msg = e instanceof Error ? e.message : String(e);
      if (msg.includes("already exists") || msg.includes("AlreadyExists")) {
        console.log(` индекс payload: ${field_name} (уже есть)`);
      } else {
        console.warn(` индекс payload ${field_name}:`, msg);
      }
    }
  }

  console.log("Индексация завершена");
  await searchTest("грузовик с подогревом сидений", 3, {
    prefer: { gearbox_type: "механическая" },
  });
}
// CLI entry point: `search [query]` runs a test search with a fixed `prefer`
// set; any other (or no) command runs the indexing.
const args = process.argv.slice(2);
const [cmd, queryArg] = args;
const query = queryArg ?? "грузовик с подогревом сидений";

/** Prints the error and terminates the process with exit code 1. */
const exitWithError = (err: unknown): never => {
  console.error(err);
  process.exit(1);
};

const task =
  cmd === "search"
    ? searchTest(query, 20, {
        prefer: { cab_sleeping: false, air_conditioning: false, tachograph: true },
      })
    : index();
task.catch(exitWithError);
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment