Last active
December 11, 2023 04:32
-
-
Save kacperlukawski/2d3a3225f15a4cc5772cd1c81866340d to your computer and use it in GitHub Desktop.
Qdrant tips&tricks
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"id": "385fc2ea", | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2023-03-13T11:52:47.900517Z", | |
"start_time": "2023-03-13T11:52:47.700693Z" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"import config\n", | |
"import func\n", | |
"import numpy as np" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"id": "51f36c37", | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2023-03-13T11:52:48.194538Z", | |
"start_time": "2023-03-13T11:52:47.905511Z" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"from tqdm import tqdm\n", | |
"from qdrant_client import QdrantClient\n", | |
"from qdrant_client.http import models as rest" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"id": "a86e5c3f", | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2023-03-13T11:52:48.271822Z", | |
"start_time": "2023-03-13T11:52:48.196267Z" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"client = QdrantClient(\n", | |
" url=\"http://localhost\",\n", | |
" prefer_grpc=True,\n", | |
")" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "404b2b50", | |
"metadata": {}, | |
"source": [ | |
"# Memory usage" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"id": "e2b63056", | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2023-03-13T11:52:49.954056Z", | |
"start_time": "2023-03-13T11:52:48.274622Z" | |
} | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"CONTAINER ID NAME CPU % MEM USAGE / LIMIT MEM % NET I/O BLOCK I/O PIDS\r\n", | |
"51ed01f404f0 qdrant-tips-tricks_qdrant_1 0.25% 655.3MiB / 30.81GiB 2.08% 457MB / 452kB 0B / 1.83GB 54\r\n" | |
] | |
} | |
], | |
"source": [ | |
"!docker stats --no-stream" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "7697736c", | |
"metadata": {}, | |
"source": [ | |
"# On-disk payloads" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"id": "59db6a5d", | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2023-03-13T11:52:49.984506Z", | |
"start_time": "2023-03-13T11:52:49.961517Z" | |
} | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"CollectionInfo(status=<CollectionStatus.GREEN: 'green'>, optimizer_status=<OptimizersStatusOneOf.OK: 'ok'>, vectors_count=50001, indexed_vectors_count=50001, points_count=50001, segments_count=5, config=CollectionConfig(params=CollectionParams(vectors=VectorParams(size=2048, distance=<Distance.COSINE: 'Cosine'>), shard_number=1, replication_factor=1, write_consistency_factor=1, on_disk_payload=True), hnsw_config=HnswConfig(m=16, ef_construct=100, full_scan_threshold=10000, max_indexing_threads=0, on_disk=False, payload_m=None), optimizer_config=OptimizersConfig(deleted_threshold=0.2, vacuum_min_vector_number=1000, default_segment_number=0, max_segment_size=None, memmap_threshold=None, indexing_threshold=10000, flush_interval_sec=5, max_optimization_threads=1), wal_config=WalConfig(wal_capacity_mb=32, wal_segments_ahead=0)), payload_schema={})" | |
] | |
}, | |
"execution_count": 5, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"client.get_collection(collection_name=config.COLLECTION_NAME)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"id": "5388856c", | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2023-03-13T11:52:50.512028Z", | |
"start_time": "2023-03-13T11:52:49.989242Z" | |
} | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"True" | |
] | |
}, | |
"execution_count": 6, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"client.recreate_collection(\n", | |
" collection_name=config.COLLECTION_NAME,\n", | |
" vectors_config=rest.VectorParams(\n", | |
" size=config.VECTOR_SIZE,\n", | |
" distance=rest.Distance.COSINE,\n", | |
" ),\n", | |
" on_disk_payload=True,\n", | |
")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"id": "f146d480", | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2023-03-13T11:59:54.998349Z", | |
"start_time": "2023-03-13T11:53:09.728184Z" | |
} | |
}, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"51it [06:45, 7.94s/it] \n" | |
] | |
} | |
], | |
"source": [ | |
"max_num = 50_000\n", | |
"batch_size = 1000\n", | |
"\n", | |
"objects = func.iterate_objects(max_num=max_num)\n", | |
"batched_objects = func.batchify_objects(objects, n=batch_size)\n", | |
"for batch in tqdm(batched_objects, total=max_num // batch_size):\n", | |
" ids, vectors, payloads = batch\n", | |
" client.upsert(\n", | |
" collection_name=config.COLLECTION_NAME,\n", | |
" points=rest.Batch(\n", | |
" ids=ids,\n", | |
" vectors=vectors,\n", | |
" payloads=payloads,\n", | |
" )\n", | |
" )" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"id": "1edd657a", | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2023-03-13T11:59:56.685866Z", | |
"start_time": "2023-03-13T11:59:55.000790Z" | |
} | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"CONTAINER ID NAME CPU % MEM USAGE / LIMIT MEM % NET I/O BLOCK I/O PIDS\r\n", | |
"51ed01f404f0 qdrant-tips-tricks_qdrant_1 3.22% 843.2MiB / 30.81GiB 2.67% 915MB / 854kB 0B / 8.46GB 55\r\n" | |
] | |
} | |
], | |
"source": [ | |
"!docker stats --no-stream" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "f47b41ed", | |
"metadata": {}, | |
"source": [ | |
"# Memmap support" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"id": "b9edc24a", | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2023-03-13T11:59:57.274388Z", | |
"start_time": "2023-03-13T11:59:56.691823Z" | |
} | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"True" | |
] | |
}, | |
"execution_count": 11, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"client.recreate_collection(\n", | |
" collection_name=config.COLLECTION_NAME,\n", | |
" vectors_config=rest.VectorParams(\n", | |
" size=config.VECTOR_SIZE,\n", | |
" distance=rest.Distance.COSINE,\n", | |
" ),\n", | |
" on_disk_payload=True,\n", | |
" optimizers_config=rest.OptimizersConfigDiff(\n", | |
" memmap_threshold=10_000, # in kilobytes: 10,000 KB (~10 MB) of vectors per segment\n", | |
" ),\n", | |
")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"id": "46b0b9ce", | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2023-03-13T12:10:13.764017Z", | |
"start_time": "2023-03-13T11:59:57.276357Z" | |
} | |
}, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"51it [10:16, 12.09s/it] \n" | |
] | |
} | |
], | |
"source": [ | |
"objects = func.iterate_objects(max_num=max_num)\n", | |
"batched_objects = func.batchify_objects(objects, n=batch_size)\n", | |
"for batch in tqdm(batched_objects, total=max_num // batch_size):\n", | |
" ids, vectors, payloads = batch\n", | |
" client.upsert(\n", | |
" collection_name=config.COLLECTION_NAME,\n", | |
" points=rest.Batch(\n", | |
" ids=ids,\n", | |
" vectors=vectors,\n", | |
" payloads=payloads,\n", | |
" )\n", | |
" )" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 14, | |
"id": "3447c869", | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2023-03-13T13:22:18.876054Z", | |
"start_time": "2023-03-13T13:22:18.865702Z" | |
} | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"CollectionInfo(status=<CollectionStatus.GREEN: 'green'>, optimizer_status=<OptimizersStatusOneOf.OK: 'ok'>, vectors_count=50001, indexed_vectors_count=50001, points_count=50001, segments_count=2, config=CollectionConfig(params=CollectionParams(vectors=VectorParams(size=2048, distance=<Distance.COSINE: 'Cosine'>), shard_number=1, replication_factor=1, write_consistency_factor=1, on_disk_payload=True), hnsw_config=HnswConfig(m=16, ef_construct=100, full_scan_threshold=10000, max_indexing_threads=0, on_disk=False, payload_m=None), optimizer_config=OptimizersConfig(deleted_threshold=0.2, vacuum_min_vector_number=1000, default_segment_number=0, max_segment_size=None, memmap_threshold=10000, indexing_threshold=20000, flush_interval_sec=5, max_optimization_threads=1), wal_config=WalConfig(wal_capacity_mb=32, wal_segments_ahead=0)), payload_schema={})" | |
] | |
}, | |
"execution_count": 14, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"client.get_collection(collection_name=config.COLLECTION_NAME)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 15, | |
"id": "55bcff36", | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2023-03-13T13:22:22.277016Z", | |
"start_time": "2023-03-13T13:22:20.576555Z" | |
} | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"CONTAINER ID NAME CPU % MEM USAGE / LIMIT MEM % NET I/O BLOCK I/O PIDS\r\n", | |
"51ed01f404f0 qdrant-tips-tricks_qdrant_1 0.23% 466.2MiB / 30.81GiB 1.48% 1.37GB / 1.31MB 0B / 15GB 54\r\n" | |
] | |
} | |
], | |
"source": [ | |
"!docker stats --no-stream" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "9d0578b8", | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3 (ipykernel)", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.10.6" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 5 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment