# Create and activate a dedicated conda environment named "data" with Python 3.11.
conda create -n data python=3.11
conda activate data
# Install the packages used by the sampling / K-means export script.
conda install conda-forge::pgvector-python numpy pytorch::faiss-cpu conda-forge::psycopg tqdm
# `export` must be lowercase: shell builtins are case-sensitive, and `EXPORT`
# would fail with "command not found", leaving the variable unset.
# OMP_WAIT_POLICY=PASSIVE asks the OpenMP runtime to keep waiting threads passive.
export OMP_WAIT_POLICY=PASSIVE
python sample_kmeans_export.py \
--url "postgresql://postgres:123@localhost:5432/postgres" \
--table embeddings2 \
Warning: VectorChord v0.5.2 is recommended. Please see step 4 for more information. You can still use the original script.
- Machine G: with an NVIDIA GPU, such as an A10, to build external K-means, with minimum hardware requirements:
  - at least 120GB memory
- Machine D: an instance like i7ie.2xlarge to do the benchmark, with minimum hardware requirements:
  - at least 4000GB NVMe SSD
  - at least 64GB memory
  - at least 8 vCPUs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from matplotlib import ticker | |
import numpy as np | |
import pandas as pd | |
import seaborn as sns | |
import matplotlib.pyplot as plt | |
# Eight measured values to plot (presumably queries per second, going by the
# name -- confirm against the rest of the script).
QPS = [13.42, 12.32, 11.23, 8.75, 27.65, 25.36, 20.41, 16.66]
original_data = { | |
"acc": [ |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse | |
import datetime | |
import logging | |
import math | |
import psutil | |
import numpy as np | |
import psycopg | |
from tqdm import tqdm | |
from pgvector.psycopg import register_vector |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import seaborn as sns | |
import matplotlib.pyplot as plt | |
# Three series of seven values each. They look index-aligned (entry i of each
# list describing the same benchmark run) -- confirm against the plotting code.
qps = [25.69, 18.91, 17.13, 16.18, 14.45, 7.24, 4.51]
recall = [0.9079, 0.9461, 0.9497, 0.9535, 0.9608, 0.9650, 0.9672]
p99_latency = [148.25, 260.68, 269.5, 302.29, 456.09, 1042.41, 1675.42]
# One row with two side-by-side axes in a 12x5 figure; the left axes gets
# 20/27 of the width and the right one 7/27.
fig, (ax1, ax3) = plt.subplots(1, 2, figsize=(12, 5), gridspec_kw={'width_ratios': [20, 7]})
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from os.path import join | |
import os | |
import time | |
import argparse | |
from pathlib import Path | |
from tqdm import tqdm | |
import psycopg | |
import h5py | |
from pgvecto_rs.psycopg import register_vector |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
from pgvecto_rs.psycopg import register_vector | |
import psycopg | |
# Generate random data.
rows = 100000
dimensions = 128
# `rows` vectors of `dimensions` floats each, uniformly sampled from [0, 1).
embeddings = np.random.rand(rows, dimensions)
# One integer label in [0, 100) per row, converted to a plain Python list.
categories = np.random.randint(100, size=rows).tolist()
# Ten random vectors with the same dimensionality as the embeddings.
queries = np.random.rand(10, dimensions)