Download the data from http://ann-benchmarks.com/gist-960-euclidean.hdf5:

```bash
wget http://ann-benchmarks.com/gist-960-euclidean.hdf5
mv gist-960-euclidean.hdf5 gist.hdf5
```

Insert the data into the data table.
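The downloaded file uses the standard ann-benchmarks HDF5 layout (base vectors in `train`, query vectors in `test`, ground-truth ids in `neighbors`). A minimal sketch for inspecting it before loading, not part of the benchmark scripts themselves:

```python
import h5py

# gist.hdf5 is the renamed ann-benchmarks file downloaded above.
with h5py.File("gist.hdf5", "r") as f:
    for name in f:
        print(name, f[name].shape, f[name].dtype)
    train = f["train"][:]          # base vectors to insert into the data table
    test = f["test"][:]            # query vectors used by the benchmark
    neighbors = f["neighbors"][:]  # ground-truth neighbor ids for recall

print(train.shape, test.shape, neighbors.shape)
```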
```bash
conda create -n data python=3.11
conda activate data
conda install conda-forge::pgvector-python numpy pytorch::faiss-cpu conda-forge::psycopg tqdm
export OMP_WAIT_POLICY=PASSIVE
python sample_kmeans_export.py \
    --url "postgresql://postgres:123@localhost:5432/postgres" \
    --table embeddings2 \
```
Warning: VectorChord v0.5.2 is recommended. Please see step 4 for more information. You can still use the original script.
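For context, the export step boils down to running faiss K-means over a sample of the stored vectors and saving the centroids. The sketch below only illustrates that idea; the dimension, centroid count, sample, and output path are placeholder assumptions, and the real `sample_kmeans_export.py` samples from the table and exports centroids in the format VectorChord expects.

```python
import numpy as np
import faiss

# Placeholder inputs: in the real script these come from the embeddings2 table
# and from command-line arguments.
dim = 960          # GIST-960 vectors
lists = 4096       # number of centroids to train (placeholder value)
sample = np.random.rand(200_000, dim).astype("float32")  # sampled vectors

# Train K-means; set gpu=True only if faiss-gpu is installed
# (the conda command above installs faiss-cpu).
kmeans = faiss.Kmeans(dim, lists, niter=25, verbose=True, gpu=False)
kmeans.train(sample)

# kmeans.centroids has shape (lists, dim); the export step hands these
# externally built cluster centers to VectorChord.
np.save("centroids.npy", kmeans.centroids)
```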
G: a machine with an NVIDIA GPU, such as an A10, to build the external K-means centroids, with minimum hardware requirements:
D: an instance such as i7ie.2xlarge to run the benchmark, with minimum hardware requirements:
```python
from matplotlib import ticker
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

QPS = [13.42, 12.32, 11.23, 8.75, 27.65, 25.36, 20.41, 16.66]
original_data = {
    "acc": [
```
```python
import argparse
import datetime
import logging
import math

import psutil
import numpy as np
import psycopg
from tqdm import tqdm
from pgvector.psycopg import register_vector
```
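The block above is the import section of the benchmark driver. To illustrate the measurement it performs, here is a rough sketch that times top-k queries and computes recall against the ground truth in gist.hdf5. The table and column names (`embeddings2`, `id`, `embedding`), the connection URL, and the assumption that row ids match the HDF5 train indices are taken from the commands above or assumed for illustration, not copied from the script.

```python
import time

import h5py
import numpy as np
import psycopg
from pgvector.psycopg import register_vector

TOP_K = 10
URL = "postgresql://postgres:123@localhost:5432/postgres"

with h5py.File("gist.hdf5", "r") as f:
    test = f["test"][:].astype("float32")
    truth = f["neighbors"][:, :TOP_K]

latencies, hits = [], 0
with psycopg.connect(URL) as conn:
    register_vector(conn)
    with conn.cursor() as cur:
        for q, gt in zip(test, truth):
            start = time.perf_counter()
            # Assumes rows were inserted with id equal to their train-set index.
            cur.execute(
                "SELECT id FROM embeddings2 ORDER BY embedding <-> %s LIMIT %s",
                (q, TOP_K),
            )
            ids = [row[0] for row in cur.fetchall()]
            latencies.append(time.perf_counter() - start)
            hits += len(set(ids) & set(gt.tolist()))

recall = hits / (len(test) * TOP_K)
qps = len(test) / sum(latencies)
p99 = np.percentile(latencies, 99) * 1000  # milliseconds
print(f"recall={recall:.4f} qps={qps:.2f} p99={p99:.2f}ms")
```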
```python
import seaborn as sns
import matplotlib.pyplot as plt

qps = [25.69, 18.91, 17.13, 16.18, 14.45, 7.24, 4.51]
recall = [0.9079, 0.9461, 0.9497, 0.9535, 0.9608, 0.9650, 0.9672]
p99_latency = [148.25, 260.68, 269.5, 302.29, 456.09, 1042.41, 1675.42]

fig, (ax1, ax3) = plt.subplots(1, 2, figsize=(12, 5), gridspec_kw={'width_ratios': [20, 7]})
```
```python
from os.path import join
import os
import time
import argparse
from pathlib import Path

from tqdm import tqdm
import psycopg
import h5py
from pgvecto_rs.psycopg import register_vector
```
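These are the imports of the insert script. Its core job is to bulk-load the `train` vectors from gist.hdf5 into the data table; a minimal sketch of that step using psycopg's binary COPY with the pgvector client is below. The table name `embeddings2`, the vector dimension 960, and the connection URL are assumptions carried over from the earlier commands; the real script also parses arguments and reports progress with tqdm.

```python
import h5py
import psycopg
from pgvector.psycopg import register_vector

URL = "postgresql://postgres:123@localhost:5432/postgres"

with h5py.File("gist.hdf5", "r") as f:
    train = f["train"][:].astype("float32")  # 1,000,000 x 960 for GIST-960

with psycopg.connect(URL) as conn:
    # Assumes the vector extension (pgvector / VectorChord) is installed.
    register_vector(conn)
    with conn.cursor() as cur:
        cur.execute(
            "CREATE TABLE IF NOT EXISTS embeddings2 "
            "(id integer PRIMARY KEY, embedding vector(960))"
        )
        # Binary COPY is much faster than row-by-row INSERTs for bulk loads.
        with cur.copy(
            "COPY embeddings2 (id, embedding) FROM STDIN WITH (FORMAT BINARY)"
        ) as copy:
            copy.set_types(["int4", "vector"])
            for i, vec in enumerate(train):
                copy.write_row((i, vec))
```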
```python
import numpy as np
from pgvecto_rs.psycopg import register_vector
import psycopg

# generate random data
rows = 100000
dimensions = 128
embeddings = np.random.rand(rows, dimensions)
categories = np.random.randint(100, size=rows).tolist()
queries = np.random.rand(10, dimensions)
```
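Assuming the random vectors above have been loaded into a table such as `items (id integer, category integer, embedding vector(128))`, a filtered nearest-neighbor query over them could look like the following sketch; the table layout, the category value, and the connection URL are assumptions for illustration.

```python
import numpy as np
import psycopg
from pgvector.psycopg import register_vector

URL = "postgresql://postgres:123@localhost:5432/postgres"
queries = np.random.rand(10, 128).astype("float32")  # stand-in for the queries above

with psycopg.connect(URL) as conn:
    register_vector(conn)
    with conn.cursor() as cur:
        for q in queries:
            # Top-10 nearest neighbors by L2 distance, restricted to one category.
            cur.execute(
                "SELECT id FROM items WHERE category = %s "
                "ORDER BY embedding <-> %s LIMIT 10",
                (42, q),
            )
            print([row[0] for row in cur.fetchall()])
```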