Last active
September 6, 2022 16:58
-
-
Save mdouze/11869525966715e84967e510b1e33229 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import faiss\n", | |
"import numpy as np\n", | |
"from faiss.contrib.datasets import SyntheticDataset" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"ds = SyntheticDataset(128, 10000, 1000, 200)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Regular (CPU) training" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"index = faiss.index_factory(ds.d, \"OPQ8,PQ8np\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"CPU times: user 16min 59s, sys: 2min 26s, total: 19min 25s\n", | |
"Wall time: 21.7 s\n" | |
] | |
} | |
], | |
"source": [ | |
"%%time \n", | |
"index.train(ds.get_train())" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# GPU training" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"index2 = faiss.index_factory(ds.d, \"OPQ8,PQ8np\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# build a PQ whose assign_index is a GPU flat index, and make the OPQ transform train with that PQ\n", | |
"opq2 = faiss.downcast_VectorTransform(index2.chain.at(0))\n", | |
"pq = faiss.ProductQuantizer(opq2.d_out, opq2.M, 8)\n", | |
"assign_index = faiss.index_cpu_to_all_gpus(faiss.IndexFlatL2(pq.dsub), ngpu=1)\n", | |
"pq.assign_index = assign_index\n", | |
"opq2.pq = pq" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"CPU times: user 4min 14s, sys: 26.4 s, total: 4min 41s\n", | |
"Wall time: 5.23 s\n" | |
] | |
} | |
], | |
"source": [ | |
"%%time \n", | |
"index2.train(ds.get_train())" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# index2 is now trained: from here on, pq and assign_index can be deallocated.\n", | |
"# Note that the PQ inside the IndexPQ was still trained on the CPU." | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Share the PQ between OPQ and the IndexPQ" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"index3 = faiss.index_factory(ds.d, \"OPQ8,PQ8np\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"opq3 = faiss.downcast_VectorTransform(index3.chain.at(0))\n", | |
"pq = faiss.downcast_index(index3.index).pq\n", | |
"assign_index = faiss.index_cpu_to_all_gpus(faiss.IndexFlatL2(pq.dsub), ngpu=1)\n", | |
"pq.assign_index = assign_index\n", | |
"opq3.pq = pq" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 14, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"CPU times: user 2min 27s, sys: 11.6 s, total: 2min 39s\n", | |
"Wall time: 2.93 s\n" | |
] | |
} | |
], | |
"source": [ | |
"%%time \n", | |
"index3.train(ds.get_train())" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"bento_stylesheets": { | |
"bento/extensions/flow/main.css": true, | |
"bento/extensions/kernel_selector/main.css": true, | |
"bento/extensions/kernel_ui/main.css": true, | |
"bento/extensions/new_kernel/main.css": true, | |
"bento/extensions/system_usage/main.css": true, | |
"bento/extensions/theme/main.css": true | |
}, | |
"kernelspec": { | |
"display_name": "faiss", | |
"language": "python", | |
"name": "bento_kernel_faiss" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.8.6" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 4 | |
} |
Was this intended to demonstrate training the PQ component of OPQ (in "Gpu training"), as well as training the same PQ in OPQ and IndexPQ (in "Share the PQ between OPQ and the IndexPQ")?
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Warning: this code seems to be buggy. Please do not use it until it has been corrected.