Skip to content

Instantly share code, notes, and snippets.

@mdouze
Created June 29, 2020 15:31
Show Gist options
  • Select an option

  • Save mdouze/1aa85afd3a753a6638106c9c06ed5f96 to your computer and use it in GitHub Desktop.

Select an option

Save mdouze/1aa85afd3a753a6638106c9c06ed5f96 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import numpy as np\n",
"import time\n",
"\n",
"from multiprocessing.dummy import Pool as ThreadPool"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1176378172132"
]
},
"execution_count": 21,
"metadata": {
"bento_obj_id": "140487237778800"
},
"output_type": "execute_result"
}
],
"source": [
"data = np.memmap('/tmp/0.faissindex_invlists')\n",
"data.size"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def benchmark_read(data, nq, sz, nt): \n",
" pool = ThreadPool(nt)\n",
" addresses = np.random.randint(data.size - sz, size=nq)\n",
" \n",
" def get_data(ad): \n",
" return data[ad:ad + sz].copy()\n",
" \n",
" t0 = time.time()\n",
" pool.map(get_data, addresses) \n",
" \n",
" t1 = time.time() \n",
" rate = sz * nq / (t1 - t0)\n",
" print(f'read {nq} blocks of size {sz} in {nt} threads: {t1 - t0:.3f} s ({rate / (1<<20):.2f} MiB / s)')"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"read 50 blocks of size 480000 in 4 threads: 2.173 s (10.53 MiB / s)\n",
"read 50 blocks of size 480000 in 10 threads: 1.705 s (13.42 MiB / s)\n",
"read 50 blocks of size 480000 in 20 threads: 1.714 s (13.35 MiB / s)\n",
"read 50 blocks of size 480000 in 40 threads: 1.638 s (13.97 MiB / s)\n"
]
}
],
"source": [
"# impact of nb of read threads\n",
"nq = 50\n",
"sz = 16 * 30000\n",
"\n",
"for nt in 4, 10, 20, 40:\n",
" benchmark_read(data, nq, sz, nt)"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"read 500 blocks of size 1 in 20 threads: 19.289 s (0.00 MiB / s)\n"
]
}
],
"source": [
"# reading a single byte\n",
"nq = 500\n",
"sz = 1\n",
"nt = 20\n",
"\n",
"benchmark_read(data, nq, sz, nt)"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"read 1 blocks of size 200000000 in 20 threads: 5.230 s (36.47 MiB / s)\n",
"read 10 blocks of size 20000000 in 20 threads: 3.004 s (63.49 MiB / s)\n",
"read 100 blocks of size 2000000 in 20 threads: 8.036 s (23.73 MiB / s)\n",
"read 1000 blocks of size 200000 in 20 threads: 37.519 s (5.08 MiB / s)\n"
]
}
],
"source": [
"# reading the same amount of data with different fragmentations\n",
"totsz = int(200e6)\n",
"for nq in 1, 10, 100, 1000: \n",
" sz = totsz // nq\n",
" nt = 20\n",
" benchmark_read(data, nq, sz, nt)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"bento_stylesheets": {
"bento/extensions/flow/main.css": true,
"bento/extensions/kernel_selector/main.css": true,
"bento/extensions/kernel_ui/main.css": true,
"bento/extensions/new_kernel/main.css": true,
"bento/extensions/system_usage/main.css": true,
"bento/extensions/theme/main.css": true
},
"kernelspec": {
"display_name": "matthijsfaiss (local)",
"language": "python",
"name": "matthijsfaiss_local"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.5+"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment