Last active
March 17, 2024 11:56
-
-
Save agoose77/cd77e3b230b2132cbedff05c162eb222 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 26, | |
"id": "b306d5f8-9e84-4e15-9d6d-4f4dd5005565", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import awkward as ak\n", | |
"import numpy as np\n", | |
"import numba as nb" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 27, | |
"id": "de2c695c-7ea7-4449-997a-bb35829814dc", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def nlargest_last(array, n):\n", | |
" def apply(layout, **kwargs):\n", | |
" if layout.is_list and layout.purelist_depth == 2:\n", | |
" layout = layout.to_ListOffsetArray64()\n", | |
" indices = np.empty(layout.length * n, dtype=np.int64)\n", | |
" nlargest_kernel(\n", | |
" layout.offsets.data,\n", | |
" indices,\n", | |
" layout.content.data,\n", | |
" n\n", | |
" )\n", | |
" return ak.contents.ListOffsetArray(\n", | |
" ak.index.Index64(np.arange(0, indices.size+1, n)),\n", | |
" ak.contents.NumpyArray(indices)\n", | |
" )\n", | |
" \n", | |
" return ak.transform(apply, array)\n", | |
" \n", | |
"\n", | |
"@nb.njit\n", | |
"def nlargest_kernel(offsets, indices, values, n):\n", | |
" for i in range(len(offsets) - 1):\n", | |
" j = offsets[i]\n", | |
" k = offsets[i+1]\n", | |
" size = k - j\n", | |
" p = size - n\n", | |
" j = np.argpartition(values[j:k], p)\n", | |
" indices[i*n:(i+1)*n] = j[p:]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 28, | |
"id": "12935a38-d72d-40aa-9a84-e78f8059103c", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def nlargest_sorted_last(array, n):\n", | |
" def apply(layout, **kwargs):\n", | |
" if layout.is_list and layout.purelist_depth == 2:\n", | |
" layout = layout.to_ListOffsetArray64()\n", | |
" indices = np.empty(layout.length * n, dtype=np.int64)\n", | |
" nlargest_sorted_kernel(\n", | |
" layout.offsets.data,\n", | |
" indices,\n", | |
" layout.content.data,\n", | |
" n\n", | |
" )\n", | |
" return ak.contents.ListOffsetArray(\n", | |
" ak.index.Index64(np.arange(0, indices.size+1, n)),\n", | |
" ak.contents.NumpyArray(indices)\n", | |
" )\n", | |
" \n", | |
" return ak.transform(apply, array)\n", | |
" \n", | |
"\n", | |
"@nb.njit\n", | |
"def nlargest_sorted_kernel(offsets, indices, values, n):\n", | |
" for i in range(len(offsets) - 1):\n", | |
" j = offsets[i]\n", | |
" k = offsets[i+1]\n", | |
" size = k - j\n", | |
" p = size - n\n", | |
" j = np.argpartition(values[j:k], np.arange(p, p+n))\n", | |
" indices[i*n:(i+1)*n] = j[p:]" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "0b7e9d46-aa89-4a28-8355-0fb00f231c07", | |
"metadata": {}, | |
"source": [ | |
"## For small jets" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 37, | |
"id": "d4a27024-ee23-43f8-800c-f70f75c1fa72", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"4998791" | |
] | |
}, | |
"execution_count": 37, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"n_jets = np.random.randint(3, 8, size=1_000_000)\n", | |
"pt = ak.unflatten(\n", | |
" np.random.random(n_jets.sum()),\n", | |
" n_jets\n", | |
")\n", | |
"ak.count(pt)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 38, | |
"id": "06848e7f-57c5-4739-8dbc-f4fbef3ceffc", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"883 ms ± 15.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" | |
] | |
} | |
], | |
"source": [ | |
"%%timeit\n", | |
"i_a = nlargest_last(pt, 2)\n", | |
"pt_sorted_a = ak.sort(\n", | |
" pt[i_a], axis=-1\n", | |
")\n", | |
"ordered_pt_a = ak.pad_none(\n", | |
" pt_sorted_a,\n", | |
" target=2,\n", | |
" clip=True\n", | |
")\n", | |
"\n", | |
"leading_jet = ordered_pt_a[:, 0]\n", | |
"subleading_jet = ordered_pt_a[:, 1]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 39, | |
"id": "6c70d499-f394-4d4e-9b90-17eea95cd26c", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"1.3 s ± 30.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" | |
] | |
} | |
], | |
"source": [ | |
"%%timeit\n", | |
"i_b = nlargest_sorted_last(pt, 2)\n", | |
"ordered_pt_b = ak.pad_none(\n", | |
" pt[i_b],\n", | |
" target=2,\n", | |
" clip=True\n", | |
")\n", | |
"\n", | |
"leading_jet = ordered_pt_b[:, 0]\n", | |
"subleading_jet = ordered_pt_b[:, 1]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 40, | |
"id": "865c37f6-0454-4083-adca-4606b0e7600f", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"229 ms ± 812 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" | |
] | |
} | |
], | |
"source": [ | |
"%%timeit\n", | |
"pt_sorted_c = ak.sort(\n", | |
" pt, axis=-1\n", | |
")[:, :2]\n", | |
"ordered_pt_c = ak.pad_none(\n", | |
" pt_sorted_c,\n", | |
" target=2,\n", | |
" clip=True\n", | |
")\n", | |
"\n", | |
"leading_jet = ordered_pt_c[:, 0]\n", | |
"subleading_jet = ordered_pt_c[:, 1]" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "d6a0843f-4be5-4bc2-914b-83717edf890f", | |
"metadata": {}, | |
"source": [ | |
"## For large jets" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 41, | |
"id": "063a5638-1987-43fd-8566-4188a816392d", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"5485005" | |
] | |
}, | |
"execution_count": 41, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"n_jets = np.random.randint(300, 800, size=1_000_0)\n", | |
"pt = ak.unflatten(\n", | |
" np.random.random(n_jets.sum()),\n", | |
" n_jets\n", | |
")\n", | |
"ak.count(pt)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 42, | |
"id": "b06b2c93-a12e-4d35-bd80-2d727a7618c9", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"84.5 ms ± 677 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n" | |
] | |
} | |
], | |
"source": [ | |
"%%timeit\n", | |
"i_a = nlargest_last(pt, 2)\n", | |
"pt_sorted_a = ak.sort(\n", | |
" pt[i_a], axis=-1\n", | |
")\n", | |
"ordered_pt_a = ak.pad_none(\n", | |
" pt_sorted_a,\n", | |
" target=2,\n", | |
" clip=True\n", | |
")\n", | |
"\n", | |
"leading_jet = ordered_pt_a[:, 0]\n", | |
"subleading_jet = ordered_pt_a[:, 1]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 43, | |
"id": "8445253d-37d5-4f85-b57f-642560278669", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"89.6 ms ± 1 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n" | |
] | |
} | |
], | |
"source": [ | |
"%%timeit\n", | |
"i_b = nlargest_sorted_last(pt, 2)\n", | |
"ordered_pt_b = ak.pad_none(\n", | |
" pt[i_b],\n", | |
" target=2,\n", | |
" clip=True\n", | |
")\n", | |
"\n", | |
"leading_jet = ordered_pt_b[:, 0]\n", | |
"subleading_jet = ordered_pt_b[:, 1]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 44, | |
"id": "e1654dd5-2d1f-4905-8fd3-20686a747745", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"376 ms ± 3.1 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" | |
] | |
} | |
], | |
"source": [ | |
"%%timeit\n", | |
"pt_sorted_c = ak.sort(\n", | |
" pt, axis=-1\n", | |
")[:, :2]\n", | |
"ordered_pt_c = ak.pad_none(\n", | |
" pt_sorted_c,\n", | |
" target=2,\n", | |
" clip=True\n", | |
")\n", | |
"\n", | |
"leading_jet = ordered_pt_c[:, 0]\n", | |
"subleading_jet = ordered_pt_c[:, 1]" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3 (ipykernel)", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.11.7" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 5 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment