Skip to content

Instantly share code, notes, and snippets.

@TomAugspurger
Created November 7, 2025 17:59
Show Gist options
  • Select an option

  • Save TomAugspurger/68d9e0d3dbb0f51a018e10e288ed9650 to your computer and use it in GitHub Desktop.

Select an option

Save TomAugspurger/68d9e0d3dbb0f51a018e10e288ed9650 to your computer and use it in GitHub Desktop.
root@gpu-h100-0261:/app# export TERM=linux
root@gpu-h100-0261:/app# unset UCX_NET_DEVICES
root@gpu-h100-0261:/app# export UCX_PROTO_ENABLE="y"
root@gpu-h100-0261:/app# export UCX_RNDV_PIPELINE_ERROR_HANDLING="y"
root@gpu-h100-0261:/app# export UCX_MAX_RNDV_RAILS="1"
root@gpu-h100-0261:/app# export DASK_DISTRIBUTED__COMM__TIMEOUTS__CONNECT="300"
root@gpu-h100-0261:/app# export DASK_DISTRIBUTED__COMM__UCX__CONNECT_TIMEOUT="300"
root@gpu-h100-0261:/app#
root@gpu-h100-0261:/app# . /app/.venv/bin/activate
(cudf-polars-bench) root@gpu-h100-0261:/app#
(cudf-polars-bench) root@gpu-h100-0261:/app# for query in {1..22}; do
> nsys profile \
> -o "/data/profiles/rapidsmpf.q$query.1k" -f true \
> --trace=nvtx,cuda \
> --nvtx-domain-exclude=CCCL,rapidsmpf \
> --cuda-memory-usage=true \
> python -m cudf_polars.experimental.benchmarks.pdsh \
> --executor="streaming" \
> --runtime="rapidsmpf" \
> --path="/data/tpch-rs/scale-1000" \
> --suffix="" \
> --stream-policy="pool" \
> --n-workers 1 \
> --no-print-results \
> --no-summarize \
> --iterations=2 \
> --rmm-async \
> --blocksize 2_000_000_000 \
> -o /data/profiles/rapidsmpf-sf1k-q$query.ndjson \
> "${query}"
> done
Collecting data...
Query 1 - Iteration 0 finished in 30.1626s
[gpu-h100-0261:1564357:0:1564851] Caught signal 11 (Segmentation fault: address not mapped to object at address 0x64)
Generating '/tmp/nsys-report-1ab5.qdstrm'
[1/1] [========================100%] rapidsmpf.q1.1k.nsys-rep
Generated:
/data/profiles/rapidsmpf.q1.1k.nsys-rep
Collecting data...
Query 2 - Iteration 0 finished in 3.4335s
Query 2 - Iteration 1 finished in 0.6782s
Generating '/tmp/nsys-report-ff4e.qdstrm'
[1/1] [========================100%] rapidsmpf.q2.1k.nsys-rep
Generated:
/data/profiles/rapidsmpf.q2.1k.nsys-rep
Collecting data...
Query 3 - Iteration 0 finished in 15.6152s
Query 3 - Iteration 1 finished in 6.8355s
Generating '/tmp/nsys-report-3e3b.qdstrm'
[1/1] [========================100%] rapidsmpf.q3.1k.nsys-rep
Generated:
/data/profiles/rapidsmpf.q3.1k.nsys-rep
Collecting data...
[gpu-h100-0261:1566477:0:1566852] Caught signal 11 (Segmentation fault: address not mapped to object at address 0x1)
Exception in callback Future.set_result(<rapidsmpf.st...x15546a365920>)()
handle: <Handle Future.set_result(<rapidsmpf.st...x15546a365920>)()>
Traceback (most recent call last):
File "/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/asyncio/runners.py", line 195, in run
return runner.run(main)
^^^^^^^^^^^^^^^^
File "/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/asyncio/runners.py", line 118, in run
return self._loop.run_until_complete(task)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/asyncio/base_events.py", line 691, in run_until_complete
return future.result()
^^^^^^^^^^^^^^^
File "rapidsmpf/streaming/core/node.pyx", line 255, in runner
File "/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/asyncio/tasks.py", line 684, in _wrap_awaitable
return await awaitable
^^^^^^^^^^^^^^^
File "rapidsmpf/streaming/core/node.pyx", line 174, in run
File "/app/.venv/lib/python3.12/site-packages/cudf_polars/experimental/rapidsmpf/io.py", line 401, in scan_node
await asyncio.gather(*tasks)
File "/app/.venv/lib/python3.12/site-packages/cudf_polars/experimental/rapidsmpf/io.py", line 382, in _producer
await read_chunk(
File "/app/.venv/lib/python3.12/site-packages/cudf_polars/experimental/rapidsmpf/io.py", line 250, in read_chunk
df = await asyncio.to_thread(
^^^^^^^^^^^^^^^^^^^^^^^^
File "/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/asyncio/threads.py", line 25, in to_thread
return await loop.run_in_executor(None, func_call)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/concurrent/futures/thread.py", line 59, in run
result = self.fn(*self.args, **self.kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/app/.venv/lib/python3.12/site-packages/nvtx/nvtx.py", line 123, in inner
result = func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/app/.venv/lib/python3.12/site-packages/cudf_polars/dsl/ir.py", line 787, in do_evaluate
chunk = reader.read_chunk()
^^^^^^^^^^^^^^^^^^^
File "pylibcudf/io/parquet.pyx", line 366, in pylibcudf.io.parquet.ChunkedParquetReader.read_chunk
File "pylibcudf/io/parquet.pyx", line 385, in pylibcudf.io.parquet.ChunkedParquetReader.read_chunk
MemoryError: std::bad_alloc: out_of_memory: CUDA error (failed to allocate 1727836384 bytes) at: /tmp/pip-build-env-6sp37l_w/normal/lib/python3.12/site-packages/librmm/include/rmm/mr/device/cuda_async_view_memory_resource.hpp:87: cudaErrorMemoryAllocation out of memory
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/asyncio/events.py", line 88, in _run
self._context.run(self._callback, *self._args)
asyncio.exceptions.InvalidStateError: invalid state
Exception in callback Future.set_result(None)()
handle: <Handle Future.set_result(None)()>
Traceback (most recent call last):
File "/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/asyncio/runners.py", line 195, in run
return runner.run(main)
^^^^^^^^^^^^^^^^
File "/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/asyncio/runners.py", line 118, in run
return self._loop.run_until_complete(task)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/asyncio/base_events.py", line 691, in run_until_complete
return future.result()
^^^^^^^^^^^^^^^
File "rapidsmpf/streaming/core/node.pyx", line 255, in runner
File "/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/asyncio/tasks.py", line 684, in _wrap_awaitable
return await awaitable
^^^^^^^^^^^^^^^
File "rapidsmpf/streaming/core/node.pyx", line 174, in run
File "/app/.venv/lib/python3.12/site-packages/cudf_polars/experimental/rapidsmpf/io.py", line 401, in scan_node
await asyncio.gather(*tasks)
File "/app/.venv/lib/python3.12/site-packages/cudf_polars/experimental/rapidsmpf/io.py", line 382, in _producer
await read_chunk(
File "/app/.venv/lib/python3.12/site-packages/cudf_polars/experimental/rapidsmpf/io.py", line 250, in read_chunk
df = await asyncio.to_thread(
^^^^^^^^^^^^^^^^^^^^^^^^
File "/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/asyncio/threads.py", line 25, in to_thread
return await loop.run_in_executor(None, func_call)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/concurrent/futures/thread.py", line 59, in run
result = self.fn(*self.args, **self.kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/app/.venv/lib/python3.12/site-packages/nvtx/nvtx.py", line 123, in inner
result = func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/app/.venv/lib/python3.12/site-packages/cudf_polars/dsl/ir.py", line 787, in do_evaluate
chunk = reader.read_chunk()
^^^^^^^^^^^^^^^^^^^
File "pylibcudf/io/parquet.pyx", line 366, in pylibcudf.io.parquet.ChunkedParquetReader.read_chunk
File "pylibcudf/io/parquet.pyx", line 385, in pylibcudf.io.parquet.ChunkedParquetReader.read_chunk
MemoryError: std::bad_alloc: out_of_memory: CUDA error (failed to allocate 1727836384 bytes) at: /tmp/pip-build-env-6sp37l_w/normal/lib/python3.12/site-packages/librmm/include/rmm/mr/device/cuda_async_view_memory_resource.hpp:87: cudaErrorMemoryAllocation out of memory
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/asyncio/events.py", line 88, in _run
self._context.run(self._callback, *self._args)
asyncio.exceptions.InvalidStateError: invalid state
Generating '/tmp/nsys-report-4594.qdstrm'
[1/1] [========================100%] rapidsmpf.q4.1k.nsys-rep
Generated:
/data/profiles/rapidsmpf.q4.1k.nsys-rep
Collecting data...
Query 5 - Iteration 0 finished in 16.5519s
Query 5 - Iteration 1 finished in 7.8643s
Generating '/tmp/nsys-report-3134.qdstrm'
[1/1] [========================100%] rapidsmpf.q5.1k.nsys-rep
Generated:
/data/profiles/rapidsmpf.q5.1k.nsys-rep
Collecting data...
Query 6 - Iteration 0 finished in 2.9180s
Query 6 - Iteration 1 finished in 2.6289s
Generating '/tmp/nsys-report-ea5a.qdstrm'
[1/1] [========================100%] rapidsmpf.q6.1k.nsys-rep
Generated:
/data/profiles/rapidsmpf.q6.1k.nsys-rep
Collecting data...
Query 7 - Iteration 0 finished in 10.1449s
Query 7 - Iteration 1 finished in 9.2216s
Generating '/tmp/nsys-report-c2fc.qdstrm'
[1/1] [========================100%] rapidsmpf.q7.1k.nsys-rep
Generated:
/data/profiles/rapidsmpf.q7.1k.nsys-rep
Collecting data...
Query 8 - Iteration 0 finished in 20.4584s
Query 8 - Iteration 1 finished in 8.6610s
Generating '/tmp/nsys-report-9742.qdstrm'
[1/1] [========================100%] rapidsmpf.q8.1k.nsys-rep
Generated:
/data/profiles/rapidsmpf.q8.1k.nsys-rep
Collecting data...
Query 9 - Iteration 0 finished in 22.4880s
Query 9 - Iteration 1 finished in 21.0714s
Generating '/tmp/nsys-report-588a.qdstrm'
[1/1] [========================100%] rapidsmpf.q9.1k.nsys-rep
Generated:
/data/profiles/rapidsmpf.q9.1k.nsys-rep
Collecting data...
Query 10 - Iteration 0 finished in 13.9307s
Query 10 - Iteration 1 finished in 8.9687s
Generating '/tmp/nsys-report-35b7.qdstrm'
[1/1] [========================100%] rapidsmpf.q10.1k.nsys-rep
Generated:
/data/profiles/rapidsmpf.q10.1k.nsys-rep
Collecting data...
Query 11 - Iteration 0 finished in 1.6039s
Query 11 - Iteration 1 finished in 0.6729s
Generating '/tmp/nsys-report-d1a8.qdstrm'
[1/1] [========================100%] rapidsmpf.q11.1k.nsys-rep
Generated:
/data/profiles/rapidsmpf.q11.1k.nsys-rep
Collecting data...
Query 12 - Iteration 0 finished in 6.4917s
Query 12 - Iteration 1 finished in 5.8905s
Generating '/tmp/nsys-report-beb9.qdstrm'
[1/1] [========================100%] rapidsmpf.q12.1k.nsys-rep
Generated:
/data/profiles/rapidsmpf.q12.1k.nsys-rep
Collecting data...
Query 13 - Iteration 0 finished in 42.6662s
[gpu-h100-0261:1572922:0:1573512] Caught signal 11 (Segmentation fault: address not mapped to object at address 0x10)
==== backtrace (tid:1573512) ====
0 /app/.venv/lib/python3.12/site-packages/libucx/lib/libucs.so(ucs_handle_error+0x294) [0x1554716d2b14]
1 /app/.venv/lib/python3.12/site-packages/libucx/lib/libucs.so(+0x34cca) [0x1554716d2cca]
2 /app/.venv/lib/python3.12/site-packages/libucx/lib/libucs.so(+0x34f7e) [0x1554716d2f7e]
3 /usr/lib64/libpthread.so.0(+0x12990) [0x155555116990]
4 /app/.venv/bin/python() [0x19403e8]
5 /app/.venv/lib/python3.12/site-packages/rapidsmpf/streaming/core/utilities.cpython-312-x86_64-linux-gnu.so(+0x37b2) [0x15546aa077b2]
6 /app/.venv/lib/python3.12/site-packages/rapidsmpf/streaming/core/channel.cpython-312-x86_64-linux-gnu.so(+0x12c61) [0x15546a243c61]
7 /app/.venv/lib/python3.12/site-packages/librapidsmpf/lib64/librapidsmpf.so(+0xd2558) [0x15546b900558]
8 /app/.venv/lib/python3.12/site-packages/librapidsmpf/lib64/librapidsmpf.so(_ZN4coro11thread_pool8executorEm+0xdc) [0x15546b920dec]
9 /usr/lib64/libstdc++.so.6(+0xc2b23) [0x155550cb5b23]
10 /usr/lib64/libpthread.so.0(+0x81ca) [0x15555510c1ca]
11 /usr/lib64/libc.so.6(clone+0x43) [0x1555543d48d3]
=================================
Generating '/tmp/nsys-report-0be7.qdstrm'
[1/1] [========================100%] rapidsmpf.q13.1k.nsys-rep
Generated:
/data/profiles/rapidsmpf.q13.1k.nsys-rep
Collecting data...
Query 14 - Iteration 0 finished in 6.9746s
Query 14 - Iteration 1 finished in 6.3942s
Generating '/tmp/nsys-report-b304.qdstrm'
[1/1] [========================100%] rapidsmpf.q14.1k.nsys-rep
Generated:
/data/profiles/rapidsmpf.q14.1k.nsys-rep
Collecting data...
Query 15 - Iteration 0 finished in 5.9327s
Query 15 - Iteration 1 finished in 5.6456s
Generating '/tmp/nsys-report-84e8.qdstrm'
[1/1] [========================100%] rapidsmpf.q15.1k.nsys-rep
Generated:
/data/profiles/rapidsmpf.q15.1k.nsys-rep
Collecting data...
Query 16 - Iteration 0 finished in 2.1244s
Query 16 - Iteration 1 finished in 1.7289s
Generating '/tmp/nsys-report-151b.qdstrm'
[1/1] [========================100%] rapidsmpf.q16.1k.nsys-rep
Generated:
/data/profiles/rapidsmpf.q16.1k.nsys-rep
Collecting data...
[gpu-h100-0261:1575816:0:1576201] Caught signal 11 (Segmentation fault: address not mapped to object at address 0x10)
==== backtrace (tid:1576201) ====
0 /app/.venv/lib/python3.12/site-packages/libucx/lib/libucs.so(ucs_handle_error+0x294) [0x155471ad3b14]
1 /app/.venv/lib/python3.12/site-packages/libucx/lib/libucs.so(+0x34cca) [0x155471ad3cca]
2 /app/.venv/lib/python3.12/site-packages/libucx/lib/libucs.so(+0x34f7e) [0x155471ad3f7e]
3 /usr/lib64/libpthread.so.0(+0x12990) [0x155555116990]
4 /app/.venv/bin/python() [0x19403e8]
5 /app/.venv/lib/python3.12/site-packages/rapidsmpf/streaming/core/utilities.cpython-312-x86_64-linux-gnu.so(+0x37b2) [0x1554702907b2]
6 /app/.venv/lib/python3.12/site-packages/rapidsmpf/streaming/core/channel.cpython-312-x86_64-linux-gnu.so(+0x12756) [0x15546a43f756]
7 /app/.venv/lib/python3.12/site-packages/librapidsmpf/lib64/librapidsmpf.so(_ZN4coro11thread_pool8executorEm+0xdc) [0x15546b920dec]
8 /usr/lib64/libstdc++.so.6(+0xc2b23) [0x155550cb5b23]
9 /usr/lib64/libpthread.so.0(+0x81ca) [0x15555510c1ca]
10 /usr/lib64/libc.so.6(clone+0x43) [0x1555543d48d3]
=================================
Generating '/tmp/nsys-report-d018.qdstrm'
[1/1] [========================100%] rapidsmpf.q17.1k.nsys-rep
Generated:
/data/profiles/rapidsmpf.q17.1k.nsys-rep
Collecting data...
Exception in callback Future.set_result(None)()
handle: <Handle Future.set_result(None)()>
Traceback (most recent call last):
File "/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/asyncio/runners.py", line 195, in run
return runner.run(main)
^^^^^^^^^^^^^^^^
File "/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/asyncio/runners.py", line 118, in run
return self._loop.run_until_complete(task)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/asyncio/base_events.py", line 691, in run_until_complete
return future.result()
^^^^^^^^^^^^^^^
File "rapidsmpf/streaming/core/node.pyx", line 255, in runner
File "/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/asyncio/tasks.py", line 684, in _wrap_awaitable
return await awaitable
^^^^^^^^^^^^^^^
File "rapidsmpf/streaming/core/node.pyx", line 174, in run
File "/app/.venv/lib/python3.12/site-packages/cudf_polars/experimental/rapidsmpf/nodes.py", line 65, in default_node_single
df = await asyncio.to_thread(
^^^^^^^^^^^^^^^^^^^^^^^^
File "/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/asyncio/threads.py", line 25, in to_thread
return await loop.run_in_executor(None, func_call)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/concurrent/futures/thread.py", line 59, in run
result = self.fn(*self.args, **self.kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/app/.venv/lib/python3.12/site-packages/nvtx/nvtx.py", line 123, in inner
result = func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/app/.venv/lib/python3.12/site-packages/cudf_polars/dsl/ir.py", line 1768, in do_evaluate
group_keys, raw_tables = grouper.aggregate(requests, stream=df.stream)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "pylibcudf/groupby.pyx", line 163, in pylibcudf.groupby.GroupBy.aggregate
File "pylibcudf/groupby.pyx", line 198, in pylibcudf.groupby.GroupBy.aggregate
MemoryError: std::bad_alloc: out_of_memory: CUDA error (failed to allocate 1151943612 bytes) at: /tmp/pip-build-env-6sp37l_w/normal/lib/python3.12/site-packages/librmm/include/rmm/mr/device/cuda_async_view_memory_resource.hpp:87: cudaErrorMemoryAllocation out of memory
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/asyncio/events.py", line 88, in _run
self._context.run(self._callback, *self._args)
asyncio.exceptions.InvalidStateError: invalid state
[gpu-h100-0261:1576527:0:1576912] Caught signal 11 (Segmentation fault: address not mapped to object at address 0x1)
Exception in callback Future.set_result(None)()
handle: <Handle Future.set_result(None)()>
Traceback (most recent call last):
File "/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/asyncio/runners.py", line 195, in run
return runner.run(main)
^^^^^^^^^^^^^^^^
File "/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/asyncio/runners.py", line 118, in run
return self._loop.run_until_complete(task)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/asyncio/base_events.py", line 691, in run_until_complete
return future.result()
^^^^^^^^^^^^^^^
File "rapidsmpf/streaming/core/node.pyx", line 255, in runner
File "/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/asyncio/tasks.py", line 684, in _wrap_awaitable
return await awaitable
^^^^^^^^^^^^^^^
File "rapidsmpf/streaming/core/node.pyx", line 174, in run
File "/app/.venv/lib/python3.12/site-packages/cudf_polars/experimental/rapidsmpf/nodes.py", line 65, in default_node_single
df = await asyncio.to_thread(
^^^^^^^^^^^^^^^^^^^^^^^^
File "/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/asyncio/threads.py", line 25, in to_thread
return await loop.run_in_executor(None, func_call)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/concurrent/futures/thread.py", line 59, in run
result = self.fn(*self.args, **self.kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/app/.venv/lib/python3.12/site-packages/nvtx/nvtx.py", line 123, in inner
result = func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/app/.venv/lib/python3.12/site-packages/cudf_polars/dsl/ir.py", line 1768, in do_evaluate
group_keys, raw_tables = grouper.aggregate(requests, stream=df.stream)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "pylibcudf/groupby.pyx", line 163, in pylibcudf.groupby.GroupBy.aggregate
File "pylibcudf/groupby.pyx", line 198, in pylibcudf.groupby.GroupBy.aggregate
MemoryError: std::bad_alloc: out_of_memory: CUDA error (failed to allocate 1151943612 bytes) at: /tmp/pip-build-env-6sp37l_w/normal/lib/python3.12/site-packages/librmm/include/rmm/mr/device/cuda_async_view_memory_resource.hpp:87: cudaErrorMemoryAllocation out of memory
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/asyncio/events.py", line 88, in _run
self._context.run(self._callback, *self._args)
asyncio.exceptions.InvalidStateError: invalid state
Generating '/tmp/nsys-report-6b14.qdstrm'
[1/1] [========================100%] rapidsmpf.q18.1k.nsys-rep
Generated:
/data/profiles/rapidsmpf.q18.1k.nsys-rep
Collecting data...
Query 19 - Iteration 0 finished in 7.9918s
Query 19 - Iteration 1 finished in 7.5635s
Generating '/tmp/nsys-report-67e1.qdstrm'
[1/1] [========================100%] rapidsmpf.q19.1k.nsys-rep
Generated:
/data/profiles/rapidsmpf.q19.1k.nsys-rep
Collecting data...
Query 20 - Iteration 0 finished in 7.0392s
Query 20 - Iteration 1 finished in 7.0283s
Generating '/tmp/nsys-report-1e72.qdstrm'
[1/1] [========================100%] rapidsmpf.q20.1k.nsys-rep
Generated:
/data/profiles/rapidsmpf.q20.1k.nsys-rep
Collecting data...
Query 21 - Iteration 0 finished in 83.8346s
Query 21 - Iteration 1 finished in 83.2540s
Generating '/tmp/nsys-report-0398.qdstrm'
[1/1] [========================100%] rapidsmpf.q21.1k.nsys-rep
Generated:
/data/profiles/rapidsmpf.q21.1k.nsys-rep
Collecting data...
Query 22 - Iteration 0 finished in 1.2435s
Query 22 - Iteration 1 finished in 0.8413s
Generating '/tmp/nsys-report-2007.qdstrm'
[1/1] [========================100%] rapidsmpf.q22.1k.nsys-rep
Generated:
/data/profiles/rapidsmpf.q22.1k.nsys-rep
(cudf-polars-bench) root@gpu-h100-0261:/app#
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment