Error when sending ray.ObjectRef using collective_rpc
Updating 6f48dfe..5b23624
Fast-forward
rdt-vllm-simple/agents/generator/core.py | 18 +++++++++++++-----
1 file changed, 13 insertions(+), 5 deletions(-)
2025-10-28 23:11:39,280 INFO worker.py:1832 -- Connecting to existing Ray cluster at address: 10.0.10.150:6379...
2025-10-28 23:11:39,292 INFO worker.py:2003 -- Connected to Ray cluster. View the dashboard at https://session-3frf4lk2clfpxfatd3azds6c8r.i.anyscaleuserdata.com
2025-10-28 23:11:39,300 INFO packaging.py:588 -- Creating a file package for local module '/home/ray/default/rl-gpu-objects/rdt-vllm-simple'.
2025-10-28 23:11:39,305 INFO packaging.py:380 -- Pushing file package 'gcs://_ray_pkg_210586ea18ac0c9a.zip' (0.11MiB) to Ray cluster...
2025-10-28 23:11:39,306 INFO packaging.py:393 -- Successfully pushed file package 'gcs://_ray_pkg_210586ea18ac0c9a.zip'.
(LearnerWorker pid=21752, ip=10.0.11.247) [Learner-rank0] Initializing process group: master=10.0.11.247:34579, world_size=4
(LearnerWorker pid=21752, ip=10.0.11.247) [Learner-rank0] Distributed init complete; dist.is_initialized=True
[Generator] Updating weights via NIXL: <class 'ray.ObjectRef'>
[Generator] Sleeping for 10 seconds
(LearnerWorker pid=21752, ip=10.0.11.247) 2025-10-28 23:11:48 NIXL INFO _api.py:361 Backend UCX was instantiated
(LearnerWorker pid=21752, ip=10.0.11.247) 2025-10-28 23:11:48 NIXL INFO _api.py:251 Initialized NIXL agent: 64bc38e6d5154ec7e8dfb82c10000000
(LearnerWorker pid=21752, ip=10.0.11.247) [Learner] State dict of length 388 materialized from FSDP
(pid=18118, ip=10.0.30.237) INFO 10-28 23:11:50 [__init__.py:216] Automatically detected platform cuda.
(GeneratorCore pid=18118, ip=10.0.30.237) INFO 10-28 23:11:53 [utils.py:233] non-default args: {'download_dir': '/mnt/cluster_storage/ricardo/weights/', 'dtype': 'float16', 'max_num_batched_tokens': 512, 'disable_log_stats': True, 'enforce_eager': True, 'worker_extension_cls': 'agents.weight_sync.worker_wrap.WorkerWrap', 'model': 'facebook/opt-1.3b'}
(LearnerWorker pid=21755, ip=10.0.11.247) [Learner-rank3] Initializing process group: master=10.0.11.247:34579, world_size=4 [repeated 3x across cluster] (Ray deduplicates logs by default. Set RAY_DEDUP_LOGS=0 to disable log deduplication, or see https://docs.ray.io/en/master/ray-observability/user-guides/configure-logging.html#log-deduplication for more options.)
(LearnerWorker pid=21755, ip=10.0.11.247) [Learner-rank3] Distributed init complete; dist.is_initialized=True [repeated 3x across cluster]
(GeneratorCore pid=18118, ip=10.0.30.237) INFO 10-28 23:11:53 [model.py:547] Resolved architecture: OPTForCausalLM
(GeneratorCore pid=18118, ip=10.0.30.237) INFO 10-28 23:11:53 [model.py:1510] Using max model len 2048
(GeneratorCore pid=18118, ip=10.0.30.237) INFO 10-28 23:11:53 [arg_utils.py:1215] Using ray runtime env: {'_ray_commit': '1c8d40830be135dfec1e90316ee2f62ad7dec7bc', 'cgroupv2': {}, 'ray_debugger': {'working_dir': '/home/ray/default/rl-gpu-objects'}, 'working_dir': 'gcs://_ray_pkg_210586ea18ac0c9a.zip', 'pip': {'packages': ['torch', 'tqdm', 'numpy', 'matplotlib', 'bayesian-optimization', 'hyperopt', 'datasets', 'tensordict', 'accelerate', 'flashinfer-python', 'nvidia-ml-py', 'nixl', 'ucx-py-cu12'], 'pip_check': False}}
(GeneratorCore pid=18118, ip=10.0.30.237) INFO 10-28 23:11:53 [scheduler.py:205] Chunked prefill is enabled with max_num_batched_tokens=512.
(GeneratorCore pid=18118, ip=10.0.30.237) INFO 10-28 23:11:53 [__init__.py:381] Cudagraph is disabled under eager mode
(GeneratorCore pid=18118, ip=10.0.30.237) WARNING 10-28 23:11:54 [__init__.py:3036] We must use the `spawn` multiprocessing start method. Overriding VLLM_WORKER_MULTIPROC_METHOD to 'spawn'. See https://docs.vllm.ai/en/latest/usage/troubleshooting.html#python-multiprocessing for more information. Reasons: In a Ray actor and can only be spawned
[Generator] Done sleeping
(GeneratorCore pid=18118, ip=10.0.30.237) INFO 10-28 23:11:58 [__init__.py:216] Automatically detected platform cuda.
(GeneratorCore pid=18118, ip=10.0.30.237) (EngineCore_DP0 pid=18293) INFO 10-28 23:11:59 [core.py:644] Waiting for init message from front-end.
(GeneratorCore pid=18118, ip=10.0.30.237) (EngineCore_DP0 pid=18293) INFO 10-28 23:11:59 [core.py:77] Initializing a V1 LLM engine (v0.11.0) with config: model='facebook/opt-1.3b', speculative_config=None, tokenizer='facebook/opt-1.3b', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, tokenizer_revision=None, trust_remote_code=False, dtype=torch.float16, max_seq_len=2048, download_dir='/mnt/cluster_storage/ricardo/weights/', load_format=auto, tensor_parallel_size=1, pipeline_parallel_size=1, data_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=True, kv_cache_dtype=auto, device_config=cuda, structured_outputs_config=StructuredOutputsConfig(backend='auto', disable_fallback=False, disable_any_whitespace=False, disable_additional_properties=False, reasoning_parser=''), observability_config=ObservabilityConfig(show_hidden_metrics_for_version=None, otlp_traces_endpoint=None, collect_detailed_traces=None), seed=0, served_model_name=facebook/opt-1.3b, enable_prefix_caching=True, chunked_prefill_enabled=True, pooler_config=None, compilation_config={"level":0,"debug_dump_path":"","cache_dir":"","backend":"","custom_ops":[],"splitting_ops":null,"use_inductor":true,"compile_sizes":[],"inductor_compile_config":{"enable_auto_functionalized_v2":false},"inductor_passes":{},"cudagraph_mode":0,"use_cudagraph":true,"cudagraph_num_of_warmups":0,"cudagraph_capture_sizes":[],"cudagraph_copy_inputs":false,"full_cuda_graph":false,"use_inductor_graph_partition":false,"pass_config":{},"max_capture_size":0,"local_cache_dir":null}
(GeneratorCore pid=18118, ip=10.0.30.237) (EngineCore_DP0 pid=18293) INFO 10-28 23:12:01 [worker_base.py:243] Injected <class 'agents.weight_sync.worker_wrap.WorkerWrap'> into <class 'vllm.v1.worker.gpu_worker.Worker'> for extended collective_rpc calls ['update_weights']
(GeneratorCore pid=18118, ip=10.0.30.237) [Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
(GeneratorCore pid=18118, ip=10.0.30.237) [Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
(GeneratorCore pid=18118, ip=10.0.30.237) [Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
(GeneratorCore pid=18118, ip=10.0.30.237) [Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
(GeneratorCore pid=18118, ip=10.0.30.237) [Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
(GeneratorCore pid=18118, ip=10.0.30.237) [Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
(GeneratorCore pid=18118, ip=10.0.30.237) (EngineCore_DP0 pid=18293) INFO 10-28 23:12:01 [parallel_state.py:1208] rank 0 in world size 1 is assigned as DP rank 0, PP rank 0, TP rank 0, EP rank 0
(GeneratorCore pid=18118, ip=10.0.30.237) (EngineCore_DP0 pid=18293) INFO 10-28 23:12:01 [topk_topp_sampler.py:55] Using FlashInfer for top-p & top-k sampling.
(GeneratorCore pid=18118, ip=10.0.30.237) (EngineCore_DP0 pid=18293) INFO 10-28 23:12:02 [gpu_model_runner.py:2602] Starting to load model facebook/opt-1.3b...
(GeneratorCore pid=18118, ip=10.0.30.237) (EngineCore_DP0 pid=18293) INFO 10-28 23:12:02 [gpu_model_runner.py:2634] Loading model from scratch...
(GeneratorCore pid=18118, ip=10.0.30.237) (EngineCore_DP0 pid=18293) INFO 10-28 23:12:02 [cuda.py:366] Using Flash Attention backend on V1 engine.
(GeneratorCore pid=18118, ip=10.0.30.237) (EngineCore_DP0 pid=18293) INFO 10-28 23:12:02 [weight_utils.py:392] Using model weights format ['*.safetensors', '*.bin', '*.pt']
Loading pt checkpoint shards: 0% Completed | 0/1 [00:00<?, ?it/s]
Loading pt checkpoint shards: 100% Completed | 1/1 [00:01<00:00, 1.64s/it]
Loading pt checkpoint shards: 100% Completed | 1/1 [00:01<00:00, 1.64s/it]
(GeneratorCore pid=18118, ip=10.0.30.237) (EngineCore_DP0 pid=18293)
(GeneratorCore pid=18118, ip=10.0.30.237) (EngineCore_DP0 pid=18293) INFO 10-28 23:12:04 [default_loader.py:267] Loading weights took 1.66 seconds
(GeneratorCore pid=18118, ip=10.0.30.237) (EngineCore_DP0 pid=18293) INFO 10-28 23:12:04 [gpu_model_runner.py:2653] Model loading took 2.4510 GiB and 2.189533 seconds
(GeneratorCore pid=18118, ip=10.0.30.237) (EngineCore_DP0 pid=18293) INFO 10-28 23:12:05 [gpu_worker.py:298] Available KV cache memory: 37.40 GiB
(GeneratorCore pid=18118, ip=10.0.30.237) (EngineCore_DP0 pid=18293) INFO 10-28 23:12:05 [kv_cache_utils.py:1087] GPU KV cache size: 204,240 tokens
(GeneratorCore pid=18118, ip=10.0.30.237) (EngineCore_DP0 pid=18293) INFO 10-28 23:12:05 [kv_cache_utils.py:1091] Maximum concurrency for 2,048 tokens per request: 99.73x
(GeneratorCore pid=18118, ip=10.0.30.237) (EngineCore_DP0 pid=18293) WARNING 10-28 23:12:06 [cudagraph_dispatcher.py:106] cudagraph dispatching keys are not initialized. No cudagraph will be used.
(GeneratorCore pid=18118, ip=10.0.30.237) (EngineCore_DP0 pid=18293) INFO 10-28 23:12:06 [core.py:210] init engine (profile, create kv cache, warmup model) took 1.17 seconds
(GeneratorCore pid=18118, ip=10.0.30.237) (EngineCore_DP0 pid=18293) INFO 10-28 23:12:06 [__init__.py:381] Cudagraph is disabled under eager mode
(GeneratorCore pid=18118, ip=10.0.30.237) INFO 10-28 23:12:06 [llm.py:306] Supported_tasks: ['generate']
[GPUObjectDebug] queued transfer backend=nixl src=ActorID(64bc38e6d5154ec7e8dfb82c10000000) dst=ActorID(dc2e0c918ea95bc538eba88e10000000)
(GeneratorCore pid=18118, ip=10.0.30.237) 2025-10-28 23:12:07 NIXL INFO _api.py:361 Backend UCX was instantiated
(GeneratorCore pid=18118, ip=10.0.30.237) 2025-10-28 23:12:07 NIXL INFO _api.py:251 Initialized NIXL agent: dc2e0c918ea95bc538eba88e10000000
(GeneratorCore pid=18118, ip=10.0.30.237) [Generator] State dict of length 388 on GPU
(GeneratorCore pid=18118, ip=10.0.30.237) (EngineCore_DP0 pid=18293) INFO 10-28 23:12:14 [block_pool.py:378] Successfully reset prefix cache
2025-10-28 23:12:14 NIXL INFO _api.py:361 Backend UCX was instantiated
2025-10-28 23:12:14 NIXL INFO _api.py:251 Initialized NIXL agent: RAY-DRIVER-40d580a6-5c66-4c52-b1d9-17046c867e99
Traceback (most recent call last):
File "/home/ray/default/rl-gpu-objects/rdt-vllm-simple/grpo_vllm_fsdp_gpu_objects.py", line 151, in <module>
train(total_steps=args.steps)
File "/home/ray/default/rl-gpu-objects/rdt-vllm-simple/grpo_vllm_fsdp_gpu_objects.py", line 95, in train
weights_updated_ref = generator.update_weights(learner.get_weights())
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/ray/default/rl-gpu-objects/rdt-vllm-simple/agents/generator/core.py", line 253, in update_weights
ray.get(self.generator_core.update_weights.remote(state_dict_ref))
File "/home/ray/anaconda3/lib/python3.11/site-packages/ray/_private/auto_init_hook.py", line 22, in auto_init_wrapper
return fn(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^
File "/home/ray/anaconda3/lib/python3.11/site-packages/ray/_private/client_mode_hook.py", line 104, in wrapper
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/home/ray/anaconda3/lib/python3.11/site-packages/ray/_private/worker.py", line 2961, in get
values, debugger_breakpoint = worker.get_objects(
^^^^^^^^^^^^^^^^^^^
File "/home/ray/anaconda3/lib/python3.11/site-packages/ray/_private/worker.py", line 1026, in get_objects
raise value.as_instanceof_cause()
ray.exceptions.RayTaskError(TypeError): ray::GeneratorCore.update_weights() (pid=18118, ip=10.0.30.237, actor_id=dc2e0c918ea95bc538eba88e10000000, repr=<agents.generator.core.GeneratorCore object at 0x753e00073d10>)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/ray/default/rl-gpu-objects/rdt-vllm-simple/agents/generator/core.py", line 100, in update_weights
File "/home/ray/anaconda3/lib/python3.11/site-packages/vllm/entrypoints/llm.py", line 507, in collective_rpc
return self.llm_engine.collective_rpc(method, timeout, args, kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/ray/anaconda3/lib/python3.11/site-packages/vllm/v1/engine/llm_engine.py", line 362, in collective_rpc
return self.engine_core.collective_rpc(method, timeout, args, kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/ray/anaconda3/lib/python3.11/site-packages/vllm/v1/engine/core_client.py", line 749, in collective_rpc
return self.call_utility("collective_rpc", method, timeout, args,
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/ray/anaconda3/lib/python3.11/site-packages/vllm/v1/engine/core_client.py", line 694, in call_utility
self._send_input(EngineCoreRequestType.UTILITY,
File "/home/ray/anaconda3/lib/python3.11/site-packages/vllm/v1/engine/core_client.py", line 680, in _send_input
*self.encoder.encode(request))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/ray/anaconda3/lib/python3.11/site-packages/vllm/v1/serial_utils.py", line 123, in encode
bufs[0] = self.encoder.encode(obj)
^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/ray/anaconda3/lib/python3.11/site-packages/vllm/v1/serial_utils.py", line 174, in enc_hook
raise TypeError(f"Object of type {type(obj)} is not serializable"
TypeError: Object of type <class 'ray.ObjectRef'> is not serializable. Set VLLM_ALLOW_INSECURE_SERIALIZATION=1 to allow fallback to pickle-based serialization.
(base) ray@ip-10-0-10-150:~/default/rl-gpu-objects$
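
What the traceback shows: the driver hands `learner.get_weights()` (a `ray.ObjectRef`) to `GeneratorCore.update_weights`, which forwards it into `LLM.collective_rpc`. vLLM's V1 engine encodes `collective_rpc` arguments with its msgpack encoder in `vllm/v1/serial_utils.py`, which has no handler for `ray.ObjectRef`, hence the `TypeError`. Below is a minimal sketch of one way around this, assuming the actor holds the engine as `self.llm` and that the `update_weights` RPC named in the log accepts a materialized state dict; the real contents of `agents/generator/core.py` are not included in this gist, so treat the names as placeholders.

```python
import ray


class GeneratorCore:
    """Hypothetical sketch; only the method relevant to the error is shown."""

    def update_weights(self, state_dict_ref):
        # Failing pattern (per the traceback): the ray.ObjectRef itself is
        # forwarded into collective_rpc, and vLLM's msgpack encoder
        # (vllm/v1/serial_utils.py) has no way to serialize it.
        #
        #   self.llm.collective_rpc("update_weights", args=(state_dict_ref,))
        #
        # Workaround sketch: resolve the reference inside this actor first,
        # so only ordinary objects (here a dict of tensors) cross the
        # collective_rpc serialization boundary.
        state_dict = ray.get(state_dict_ref)
        return self.llm.collective_rpc("update_weights", args=(state_dict,))
```

The error message also points at `VLLM_ALLOW_INSECURE_SERIALIZATION=1`, which enables a pickle fallback in the encoder; whether a pickled `ObjectRef` can then be resolved inside the spawned `EngineCore_DP0` subprocess is a separate question, so materializing the weights before the RPC looks like the safer route here.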