/home/xiowei/miniconda3/envs/vllm/lib/python3.10/site-packages/jax/_src/cloud_tpu_init.py:84: UserWarning: Transparent hugepages are not enabled. TPU runtime startup and shutdown time should be significantly improved on TPU v5e and newer. If not already set, you may need to enable transparent hugepages in your VM image (sudo sh -c "echo always > /sys/kernel/mm/transparent_hugepage/enabled")
warnings.warn(
INFO 07-17 20:38:09 [__init__.py:244] Automatically detected platform tpu.
/mnt/disks/persist/vllm/vllm/platforms/tpu.py:202: UserWarning: 🚨 CAUTION: You are using 'tpu_commons' , which is experimental and NOT intended for production use yet. Please see the README for more details.
from tpu_commons.platforms import TpuPlatform as TpuCommonsPlatform
Running uLLM without Pathways. Module pathwaysutils is not imported.
INFO 07-17 20:38:23 [config.py:1467] Using max model len 1024
INFO 07-17 20:38:23 [config.py:2267] Chunked prefill is enabled with max_num_batched_tokens=8192.
INFO 07-17 20:38:23 [tpu_jax.py:112] [TPU] Forcing DYNAMO_ONCE compilation level
WARNING 07-17 20:38:23 [tpu_jax.py:123] The model dtype is not properly set for JAX backend. Overwriting it to jnp.bfloat16
WARNING:root:libtpu.so and TPU device found. Setting PJRT_DEVICE=TPU.
WARNING 07-17 20:38:25 [tpu_jax.py:161] JAX requires to use uniproc_executor for single host.
INFO 07-17 20:38:25 [core.py:459] Waiting for init message from front-end.
INFO 07-17 20:38:25 [tpu_jax.py:112] [TPU] Forcing DYNAMO_ONCE compilation level
WARNING 07-17 20:38:25 [tpu_jax.py:123] The model dtype is not properly set for JAX backend. Overwriting it to jnp.bfloat16
INFO 07-17 20:38:25 [core.py:69] Initializing a V1 LLM engine (v0.9.1.dev839+g3c545c0c3.d20250715) with config: model='meta-llama/Llama-3.1-8B', speculative_config=None, tokenizer='meta-llama/Llama-3.1-8B', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, override_neuron_config={}, tokenizer_revision=None, trust_remote_code=False, dtype=<class 'jax.numpy.bfloat16'>, max_seq_len=1024, download_dir=None, load_format=auto, tensor_parallel_size=4, pipeline_parallel_size=1, disable_custom_all_reduce=True, quantization=None, enforce_eager=False, kv_cache_dtype=auto, device_config=None, decoding_config=DecodingConfig(backend='auto', disable_fallback=False, disable_any_whitespace=False, disable_additional_properties=False, reasoning_backend=''), observability_config=ObservabilityConfig(show_hidden_metrics_for_version=None, otlp_traces_endpoint=None, collect_detailed_traces=None), seed=0, served_model_name=meta-llama/Llama-3.1-8B, num_scheduler_steps=1, multi_step_stream_outputs=True, enable_prefix_caching=True, chunked_prefill_enabled=True, use_async_output_proc=False, pooler_config=None, compilation_config={"level":2,"debug_dump_path":"","cache_dir":"","backend":"openxla","custom_ops":[],"splitting_ops":["vllm.unified_attention","vllm.unified_attention_with_output"],"use_inductor":true,"compile_sizes":[],"inductor_compile_config":{"enable_auto_functionalized_v2":false},"inductor_passes":{},"use_cudagraph":true,"cudagraph_num_of_warmups":1,"cudagraph_capture_sizes":[512,504,496,488,480,472,464,456,448,440,432,424,416,408,400,392,384,376,368,360,352,344,336,328,320,312,304,296,288,280,272,264,256,248,240,232,224,216,208,200,192,184,176,168,160,152,144,136,128,120,112,104,96,88,80,72,64,56,48,40,32,24,16,8,4,2,1],"cudagraph_copy_inputs":false,"full_cuda_graph":false,"max_capture_size":512,"local_cache_dir":null}
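The config dump above corresponds to a standard vLLM offline-inference launch; a minimal sketch follows, assuming the actual script is not part of this gist and using only values that appear in the log (Llama-3.1-8B, max model len 1024, tensor_parallel_size=4, seed=0).

# Minimal sketch of a launch that matches the config dump above (not the actual script).
from vllm import LLM, SamplingParams

llm = LLM(
    model="meta-llama/Llama-3.1-8B",
    max_model_len=1024,       # "Using max model len 1024"
    tensor_parallel_size=4,   # tensor_parallel_size=4 in the config dump
    seed=0,                   # seed=0 in the config dump
)
# Assumed: actual sampling settings are not logged; ~16 output tokens per prompt
# is consistent with the generations printed at the end of this log.
sampling_params = SamplingParams(max_tokens=16)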
xw32 JAX TPUWorker.__init__
WARNING 07-17 20:38:44 [utils.py:2753] Methods add_lora,cache_config,determine_num_available_blocks,device_config,get_cache_block_size_bytes,list_loras,load_config,pin_lora,remove_lora,scheduler_config,speculative_config not implemented in <tpu_commons.worker.tpu_worker_jax.TPUWorker object at 0x7a42e7d1da80>
xw32 JAX TPUWorker.init_device
WARNING 07-17 20:38:44 [tpu_worker_jax.py:73] Init devices | devices=[TpuDevice(id=0, process_index=0, coords=(0,0,0), core_on_chip=0), TpuDevice(id=1, process_index=0, coords=(1,0,0), core_on_chip=0), TpuDevice(id=2, process_index=0, coords=(0,1,0), core_on_chip=0), TpuDevice(id=3, process_index=0, coords=(1,1,0), core_on_chip=0)] | hbm=[(0.0, 31.25), (0.0, 31.25), (0.0, 31.25), (0.0, 31.25)]Gb
xw32 JAX TPUModelRunner.__init__
WARNING 07-17 20:38:44 [tpu_jax_runner.py:126] Init mesh | mesh=Mesh('data': 1, 'model': 4, axis_types=(Auto, Auto))
INFO 07-17 20:38:44 [tpu_torch_xla_runner.py:1475] Using exponential token paddings:
INFO 07-17 20:38:44 [tpu_torch_xla_runner.py:1477] 16
INFO 07-17 20:38:44 [tpu_torch_xla_runner.py:1477] 32
INFO 07-17 20:38:44 [tpu_torch_xla_runner.py:1477] 64
INFO 07-17 20:38:44 [tpu_torch_xla_runner.py:1477] 128
INFO 07-17 20:38:44 [tpu_torch_xla_runner.py:1477] 256
INFO 07-17 20:38:44 [tpu_torch_xla_runner.py:1477] 512
INFO 07-17 20:38:44 [tpu_torch_xla_runner.py:1477] 1024
INFO 07-17 20:38:44 [tpu_torch_xla_runner.py:1477] 2048
INFO 07-17 20:38:44 [tpu_torch_xla_runner.py:1477] 4096
INFO 07-17 20:38:44 [tpu_torch_xla_runner.py:1477] 8192
/mnt/disks/persist/tpu_commons/tpu_commons/runner/jax/block_table_jax.py:39: UserWarning: Explicitly requested dtype <class 'jax.numpy.int64'> requested in zeros is not available, and will be truncated to dtype int32. To enable more dtypes, set the jax_enable_x64 configuration option or the JAX_ENABLE_X64 shell environment variable. See https://github.com/jax-ml/jax#current-gotchas for more.
self.slot_mapping = jnp.zeros(self.max_num_batched_tokens,
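The UserWarning above is the stock JAX message for requesting int64 while 64-bit mode is off; the sketch below shows the two usual remedies, assuming plain JAX and nothing about tpu_commons internals beyond the line quoted.

# Sketch of how the int64-truncation warning is normally avoided (illustration only).
import jax
import jax.numpy as jnp

# Option 1: request a dtype that exists in the default 32-bit mode.
slot_mapping = jnp.zeros(8192, dtype=jnp.int32)   # 8192 = max_num_batched_tokens above

# Option 2: enable 64-bit types before any arrays are created (process-wide setting).
jax.config.update("jax_enable_x64", True)
slot_mapping_64 = jnp.zeros(8192, dtype=jnp.int64)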
INFO 07-17 20:38:44 [tpu_torch_xla_runner.py:1441] Preparing request paddings:
INFO 07-17 20:38:44 [tpu_torch_xla_runner.py:1448] 8
INFO 07-17 20:38:44 [tpu_torch_xla_runner.py:1448] 16
INFO 07-17 20:38:44 [tpu_torch_xla_runner.py:1448] 32
INFO 07-17 20:38:44 [tpu_torch_xla_runner.py:1448] 64
INFO 07-17 20:38:44 [tpu_torch_xla_runner.py:1448] 128
INFO 07-17 20:38:44 [tpu_torch_xla_runner.py:1448] 256
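Both padding tables above are plain power-of-two ladders (token paddings 16 to 8192, request paddings 8 to 256); a small sketch of how such buckets can be generated, mirroring the printed values rather than the actual tpu_torch_xla_runner implementation.

# Sketch only: reproduces the padding buckets printed above.
def exponential_paddings(start: int, maximum: int) -> list[int]:
    # Doubling buckets from `start` up to and including `maximum`.
    paddings, value = [], start
    while value <= maximum:
        paddings.append(value)
        value *= 2
    return paddings

print(exponential_paddings(16, 8192))  # [16, 32, ..., 8192] -- the token paddings above
print(exponential_paddings(8, 256))    # [8, 16, ..., 256]   -- the request paddings above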
INFO 07-17 20:38:44 [tpu_jax_runner.py:85] TPUModelRunner created!
xw32 JAX TPUWorker.load_model
xw32 JAX TPUModelRunner.load_model
INFO 07-17 20:38:44 [model_loader.py:173] Loading model, implementation type=vllm
/home/xiowei/miniconda3/envs/vllm/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py:351: UserWarning: Device capability of jax unspecified, assuming `cpu` and `cuda`. Please specify it via the `devices` argument of `register_backend`.
warnings.warn(
INFO 07-17 20:38:44 [importing.py:43] Triton is installed but 0 active driver(s) found (expected 1). Disabling Triton to prevent runtime errors.
INFO 07-17 20:38:44 [importing.py:63] Triton not installed or not compatible; certain GPU-related functions will not be available.
[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
[Gloo] Rank 0 is connected to 0 peer ranks. Expected number of connected peer ranks is : 0
xw32 VllmModelWrapper.__init__
xw32 VllmModelWrapper.load_weights
INFO 07-17 20:38:44 [parallel_state.py:1076] rank 0 in world size 1 is assigned as DP rank 0, PP rank 0, TP rank 0, EP rank 0
xw32 vllm LlamaForCausalLM.__init__
INFO 07-17 20:38:44 [tpu_jax.py:55] Cannot use None backend on TPU.
INFO 07-17 20:38:44 [tpu_jax.py:58] Using Pallas V1 backend.
INFO 07-17 20:38:45 [weight_utils.py:292] Using model weights format ['*.safetensors']
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:00<00:00, 7.12it/s]
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:00<00:00, 4.61it/s]
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:00<00:00, 3.86it/s]
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:00<00:00, 4.28it/s]
INFO 07-17 20:38:46 [default_loader.py:272] Loading weights took 1.08 seconds
xw32 _VllmRunner.__init__
xw32 JaxQKVParallelLinear.__init__
xw32 JaxAttention.__init__
xw32 JaxQKVParallelLinear.__init__
xw32 JaxAttention.__init__
xw32 JaxQKVParallelLinear.__init__
xw32 JaxAttention.__init__
xw32 JaxQKVParallelLinear.__init__
xw32 JaxAttention.__init__
xw32 JaxQKVParallelLinear.__init__
xw32 JaxAttention.__init__
xw32 JaxQKVParallelLinear.__init__
xw32 JaxAttention.__init__
xw32 JaxQKVParallelLinear.__init__
xw32 JaxAttention.__init__
xw32 JaxQKVParallelLinear.__init__
xw32 JaxAttention.__init__
xw32 JaxQKVParallelLinear.__init__
xw32 JaxAttention.__init__
xw32 JaxQKVParallelLinear.__init__
xw32 JaxAttention.__init__
xw32 JaxQKVParallelLinear.__init__
xw32 JaxAttention.__init__
xw32 JaxQKVParallelLinear.__init__
xw32 JaxAttention.__init__
xw32 JaxQKVParallelLinear.__init__
xw32 JaxAttention.__init__
xw32 JaxQKVParallelLinear.__init__
xw32 JaxAttention.__init__
xw32 JaxQKVParallelLinear.__init__
xw32 JaxAttention.__init__
xw32 JaxQKVParallelLinear.__init__
xw32 JaxAttention.__init__
xw32 JaxQKVParallelLinear.__init__
xw32 JaxAttention.__init__
xw32 JaxQKVParallelLinear.__init__
xw32 JaxAttention.__init__
xw32 JaxQKVParallelLinear.__init__
xw32 JaxAttention.__init__
xw32 JaxQKVParallelLinear.__init__
xw32 JaxAttention.__init__
xw32 JaxQKVParallelLinear.__init__
xw32 JaxAttention.__init__
xw32 JaxQKVParallelLinear.__init__
xw32 JaxAttention.__init__
xw32 JaxQKVParallelLinear.__init__
xw32 JaxAttention.__init__
xw32 JaxQKVParallelLinear.__init__
xw32 JaxAttention.__init__
xw32 JaxQKVParallelLinear.__init__
xw32 JaxAttention.__init__
xw32 JaxQKVParallelLinear.__init__
xw32 JaxAttention.__init__
xw32 JaxQKVParallelLinear.__init__
xw32 JaxAttention.__init__
xw32 JaxQKVParallelLinear.__init__
xw32 JaxAttention.__init__
xw32 JaxQKVParallelLinear.__init__
xw32 JaxAttention.__init__
xw32 JaxQKVParallelLinear.__init__
xw32 JaxAttention.__init__
xw32 JaxQKVParallelLinear.__init__
xw32 JaxAttention.__init__
xw32 JaxQKVParallelLinear.__init__
xw32 JaxAttention.__init__
INFO 07-17 20:38:56 [tpu_jax_runner.py:193] Init model | hbm=[(6.21, 31.25), (6.21, 31.25), (6.21, 31.25), (6.21, 31.25)]Gb
xw32 JAX TPUWorker.get_kv_cache_spec
xw32 JAX TPUModelRunner.get_kv_cache_spec
xw32 JAX TPUWorker.determine_available_memory
INFO 07-17 20:38:56 [kv_cache_utils.py:716] GPU KV cache size: 738,368 tokens
INFO 07-17 20:38:56 [kv_cache_utils.py:720] Maximum concurrency for 1,024 tokens per request: 721.06x
xw32 JAX TPUWorker.initialize_from_config
xw32 JAX TPUModelRunner.initialize_kv_cache
INFO 07-17 20:38:56 [tpu_jax_runner.py:256] PJRT C API
INFO 07-17 20:38:56 [tpu_jax_runner.py:256] TFRT TPU v6 lite
INFO 07-17 20:38:56 [tpu_jax_runner.py:256] Built on Jun 11 2025 03:26:53 (1749637613) cl/770014068
INFO 07-17 20:38:56 [tpu_jax_runner.py:257] Init kv-cache | shape=32 * (11537, 64, 16, 128) | sharding=NamedSharding(mesh=Mesh('data': 1, 'model': 4, axis_types=(Auto, Auto)), spec=PartitionSpec(None, None, 'model'), memory_kind=device) | hbm=[(28.74, 31.25), (28.74, 31.25), (28.74, 31.25), (28.74, 31.25)]Gb
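The per-chip HBM jump from 6.21 GB (weights) to 28.74 GB is accounted for by the kv-cache shape printed above; a worked check follows, assuming the 16 axis is K and V stacked for Llama-3.1-8B's 8 KV heads and a bfloat16 (2-byte) cache sharded over 'model': 4.

# Worked check of the kv-cache footprint printed above (assumptions noted in the lead-in).
layers, num_blocks, block_size, kv_heads_x2, head_dim = 32, 11537, 64, 16, 128
tokens = num_blocks * block_size                      # 738,368 -> "GPU KV cache size" line
concurrency = tokens / 1024                           # 721.06x for 1,024-token requests
total_bytes = layers * num_blocks * block_size * kv_heads_x2 * head_dim * 2
per_chip_gib = total_bytes / 4 / 1024**3              # ~22.53 GiB, i.e. 28.74 - 6.21 above
print(tokens, round(concurrency, 2), round(per_chip_gib, 2))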
xw32 JAX TPUWorker.compile_or_warm_up_model
xw32 JAX TPUModelRunner.capture_model
INFO 07-17 20:38:56 [core.py:172] init engine (profile, create kv cache, warmup model) took 0.05 seconds
WARNING 07-17 20:38:57 [config.py:1394] Default sampling parameters have been overridden by the model's Hugging Face generation config recommended from the model creator. If this is not intended, please relaunch vLLM instance with `--generation-config vllm`.
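The warning above suggests relaunching with `--generation-config vllm`; for the offline API used here, the equivalent would be a keyword argument on LLM (assumed form, not shown in this log):

llm = LLM(model="meta-llama/Llama-3.1-8B", generation_config="vllm")  # assumed kwarg form of --generation-config vllm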
Adding requests: 0%| | 0/35 [00:00<?, ?it/s]
Adding requests: 100%|██████████| 35/35 [00:00<00:00, 2874.22it/s]
Processed prompts: 0%| | 0/35 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]/home/xiowei/miniconda3/envs/vllm/lib/python3.10/site-packages/jax/_src/numpy/array_methods.py:122: UserWarning: Explicitly requested dtype int64 requested in astype is not available, and will be truncated to dtype int32. To enable more dtypes, set the jax_enable_x64 configuration option or the JAX_ENABLE_X64 shell environment variable. See https://github.com/jax-ml/jax#current-gotchas for more.
return lax_numpy.astype(self, dtype, copy=copy, device=device)
Processed prompts: 3%|▎ | 1/35 [00:37<21:24, 37.77s/it, est. speed input: 0.16 toks/s, output: 0.42 toks/s]xw32 JAX TPUWorker.initialize_cache
xw32 JAX TPUWorker.execute_model
xw32 JAX TPUModelRunner.execute_model
xw32 VllmModelWrapper.jit_step_func.step_fun
xw32 set_vllm_model_wrapper_context
xw32 _VllmRunner.compute_hidden_state
xw32 get_vllm_model_wrapper_context
xw32 get_vllm_model_wrapper_context
xw32 get_vllm_model_wrapper_context
xw32 get_vllm_model_wrapper_context
xw32 get_vllm_model_wrapper_context
xw32 get_vllm_model_wrapper_context
xw32 get_vllm_model_wrapper_context
xw32 get_vllm_model_wrapper_context
xw32 get_vllm_model_wrapper_context
xw32 get_vllm_model_wrapper_context
xw32 get_vllm_model_wrapper_context
xw32 get_vllm_model_wrapper_context
xw32 get_vllm_model_wrapper_context
xw32 get_vllm_model_wrapper_context
xw32 get_vllm_model_wrapper_context
xw32 get_vllm_model_wrapper_context
xw32 get_vllm_model_wrapper_context
xw32 get_vllm_model_wrapper_context
xw32 get_vllm_model_wrapper_context
xw32 get_vllm_model_wrapper_context
xw32 get_vllm_model_wrapper_context
xw32 get_vllm_model_wrapper_context
xw32 get_vllm_model_wrapper_context
xw32 get_vllm_model_wrapper_context
xw32 get_vllm_model_wrapper_context
xw32 get_vllm_model_wrapper_context
xw32 get_vllm_model_wrapper_context
xw32 get_vllm_model_wrapper_context
xw32 get_vllm_model_wrapper_context
xw32 get_vllm_model_wrapper_context
xw32 get_vllm_model_wrapper_context
xw32 get_vllm_model_wrapper_context
xw32 get_vllm_model_wrapper_context
xw32 VllmModelWrapper.jit_compute_logits_func.compute_logits_func
xw32 _VllmRunner.compute_logits
xw32 JAX TPUWorker.execute_model
xw32 JAX TPUModelRunner.execute_model
xw32 VllmModelWrapper.jit_step_func.step_fun
xw32 set_vllm_model_wrapper_context
xw32 _VllmRunner.compute_hidden_state
xw32 get_vllm_model_wrapper_context
xw32 get_vllm_model_wrapper_context
xw32 get_vllm_model_wrapper_context
xw32 get_vllm_model_wrapper_context
xw32 get_vllm_model_wrapper_context
xw32 get_vllm_model_wrapper_context
xw32 get_vllm_model_wrapper_context
xw32 get_vllm_model_wrapper_context
xw32 get_vllm_model_wrapper_context
xw32 get_vllm_model_wrapper_context
xw32 get_vllm_model_wrapper_context
xw32 get_vllm_model_wrapper_context
xw32 get_vllm_model_wrapper_context
xw32 get_vllm_model_wrapper_context
xw32 get_vllm_model_wrapper_context
xw32 get_vllm_model_wrapper_context
xw32 get_vllm_model_wrapper_context
xw32 get_vllm_model_wrapper_context
xw32 get_vllm_model_wrapper_context
xw32 get_vllm_model_wrapper_context
xw32 get_vllm_model_wrapper_context
xw32 get_vllm_model_wrapper_context
xw32 get_vllm_model_wrapper_context
xw32 get_vllm_model_wrapper_context
xw32 get_vllm_model_wrapper_context
xw32 get_vllm_model_wrapper_context
xw32 get_vllm_model_wrapper_context
xw32 get_vllm_model_wrapper_context
xw32 get_vllm_model_wrapper_context
xw32 get_vllm_model_wrapper_context
xw32 get_vllm_model_wrapper_context
xw32 get_vllm_model_wrapper_context
xw32 get_vllm_model_wrapper_context
xw32 VllmModelWrapper.jit_compute_logits_func.compute_logits_func
xw32 _VllmRunner.compute_logits
xw32 JAX TPUWorker.execute_model
xw32 JAX TPUModelRunner.execute_model
xw32 VllmModelWrapper.jit_step_func.step_fun
xw32 set_vllm_model_wrapper_context
xw32 _VllmRunner.compute_hidden_state
xw32 get_vllm_model_wrapper_context
xw32 get_vllm_model_wrapper_context
xw32 get_vllm_model_wrapper_context
xw32 get_vllm_model_wrapper_context
xw32 get_vllm_model_wrapper_context
xw32 get_vllm_model_wrapper_context
xw32 get_vllm_model_wrapper_context
xw32 get_vllm_model_wrapper_context
xw32 get_vllm_model_wrapper_context
xw32 get_vllm_model_wrapper_context
xw32 get_vllm_model_wrapper_context
xw32 get_vllm_model_wrapper_context
xw32 get_vllm_model_wrapper_context
xw32 get_vllm_model_wrapper_context
xw32 get_vllm_model_wrapper_context
xw32 get_vllm_model_wrapper_context
xw32 get_vllm_model_wrapper_context
xw32 get_vllm_model_wrapper_context
xw32 get_vllm_model_wrapper_context
xw32 get_vllm_model_wrapper_context
xw32 get_vllm_model_wrapper_context
xw32 get_vllm_model_wrapper_context
xw32 get_vllm_model_wrapper_context
xw32 get_vllm_model_wrapper_context
xw32 get_vllm_model_wrapper_context
xw32 get_vllm_model_wrapper_context
xw32 get_vllm_model_wrapper_context
xw32 get_vllm_model_wrapper_context
xw32 get_vllm_model_wrapper_context
xw32 get_vllm_model_wrapper_context
xw32 get_vllm_model_wrapper_context
xw32 get_vllm_model_wrapper_context
xw32 get_vllm_model_wrapper_context
xw32 JAX TPUWorker.execute_model
xw32 JAX TPUModelRunner.execute_model
xw32 JAX TPUWorker.execute_model
xw32 JAX TPUModelRunner.execute_model
xw32 JAX TPUWorker.execute_model
xw32 JAX TPUModelRunner.execute_model
xw32 JAX TPUWorker.execute_model
xw32 JAX TPUModelRunner.execute_model
xw32 JAX TPUWorker.execute_model
xw32 JAX TPUModelRunner.execute_model
xw32 JAX TPUWorker.execute_model
xw32 JAX TPUModelRunner.execute_model
xw32 JAX TPUWorker.execute_model
xw32 JAX TPUModelRunner.execute_model
xw32 JAX TPUWorker.execute_model
xw32 JAX TPUModelRunner.execute_model
xw32 JAX TPUWorker.execute_model
xw32 JAX TPUModelRunner.execute_model
xw32 JAX TPUWorker.execute_model
xw32 JAX TPUModelRunner.execute_model
xw32 JAX TPUWorker.execute_model
xw32 JAX TPUModelRunner.execute_model
xw32 JAX TPUWorker.execute_model
xw32 JAX TPUModelRunner.execute_model
xw32 JAX TPUWorker.execute_model
xw32 JAX TPUModelRunner.execute_model
xw32 JAX TPUWorker.execute_model
xw32 JAX TPUModelRunner.execute_model
xw32 JAX TPUWorker.execute_model
xw32 JAX TPUModelRunner.execute_model
WARNING 07-17 20:39:35 [tpu_jax_runner.py:592] Nothing scheduled: SchedulerOutput(scheduled_new_reqs=[], scheduled_cached_reqs=[], num_scheduled_tokens={}, total_num_scheduled_tokens=0, scheduled_spec_decode_tokens={}, scheduled_encoder_inputs={}, num_common_prefix_blocks=[0], finished_req_ids={'33', '6', '3', '22', '29', '12', '27', '34', '23', '28', '31', '4', '7', '9', '5', '18', '14', '25', '2', '16', '15', '21', '32', '8', '26', '30', '17', '24', '20', '1', '11', '19', '10', '13'}, free_encoder_input_ids=[], structured_output_request_ids={}, grammar_bitmask=None, kv_connector_metadata=None)!
Processed prompts: 100%|██████████| 35/35 [00:37<00:00, 37.77s/it, est. speed input: 9.74 toks/s, output: 14.82 toks/s]
Processed prompts: 100%|██████████| 35/35 [00:37<00:00, 1.08s/it, est. speed input: 9.74 toks/s, output: 14.82 toks/s]
--------------------------------------------------
Prompt: 'Hello, my name is'
Generated text: ' Kaitlyn and I am a 20 year old college student. I am'
--------------------------------------------------
Prompt: 'The capital of France is'
Generated text: ' a city of many faces. It is a city of history, culture, and'
--------------------------------------------------
Prompt: 'The colors of the rainbow are'
Generated text: ' red, orange, yellow, green, blue, indigo, and violet.'
--------------------------------------------------
Prompt: 'The future of AI is'
Generated text: ' here, and it’s already changing the way we live and work. From self'
--------------------------------------------------
Prompt: 'The president of the United States is'
Generated text: ' the head of state and head of government of the United States, indirectly elected to'
--------------------------------------------------
Prompt: 'How many players are on a standard soccer team on the field at one time?'
Generated text: ' A. 11 B. 12 C. 13 D. 14'
--------------------------------------------------
Prompt: 'In Greek mythology, who is the god of the sea?'
Generated text: ' Poseidon\nIn Greek mythology, who is the god of the sea?\nPose'
--------------------------------------------------
Prompt: 'In what year did the Titanic sink?'
Generated text: ' What was the name of the ship that sank in the Bermuda Triangle? What was'
--------------------------------------------------
Prompt: 'In which museum is the Mona Lisa displayed?'
Generated text: ' The Louvre Museum in Paris, France. The Mona Lisa is one of the'
--------------------------------------------------
Prompt: 'Mount Everest is located in which mountain range?'
Generated text: ' A. the Himalayas B. the Andes C. the Alps D.'
--------------------------------------------------
Prompt: 'What ancient empire was ruled by Julius Caesar?'
Generated text: ' What ancient empire was ruled by Julius Caesar?\nJulius Caesar was a Roman general'
--------------------------------------------------
Prompt: 'What are the four fundamental forces of nature?'
Generated text: ' What is the difference between a force and a field? What is the difference between'
--------------------------------------------------
Prompt: 'What does "CPU" stand for?'
Generated text: ' What is the difference between "CPU" and "processor"?\nThe term "'
--------------------------------------------------
Prompt: 'What does "HTML" stand for?'
Generated text: ' What does "CSS" stand for? What does "JavaScript" stand for?'
--------------------------------------------------
Prompt: 'What is the capital of Australia?'
Generated text: ' What is the capital of Australia?\nWhat is the capital of Australia?\nThe capital'
--------------------------------------------------
Prompt: 'What is the chemical symbol for gold?'
Generated text: ' A. Au B. Ag C. Cu D. Pb\nThe chemical symbol'
--------------------------------------------------
Prompt: 'What is the currency of Switzerland?'
Generated text: ' The Swiss franc (CHF) is the official currency of Switzerland. The Swiss'
--------------------------------------------------
Prompt: 'What is the distance from the Earth to the Sun called?'
Generated text: ' A. the moon\nB. the solar system\nC. the orbit\n'
--------------------------------------------------
Prompt: 'What is the freezing point of water in Celsius?'
Generated text: ' The freezing point of water is 0 degrees Celsius. The freezing point of water'
--------------------------------------------------
Prompt: 'What is the hardest known natural substance on Earth?'
Generated text: ' Diamond, of course. But what is the hardest known man-made substance? That'
--------------------------------------------------
Prompt: 'What is the largest planet in our solar system?'
Generated text: ' Jupiter. What is the smallest planet in our solar system? Mercury. What is'
--------------------------------------------------
Prompt: 'What is the longest river in the world?'
Generated text: ' The Nile River is the longest river in the world. It is 4,'
--------------------------------------------------
Prompt: 'What is the main function of the kidneys in the human body?'
Generated text: ' A. to produce urine B. to produce blood C. to produce sweat D'
--------------------------------------------------
Prompt: 'What is the main ingredient in guacamole?'
Generated text: ' Avocados, of course! But what about the other ingredients? What are'
--------------------------------------------------
Prompt: 'What is the most spoken language in the world by number of native speakers?'
Generated text: ' The answer is Mandarin Chinese, which is spoken by over 1 billion people.'
--------------------------------------------------
Prompt: 'What is the process by which plants use sunlight to create food?'
Generated text: ' A. photosynthesis\nB. respiration\nC. digestion\nD.'
--------------------------------------------------
Prompt: 'Which country is known as the Land of the Rising Sun?'
Generated text: ' Japan\nWhich country is known as the Land of the Rising Sun?\nA.'
--------------------------------------------------
Prompt: 'Who developed the theory of general relativity?'
Generated text: ' Albert Einstein\nWhat is the theory of general relativity? The theory of general'
--------------------------------------------------
Prompt: 'Who directed the original "Star Wars" trilogy?'
Generated text: ' George Lucas.\nWho directed the original "Star Wars" trilogy? George Lucas.'
--------------------------------------------------
Prompt: 'Who is credited with inventing the telephone?'
Generated text: ' Alexander Graham Bell\nWhat is the name of the first telephone? The telephone was'
--------------------------------------------------
Prompt: 'Who painted the ceiling of the Sistine Chapel?'
Generated text: ' Michelangelo. What is the name of the painting on the ceiling of the S'
--------------------------------------------------
Prompt: 'Who was the first female Prime Minister of the United Kingdom?'
Generated text: ' Margaret Thatcher\nWho was the first female Prime Minister of the United Kingdom?\nMarg'
--------------------------------------------------
Prompt: 'Who was the first person to walk on the moon?'
Generated text: ' What is the largest planet in our solar system? What is the name of the'
--------------------------------------------------
Prompt: 'Who wrote the American Declaration of Independence?'
Generated text: ' Who wrote the American Declaration of Independence?\nWho wrote the American Declaration of Independence?\n'
--------------------------------------------------
Prompt: 'Who wrote the novel "Pride and Prejudice"?'
Generated text: ' Jane Austen\nWhat is the name of the main character in "Pride'
--------------------------------------------------
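The per-prompt blocks above match the usual vLLM offline-inference print loop; a minimal sketch follows, reusing the llm and sampling_params objects from the sketch near the top of the log and assuming the actual script is not included in this gist (prompt list abbreviated).

# Sketch of the loop that produces the "Prompt:/Generated text:" blocks above.
prompts = [
    "Hello, my name is",
    "The capital of France is",
    # ... 33 more prompts, as printed above
]
outputs = llm.generate(prompts, sampling_params)
for output in outputs:
    print("-" * 50)
    print(f"Prompt: {output.prompt!r}")
    print(f"Generated text: {output.outputs[0].text!r}")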