olmocr log when run on an NVIDIA GeForce RTX 5090 GPU
root@2fdffe8b8e20:~# python -m olmocr.pipeline ./localworkspace --markdown --pdfs olmocr-sample.pdf | |
INFO:olmocr.check:pdftoppm is installed and working. | |
2025-06-14 06:27:39,378 - __main__ - INFO - Got --pdfs argument, going to add to the work queue | |
2025-06-14 06:27:39,378 - __main__ - INFO - Loading file at olmocr-sample.pdf as PDF document | |
2025-06-14 06:27:39,378 - __main__ - INFO - Found 1 total pdf paths to add | |
Sampling PDFs to calculate optimal length: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 530.66it/s] | |
2025-06-14 06:27:39,381 - __main__ - INFO - Calculated items_per_group: 166 based on average pages per PDF: 3.00 | |
INFO:olmocr.work_queue:Found 1 total paths | |
INFO:olmocr.work_queue:0 new paths to add to the workspace | |
/usr/local/lib/python3.11/dist-packages/torch/cuda/__init__.py:235: UserWarning: | |
NVIDIA GeForce RTX 5090 with CUDA capability sm_120 is not compatible with the current PyTorch installation. | |
The current PyTorch install supports CUDA capabilities sm_50 sm_60 sm_70 sm_75 sm_80 sm_86 sm_90. | |
If you want to use the NVIDIA GeForce RTX 5090 GPU with PyTorch, please check the instructions at https://pytorch.org/get-started/locally/ | |
warnings.warn( | |
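The warning above is the root cause of everything that follows: the installed PyTorch wheel only ships kernels up to sm_90, while the RTX 5090 reports compute capability sm_120. A minimal sketch to confirm the mismatch from the same container (stock PyTorch APIs only; not part of olmocr):

```python
# Compare the GPU's compute capability with the architectures the installed
# PyTorch wheel was compiled for. On this setup the device reports (12, 0),
# i.e. sm_120, which is missing from the wheel's arch list (sm_50 ... sm_90).
import torch

major, minor = torch.cuda.get_device_capability(0)
print("device capability:", f"sm_{major}{minor}")
print("kernels in this wheel:", torch.cuda.get_arch_list())
```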
2025-06-14 06:27:39,506 - __main__ - INFO - Starting pipeline with PID 3844 | |
2025-06-14 06:27:39,506 - __main__ - INFO - Downloading model with hugging face 'allenai/olmOCR-7B-0225-preview' | |
Fetching 15 files: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 15/15 [00:00<00:00, 62045.92it/s] | |
INFO:olmocr.work_queue:Initialized local queue with 1 work items | |
2025-06-14 06:27:39,642 - __main__ - WARNING - Attempt 1: Please wait for sglang server to become ready... | |
2025-06-14 06:27:40,649 - __main__ - WARNING - Attempt 2: Please wait for sglang server to become ready... | |
2025-06-14 06:27:41,653 - __main__ - WARNING - Attempt 3: Please wait for sglang server to become ready... | |
2025-06-14 06:27:42,658 - __main__ - WARNING - Attempt 4: Please wait for sglang server to become ready... | |
2025-06-14 06:27:42,911 - __main__ - INFO - /usr/local/lib/python3.11/dist-packages/pyairports/airports.py:1: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81. | |
2025-06-14 06:27:42,911 - __main__ - INFO - from pkg_resources import resource_string | |
2025-06-14 06:27:43,050 - __main__ - INFO - [2025-06-14 06:27:43] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=8192, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=476744651, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=160, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None) | |
2025-06-14 06:27:43,328 - __main__ - INFO - /usr/local/lib/python3.11/dist-packages/torch/cuda/__init__.py:235: UserWarning: | |
2025-06-14 06:27:43,328 - __main__ - INFO - NVIDIA GeForce RTX 5090 with CUDA capability sm_120 is not compatible with the current PyTorch installation. | |
2025-06-14 06:27:43,329 - __main__ - INFO - The current PyTorch install supports CUDA capabilities sm_50 sm_60 sm_70 sm_75 sm_80 sm_86 sm_90. | |
2025-06-14 06:27:43,329 - __main__ - INFO - If you want to use the NVIDIA GeForce RTX 5090 GPU with PyTorch, please check the instructions at https://pytorch.org/get-started/locally/ | |
2025-06-14 06:27:43,329 - __main__ - INFO - | |
2025-06-14 06:27:43,329 - __main__ - INFO - warnings.warn( | |
2025-06-14 06:27:43,664 - __main__ - WARNING - Attempt 5: Please wait for sglang server to become ready... | |
2025-06-14 06:27:44,197 - __main__ - INFO - [2025-06-14 06:27:44] Use chat template for the OpenAI-compatible API server: qwen2-vl | |
2025-06-14 06:27:44,669 - __main__ - WARNING - Attempt 6: Please wait for sglang server to become ready... | |
2025-06-14 06:27:45,674 - __main__ - WARNING - Attempt 7: Please wait for sglang server to become ready... | |
2025-06-14 06:27:46,550 - __main__ - INFO - /usr/local/lib/python3.11/dist-packages/pyairports/airports.py:1: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81. | |
2025-06-14 06:27:46,550 - __main__ - INFO - from pkg_resources import resource_string | |
2025-06-14 06:27:46,587 - __main__ - INFO - /usr/local/lib/python3.11/dist-packages/pyairports/airports.py:1: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81. | |
2025-06-14 06:27:46,587 - __main__ - INFO - from pkg_resources import resource_string | |
2025-06-14 06:27:46,681 - __main__ - WARNING - Attempt 8: Please wait for sglang server to become ready... | |
2025-06-14 06:27:46,914 - __main__ - INFO - /usr/local/lib/python3.11/dist-packages/torch/cuda/__init__.py:235: UserWarning: | |
2025-06-14 06:27:46,914 - __main__ - INFO - NVIDIA GeForce RTX 5090 with CUDA capability sm_120 is not compatible with the current PyTorch installation. | |
2025-06-14 06:27:46,914 - __main__ - INFO - The current PyTorch install supports CUDA capabilities sm_50 sm_60 sm_70 sm_75 sm_80 sm_86 sm_90. | |
2025-06-14 06:27:46,914 - __main__ - INFO - If you want to use the NVIDIA GeForce RTX 5090 GPU with PyTorch, please check the instructions at https://pytorch.org/get-started/locally/ | |
2025-06-14 06:27:46,914 - __main__ - INFO - | |
2025-06-14 06:27:46,914 - __main__ - INFO - warnings.warn( | |
2025-06-14 06:27:46,990 - __main__ - INFO - /usr/local/lib/python3.11/dist-packages/torch/cuda/__init__.py:235: UserWarning: | |
2025-06-14 06:27:46,990 - __main__ - INFO - NVIDIA GeForce RTX 5090 with CUDA capability sm_120 is not compatible with the current PyTorch installation. | |
2025-06-14 06:27:46,990 - __main__ - INFO - The current PyTorch install supports CUDA capabilities sm_50 sm_60 sm_70 sm_75 sm_80 sm_86 sm_90. | |
2025-06-14 06:27:46,990 - __main__ - INFO - If you want to use the NVIDIA GeForce RTX 5090 GPU with PyTorch, please check the instructions at https://pytorch.org/get-started/locally/ | |
2025-06-14 06:27:46,990 - __main__ - INFO - | |
2025-06-14 06:27:46,990 - __main__ - INFO - warnings.warn( | |
2025-06-14 06:27:47,686 - __main__ - WARNING - Attempt 9: Please wait for sglang server to become ready... | |
2025-06-14 06:27:47,771 - __main__ - INFO - [2025-06-14 06:27:47 TP0] Overlap scheduler is disabled for multimodal models. | |
2025-06-14 06:27:47,919 - __main__ - INFO - [2025-06-14 06:27:47 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model. | |
2025-06-14 06:27:47,919 - __main__ - INFO - [2025-06-14 06:27:47 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl. | |
2025-06-14 06:27:47,919 - __main__ - INFO - [2025-06-14 06:27:47 TP0] Init torch distributed begin. | |
2025-06-14 06:27:47,960 - __main__ - INFO - /usr/local/lib/python3.11/dist-packages/torch/cuda/__init__.py:235: UserWarning: | |
2025-06-14 06:27:47,960 - __main__ - INFO - NVIDIA GeForce RTX 5090 with CUDA capability sm_120 is not compatible with the current PyTorch installation. | |
2025-06-14 06:27:47,960 - __main__ - INFO - The current PyTorch install supports CUDA capabilities sm_50 sm_60 sm_70 sm_75 sm_80 sm_86 sm_90. | |
2025-06-14 06:27:47,960 - __main__ - INFO - If you want to use the NVIDIA GeForce RTX 5090 GPU with PyTorch, please check the instructions at https://pytorch.org/get-started/locally/ | |
2025-06-14 06:27:47,960 - __main__ - INFO - | |
2025-06-14 06:27:47,960 - __main__ - INFO - warnings.warn( | |
2025-06-14 06:27:48,060 - __main__ - INFO - [2025-06-14 06:27:48 TP0] Load weight begin. avail mem=27.12 GB | |
2025-06-14 06:27:48,236 - __main__ - INFO - [2025-06-14 06:27:48 TP0] Ignore import error when loading sglang.srt.models.llava. Failed to import transformers.models.clip.modeling_clip because of the following error (look up to see its traceback): | |
2025-06-14 06:27:48,236 - __main__ - INFO - CUDA error: no kernel image is available for execution on the device | |
2025-06-14 06:27:48,236 - __main__ - INFO - CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect. | |
2025-06-14 06:27:48,236 - __main__ - INFO - For debugging consider passing CUDA_LAUNCH_BLOCKING=1 | |
2025-06-14 06:27:48,236 - __main__ - INFO - Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions. | |
2025-06-14 06:27:48,236 - __main__ - INFO - | |
2025-06-14 06:27:48,258 - __main__ - INFO - [2025-06-14 06:27:48 TP0] Scheduler hit an exception: Traceback (most recent call last): | |
2025-06-14 06:27:48,258 - __main__ - INFO - File "/usr/local/lib/python3.11/dist-packages/sglang/srt/managers/scheduler.py", line 1773, in run_scheduler_process | |
2025-06-14 06:27:48,259 - __main__ - INFO - scheduler = Scheduler(server_args, port_args, gpu_id, tp_rank, dp_rank) | |
2025-06-14 06:27:48,259 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
2025-06-14 06:27:48,259 - __main__ - INFO - File "/usr/local/lib/python3.11/dist-packages/sglang/srt/managers/scheduler.py", line 239, in __init__ | |
2025-06-14 06:27:48,259 - __main__ - INFO - self.tp_worker = TpWorkerClass( | |
2025-06-14 06:27:48,259 - __main__ - INFO - ^^^^^^^^^^^^^^ | |
2025-06-14 06:27:48,259 - __main__ - INFO - File "/usr/local/lib/python3.11/dist-packages/sglang/srt/managers/tp_worker.py", line 68, in __init__ | |
2025-06-14 06:27:48,259 - __main__ - INFO - self.model_runner = ModelRunner( | |
2025-06-14 06:27:48,259 - __main__ - INFO - ^^^^^^^^^^^^ | |
2025-06-14 06:27:48,259 - __main__ - INFO - File "/usr/local/lib/python3.11/dist-packages/sglang/srt/model_executor/model_runner.py", line 185, in __init__ | |
2025-06-14 06:27:48,259 - __main__ - INFO - self.load_model() | |
2025-06-14 06:27:48,259 - __main__ - INFO - File "/usr/local/lib/python3.11/dist-packages/sglang/srt/model_executor/model_runner.py", line 306, in load_model | |
2025-06-14 06:27:48,259 - __main__ - INFO - self.model = get_model( | |
2025-06-14 06:27:48,259 - __main__ - INFO - ^^^^^^^^^^ | |
2025-06-14 06:27:48,259 - __main__ - INFO - File "/usr/local/lib/python3.11/dist-packages/sglang/srt/model_loader/__init__.py", line 22, in get_model | |
2025-06-14 06:27:48,259 - __main__ - INFO - return loader.load_model( | |
2025-06-14 06:27:48,259 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^ | |
2025-06-14 06:27:48,259 - __main__ - INFO - File "/usr/local/lib/python3.11/dist-packages/sglang/srt/model_loader/loader.py", line 357, in load_model | |
2025-06-14 06:27:48,259 - __main__ - INFO - model = _initialize_model( | |
2025-06-14 06:27:48,259 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^ | |
2025-06-14 06:27:48,260 - __main__ - INFO - File "/usr/local/lib/python3.11/dist-packages/sglang/srt/model_loader/loader.py", line 138, in _initialize_model | |
2025-06-14 06:27:48,260 - __main__ - INFO - return model_class( | |
2025-06-14 06:27:48,260 - __main__ - INFO - ^^^^^^^^^^^^ | |
2025-06-14 06:27:48,260 - __main__ - INFO - File "/usr/local/lib/python3.11/dist-packages/sglang/srt/models/qwen2_vl.py", line 435, in __init__ | |
2025-06-14 06:27:48,260 - __main__ - INFO - self.visual = Qwen2VisionTransformer( | |
2025-06-14 06:27:48,260 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^ | |
2025-06-14 06:27:48,260 - __main__ - INFO - File "/usr/local/lib/python3.11/dist-packages/sglang/srt/models/qwen2_vl.py", line 279, in __init__ | |
2025-06-14 06:27:48,260 - __main__ - INFO - self.patch_embed = Qwen2VisionPatchEmbed( | |
2025-06-14 06:27:48,260 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^ | |
2025-06-14 06:27:48,260 - __main__ - INFO - File "/usr/local/lib/python3.11/dist-packages/sglang/srt/models/qwen2_vl.py", line 170, in __init__ | |
2025-06-14 06:27:48,260 - __main__ - INFO - self.proj = nn.Conv3d( | |
2025-06-14 06:27:48,260 - __main__ - INFO - ^^^^^^^^^^ | |
2025-06-14 06:27:48,260 - __main__ - INFO - File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/conv.py", line 692, in __init__ | |
2025-06-14 06:27:48,260 - __main__ - INFO - super().__init__( | |
2025-06-14 06:27:48,260 - __main__ - INFO - File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/conv.py", line 176, in __init__ | |
2025-06-14 06:27:48,260 - __main__ - INFO - self.reset_parameters() | |
2025-06-14 06:27:48,260 - __main__ - INFO - File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/conv.py", line 182, in reset_parameters | |
2025-06-14 06:27:48,260 - __main__ - INFO - init.kaiming_uniform_(self.weight, a=math.sqrt(5)) | |
2025-06-14 06:27:48,260 - __main__ - INFO - File "/usr/local/lib/python3.11/dist-packages/torch/nn/init.py", line 500, in kaiming_uniform_ | |
2025-06-14 06:27:48,260 - __main__ - INFO - return torch.overrides.handle_torch_function( | |
2025-06-14 06:27:48,260 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
2025-06-14 06:27:48,260 - __main__ - INFO - File "/usr/local/lib/python3.11/dist-packages/torch/overrides.py", line 1717, in handle_torch_function | |
2025-06-14 06:27:48,260 - __main__ - INFO - result = mode.__torch_function__(public_api, types, args, kwargs) | |
2025-06-14 06:27:48,260 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
2025-06-14 06:27:48,260 - __main__ - INFO - File "/usr/local/lib/python3.11/dist-packages/torch/utils/_device.py", line 106, in __torch_function__ | |
2025-06-14 06:27:48,260 - __main__ - INFO - return func(*args, **kwargs) | |
2025-06-14 06:27:48,260 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^ | |
2025-06-14 06:27:48,260 - __main__ - INFO - File "/usr/local/lib/python3.11/dist-packages/torch/nn/init.py", line 518, in kaiming_uniform_ | |
2025-06-14 06:27:48,260 - __main__ - INFO - return tensor.uniform_(-bound, bound, generator=generator) | |
2025-06-14 06:27:48,260 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
2025-06-14 06:27:48,260 - __main__ - INFO - RuntimeError: CUDA error: no kernel image is available for execution on the device | |
2025-06-14 06:27:48,260 - __main__ - INFO - CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect. | |
2025-06-14 06:27:48,260 - __main__ - INFO - For debugging consider passing CUDA_LAUNCH_BLOCKING=1 | |
2025-06-14 06:27:48,260 - __main__ - INFO - Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions. | |
2025-06-14 06:27:48,260 - __main__ - INFO - | |
2025-06-14 06:27:48,260 - __main__ - INFO - | |
2025-06-14 06:27:48,260 - __main__ - INFO - [2025-06-14 06:27:48] Received sigquit from a child proces. It usually means the child failed. | |
2025-06-14 06:27:48,470 - __main__ - WARNING - SGLang server task ended | |
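For reference, the scheduler traceback above bottoms out in `tensor.uniform_()` during weight initialization. Any CUDA kernel launch reproduces the same failure on this build; a minimal sketch (not olmocr code):

```python
# Reproduces "no kernel image is available for execution on the device":
# the wheel contains no sm_120 binaries, so the very first kernel launch fails.
import torch

w = torch.empty(4, 4, device="cuda")
w.uniform_()  # raises RuntimeError: CUDA error: no kernel image is available ...
```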
2025-06-14 06:27:48,692 - __main__ - WARNING - Attempt 10: Please wait for sglang server to become ready... | |
2025-06-14 06:27:49,697 - __main__ - WARNING - Attempt 11: Please wait for sglang server to become ready... | |
2025-06-14 06:27:50,703 - __main__ - WARNING - Attempt 12: Please wait for sglang server to become ready... | |
2025-06-14 06:27:51,708 - __main__ - WARNING - Attempt 13: Please wait for sglang server to become ready... | |
2025-06-14 06:27:51,722 - __main__ - INFO - /usr/local/lib/python3.11/dist-packages/pyairports/airports.py:1: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81. | |
2025-06-14 06:27:51,722 - __main__ - INFO - from pkg_resources import resource_string | |
2025-06-14 06:27:51,840 - __main__ - INFO - [2025-06-14 06:27:51] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=8192, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=582389988, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=160, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None) | |
2025-06-14 06:27:52,051 - __main__ - INFO - /usr/local/lib/python3.11/dist-packages/torch/cuda/__init__.py:235: UserWarning: | |
2025-06-14 06:27:52,051 - __main__ - INFO - NVIDIA GeForce RTX 5090 with CUDA capability sm_120 is not compatible with the current PyTorch installation. | |
2025-06-14 06:27:52,051 - __main__ - INFO - The current PyTorch install supports CUDA capabilities sm_50 sm_60 sm_70 sm_75 sm_80 sm_86 sm_90. | |
2025-06-14 06:27:52,051 - __main__ - INFO - If you want to use the NVIDIA GeForce RTX 5090 GPU with PyTorch, please check the instructions at https://pytorch.org/get-started/locally/ | |
2025-06-14 06:27:52,051 - __main__ - INFO - | |
2025-06-14 06:27:52,051 - __main__ - INFO - warnings.warn( | |
2025-06-14 06:27:52,714 - __main__ - WARNING - Attempt 14: Please wait for sglang server to become ready... | |
2025-06-14 06:27:53,007 - __main__ - INFO - [2025-06-14 06:27:53] Use chat template for the OpenAI-compatible API server: qwen2-vl | |
2025-06-14 06:27:53,721 - __main__ - WARNING - Attempt 15: Please wait for sglang server to become ready... | |
2025-06-14 06:27:54,732 - __main__ - WARNING - Attempt 16: Please wait for sglang server to become ready... | |
2025-06-14 06:27:55,432 - __main__ - INFO - /usr/local/lib/python3.11/dist-packages/pyairports/airports.py:1: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81. | |
2025-06-14 06:27:55,432 - __main__ - INFO - from pkg_resources import resource_string | |
2025-06-14 06:27:55,517 - __main__ - INFO - /usr/local/lib/python3.11/dist-packages/pyairports/airports.py:1: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81. | |
2025-06-14 06:27:55,517 - __main__ - INFO - from pkg_resources import resource_string | |
2025-06-14 06:27:55,737 - __main__ - WARNING - Attempt 17: Please wait for sglang server to become ready... | |
2025-06-14 06:27:55,760 - __main__ - INFO - /usr/local/lib/python3.11/dist-packages/torch/cuda/__init__.py:235: UserWarning: | |
2025-06-14 06:27:55,761 - __main__ - INFO - NVIDIA GeForce RTX 5090 with CUDA capability sm_120 is not compatible with the current PyTorch installation. | |
2025-06-14 06:27:55,761 - __main__ - INFO - The current PyTorch install supports CUDA capabilities sm_50 sm_60 sm_70 sm_75 sm_80 sm_86 sm_90. | |
2025-06-14 06:27:55,761 - __main__ - INFO - If you want to use the NVIDIA GeForce RTX 5090 GPU with PyTorch, please check the instructions at https://pytorch.org/get-started/locally/ | |
2025-06-14 06:27:55,761 - __main__ - INFO - | |
2025-06-14 06:27:55,761 - __main__ - INFO - warnings.warn( | |
2025-06-14 06:27:55,845 - __main__ - INFO - /usr/local/lib/python3.11/dist-packages/torch/cuda/__init__.py:235: UserWarning: | |
2025-06-14 06:27:55,845 - __main__ - INFO - NVIDIA GeForce RTX 5090 with CUDA capability sm_120 is not compatible with the current PyTorch installation. | |
2025-06-14 06:27:55,845 - __main__ - INFO - The current PyTorch install supports CUDA capabilities sm_50 sm_60 sm_70 sm_75 sm_80 sm_86 sm_90. | |
2025-06-14 06:27:55,845 - __main__ - INFO - If you want to use the NVIDIA GeForce RTX 5090 GPU with PyTorch, please check the instructions at https://pytorch.org/get-started/locally/ | |
2025-06-14 06:27:55,845 - __main__ - INFO - | |
2025-06-14 06:27:55,845 - __main__ - INFO - warnings.warn( | |
2025-06-14 06:27:56,743 - __main__ - WARNING - Attempt 18: Please wait for sglang server to become ready... | |
2025-06-14 06:27:57,122 - __main__ - INFO - [2025-06-14 06:27:57 TP0] Overlap scheduler is disabled for multimodal models. | |
2025-06-14 06:27:57,266 - __main__ - INFO - [2025-06-14 06:27:57 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model. | |
2025-06-14 06:27:57,266 - __main__ - INFO - [2025-06-14 06:27:57 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl. | |
2025-06-14 06:27:57,266 - __main__ - INFO - [2025-06-14 06:27:57 TP0] Init torch distributed begin. | |
2025-06-14 06:27:57,307 - __main__ - INFO - /usr/local/lib/python3.11/dist-packages/torch/cuda/__init__.py:235: UserWarning: | |
2025-06-14 06:27:57,307 - __main__ - INFO - NVIDIA GeForce RTX 5090 with CUDA capability sm_120 is not compatible with the current PyTorch installation. | |
2025-06-14 06:27:57,307 - __main__ - INFO - The current PyTorch install supports CUDA capabilities sm_50 sm_60 sm_70 sm_75 sm_80 sm_86 sm_90. | |
2025-06-14 06:27:57,307 - __main__ - INFO - If you want to use the NVIDIA GeForce RTX 5090 GPU with PyTorch, please check the instructions at https://pytorch.org/get-started/locally/ | |
2025-06-14 06:27:57,307 - __main__ - INFO - | |
2025-06-14 06:27:57,307 - __main__ - INFO - warnings.warn( | |
2025-06-14 06:27:57,417 - __main__ - INFO - [2025-06-14 06:27:57 TP0] Load weight begin. avail mem=27.12 GB | |
2025-06-14 06:27:57,585 - __main__ - INFO - [2025-06-14 06:27:57 TP0] Ignore import error when loading sglang.srt.models.llava. Failed to import transformers.models.clip.modeling_clip because of the following error (look up to see its traceback): | |
2025-06-14 06:27:57,586 - __main__ - INFO - CUDA error: no kernel image is available for execution on the device | |
2025-06-14 06:27:57,586 - __main__ - INFO - CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect. | |
2025-06-14 06:27:57,586 - __main__ - INFO - For debugging consider passing CUDA_LAUNCH_BLOCKING=1 | |
2025-06-14 06:27:57,586 - __main__ - INFO - Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions. | |
2025-06-14 06:27:57,586 - __main__ - INFO - | |
2025-06-14 06:27:57,607 - __main__ - INFO - [2025-06-14 06:27:57 TP0] Scheduler hit an exception: Traceback (most recent call last): | |
2025-06-14 06:27:57,607 - __main__ - INFO - File "/usr/local/lib/python3.11/dist-packages/sglang/srt/managers/scheduler.py", line 1773, in run_scheduler_process | |
2025-06-14 06:27:57,607 - __main__ - INFO - scheduler = Scheduler(server_args, port_args, gpu_id, tp_rank, dp_rank) | |
2025-06-14 06:27:57,607 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
2025-06-14 06:27:57,607 - __main__ - INFO - File "/usr/local/lib/python3.11/dist-packages/sglang/srt/managers/scheduler.py", line 239, in __init__ | |
2025-06-14 06:27:57,607 - __main__ - INFO - self.tp_worker = TpWorkerClass( | |
2025-06-14 06:27:57,607 - __main__ - INFO - ^^^^^^^^^^^^^^ | |
2025-06-14 06:27:57,607 - __main__ - INFO - File "/usr/local/lib/python3.11/dist-packages/sglang/srt/managers/tp_worker.py", line 68, in __init__ | |
2025-06-14 06:27:57,607 - __main__ - INFO - self.model_runner = ModelRunner( | |
2025-06-14 06:27:57,607 - __main__ - INFO - ^^^^^^^^^^^^ | |
2025-06-14 06:27:57,607 - __main__ - INFO - File "/usr/local/lib/python3.11/dist-packages/sglang/srt/model_executor/model_runner.py", line 185, in __init__ | |
2025-06-14 06:27:57,607 - __main__ - INFO - self.load_model() | |
2025-06-14 06:27:57,608 - __main__ - INFO - File "/usr/local/lib/python3.11/dist-packages/sglang/srt/model_executor/model_runner.py", line 306, in load_model | |
2025-06-14 06:27:57,608 - __main__ - INFO - self.model = get_model( | |
2025-06-14 06:27:57,608 - __main__ - INFO - ^^^^^^^^^^ | |
2025-06-14 06:27:57,608 - __main__ - INFO - File "/usr/local/lib/python3.11/dist-packages/sglang/srt/model_loader/__init__.py", line 22, in get_model | |
2025-06-14 06:27:57,608 - __main__ - INFO - return loader.load_model( | |
2025-06-14 06:27:57,608 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^ | |
2025-06-14 06:27:57,608 - __main__ - INFO - File "/usr/local/lib/python3.11/dist-packages/sglang/srt/model_loader/loader.py", line 357, in load_model | |
2025-06-14 06:27:57,608 - __main__ - INFO - model = _initialize_model( | |
2025-06-14 06:27:57,608 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^ | |
2025-06-14 06:27:57,608 - __main__ - INFO - File "/usr/local/lib/python3.11/dist-packages/sglang/srt/model_loader/loader.py", line 138, in _initialize_model | |
2025-06-14 06:27:57,608 - __main__ - INFO - return model_class( | |
2025-06-14 06:27:57,608 - __main__ - INFO - ^^^^^^^^^^^^ | |
2025-06-14 06:27:57,608 - __main__ - INFO - File "/usr/local/lib/python3.11/dist-packages/sglang/srt/models/qwen2_vl.py", line 435, in __init__ | |
2025-06-14 06:27:57,608 - __main__ - INFO - self.visual = Qwen2VisionTransformer( | |
2025-06-14 06:27:57,608 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^ | |
2025-06-14 06:27:57,608 - __main__ - INFO - File "/usr/local/lib/python3.11/dist-packages/sglang/srt/models/qwen2_vl.py", line 279, in __init__ | |
2025-06-14 06:27:57,608 - __main__ - INFO - self.patch_embed = Qwen2VisionPatchEmbed( | |
2025-06-14 06:27:57,608 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^ | |
2025-06-14 06:27:57,608 - __main__ - INFO - File "/usr/local/lib/python3.11/dist-packages/sglang/srt/models/qwen2_vl.py", line 170, in __init__ | |
2025-06-14 06:27:57,608 - __main__ - INFO - self.proj = nn.Conv3d( | |
2025-06-14 06:27:57,608 - __main__ - INFO - ^^^^^^^^^^ | |
2025-06-14 06:27:57,608 - __main__ - INFO - File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/conv.py", line 692, in __init__ | |
2025-06-14 06:27:57,608 - __main__ - INFO - super().__init__( | |
2025-06-14 06:27:57,608 - __main__ - INFO - File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/conv.py", line 176, in __init__ | |
2025-06-14 06:27:57,608 - __main__ - INFO - self.reset_parameters() | |
2025-06-14 06:27:57,608 - __main__ - INFO - File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/conv.py", line 182, in reset_parameters | |
2025-06-14 06:27:57,608 - __main__ - INFO - init.kaiming_uniform_(self.weight, a=math.sqrt(5)) | |
2025-06-14 06:27:57,608 - __main__ - INFO - File "/usr/local/lib/python3.11/dist-packages/torch/nn/init.py", line 500, in kaiming_uniform_ | |
2025-06-14 06:27:57,608 - __main__ - INFO - return torch.overrides.handle_torch_function( | |
2025-06-14 06:27:57,608 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
2025-06-14 06:27:57,608 - __main__ - INFO - File "/usr/local/lib/python3.11/dist-packages/torch/overrides.py", line 1717, in handle_torch_function | |
2025-06-14 06:27:57,608 - __main__ - INFO - result = mode.__torch_function__(public_api, types, args, kwargs) | |
2025-06-14 06:27:57,608 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
2025-06-14 06:27:57,608 - __main__ - INFO - File "/usr/local/lib/python3.11/dist-packages/torch/utils/_device.py", line 106, in __torch_function__ | |
2025-06-14 06:27:57,608 - __main__ - INFO - return func(*args, **kwargs) | |
2025-06-14 06:27:57,608 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^ | |
2025-06-14 06:27:57,608 - __main__ - INFO - File "/usr/local/lib/python3.11/dist-packages/torch/nn/init.py", line 518, in kaiming_uniform_ | |
2025-06-14 06:27:57,608 - __main__ - INFO - return tensor.uniform_(-bound, bound, generator=generator) | |
2025-06-14 06:27:57,608 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
2025-06-14 06:27:57,608 - __main__ - INFO - RuntimeError: CUDA error: no kernel image is available for execution on the device | |
2025-06-14 06:27:57,608 - __main__ - INFO - CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect. | |
2025-06-14 06:27:57,609 - __main__ - INFO - For debugging consider passing CUDA_LAUNCH_BLOCKING=1 | |
2025-06-14 06:27:57,609 - __main__ - INFO - Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions. | |
2025-06-14 06:27:57,609 - __main__ - INFO - | |
2025-06-14 06:27:57,609 - __main__ - INFO - | |
2025-06-14 06:27:57,609 - __main__ - INFO - [2025-06-14 06:27:57] Received sigquit from a child proces. It usually means the child failed. | |
2025-06-14 06:27:57,756 - __main__ - WARNING - Attempt 19: Please wait for sglang server to become ready... | |
2025-06-14 06:27:57,854 - __main__ - WARNING - SGLang server task ended | |
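olmocr keeps restarting the sglang server, but every attempt fails the same way because the problem is the PyTorch build itself, not the server. A quick check of which build is installed (a sketch, stock PyTorch APIs only):

```python
# Print the installed PyTorch version and the CUDA toolkit it was built against.
# Blackwell (sm_120) support requires a newer build; see the URL in the warning,
# https://pytorch.org/get-started/locally/, for the matching install command.
import torch

print("torch", torch.__version__, "built for CUDA", torch.version.cuda)
```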
2025-06-14 06:27:58,763 - __main__ - WARNING - Attempt 20: Please wait for sglang server to become ready... | |
2025-06-14 06:27:59,770 - __main__ - WARNING - Attempt 21: Please wait for sglang server to become ready... | |
2025-06-14 06:28:00,778 - __main__ - WARNING - Attempt 22: Please wait for sglang server to become ready... | |
2025-06-14 06:28:01,021 - __main__ - INFO - /usr/local/lib/python3.11/dist-packages/pyairports/airports.py:1: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81. | |
2025-06-14 06:28:01,021 - __main__ - INFO - from pkg_resources import resource_string | |
2025-06-14 06:28:01,143 - __main__ - INFO - [2025-06-14 06:28:01] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=8192, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=150910178, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=160, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None) | |
2025-06-14 06:28:01,429 - __main__ - INFO - /usr/local/lib/python3.11/dist-packages/torch/cuda/__init__.py:235: UserWarning: | |
2025-06-14 06:28:01,429 - __main__ - INFO - NVIDIA GeForce RTX 5090 with CUDA capability sm_120 is not compatible with the current PyTorch installation. | |
2025-06-14 06:28:01,430 - __main__ - INFO - The current PyTorch install supports CUDA capabilities sm_50 sm_60 sm_70 sm_75 sm_80 sm_86 sm_90. | |
2025-06-14 06:28:01,430 - __main__ - INFO - If you want to use the NVIDIA GeForce RTX 5090 GPU with PyTorch, please check the instructions at https://pytorch.org/get-started/locally/ | |
2025-06-14 06:28:01,430 - __main__ - INFO - | |
2025-06-14 06:28:01,430 - __main__ - INFO - warnings.warn( | |
2025-06-14 06:28:01,785 - __main__ - WARNING - Attempt 23: Please wait for sglang server to become ready... | |
2025-06-14 06:28:02,388 - __main__ - INFO - [2025-06-14 06:28:02] Use chat template for the OpenAI-compatible API server: qwen2-vl | |
2025-06-14 06:28:02,790 - __main__ - WARNING - Attempt 24: Please wait for sglang server to become ready... | |
2025-06-14 06:28:03,796 - __main__ - WARNING - Attempt 25: Please wait for sglang server to become ready... | |
2025-06-14 06:28:04,654 - __main__ - INFO - /usr/local/lib/python3.11/dist-packages/pyairports/airports.py:1: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81. | |
2025-06-14 06:28:04,654 - __main__ - INFO - from pkg_resources import resource_string | |
2025-06-14 06:28:04,681 - __main__ - INFO - /usr/local/lib/python3.11/dist-packages/pyairports/airports.py:1: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81. | |
2025-06-14 06:28:04,681 - __main__ - INFO - from pkg_resources import resource_string | |
2025-06-14 06:28:04,804 - __main__ - WARNING - Attempt 26: Please wait for sglang server to become ready... | |
2025-06-14 06:28:05,028 - __main__ - INFO - /usr/local/lib/python3.11/dist-packages/torch/cuda/__init__.py:235: UserWarning: | |
2025-06-14 06:28:05,028 - __main__ - INFO - NVIDIA GeForce RTX 5090 with CUDA capability sm_120 is not compatible with the current PyTorch installation. | |
2025-06-14 06:28:05,028 - __main__ - INFO - The current PyTorch install supports CUDA capabilities sm_50 sm_60 sm_70 sm_75 sm_80 sm_86 sm_90. | |
2025-06-14 06:28:05,028 - __main__ - INFO - If you want to use the NVIDIA GeForce RTX 5090 GPU with PyTorch, please check the instructions at https://pytorch.org/get-started/locally/ | |
2025-06-14 06:28:05,028 - __main__ - INFO - | |
2025-06-14 06:28:05,028 - __main__ - INFO - warnings.warn( | |
2025-06-14 06:28:05,047 - __main__ - INFO - /usr/local/lib/python3.11/dist-packages/torch/cuda/__init__.py:235: UserWarning: | |
2025-06-14 06:28:05,047 - __main__ - INFO - NVIDIA GeForce RTX 5090 with CUDA capability sm_120 is not compatible with the current PyTorch installation. | |
2025-06-14 06:28:05,047 - __main__ - INFO - The current PyTorch install supports CUDA capabilities sm_50 sm_60 sm_70 sm_75 sm_80 sm_86 sm_90. | |
2025-06-14 06:28:05,047 - __main__ - INFO - If you want to use the NVIDIA GeForce RTX 5090 GPU with PyTorch, please check the instructions at https://pytorch.org/get-started/locally/ | |
2025-06-14 06:28:05,047 - __main__ - INFO - | |
2025-06-14 06:28:05,047 - __main__ - INFO - warnings.warn( | |
2025-06-14 06:28:05,810 - __main__ - WARNING - Attempt 27: Please wait for sglang server to become ready... | |
2025-06-14 06:28:05,849 - __main__ - INFO - [2025-06-14 06:28:05 TP0] Overlap scheduler is disabled for multimodal models. | |
2025-06-14 06:28:05,991 - __main__ - INFO - [2025-06-14 06:28:05 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model. | |
2025-06-14 06:28:05,991 - __main__ - INFO - [2025-06-14 06:28:05 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl. | |
2025-06-14 06:28:05,991 - __main__ - INFO - [2025-06-14 06:28:05 TP0] Init torch distributed begin. | |
2025-06-14 06:28:06,023 - __main__ - INFO - /usr/local/lib/python3.11/dist-packages/torch/cuda/__init__.py:235: UserWarning: | |
2025-06-14 06:28:06,023 - __main__ - INFO - NVIDIA GeForce RTX 5090 with CUDA capability sm_120 is not compatible with the current PyTorch installation. | |
2025-06-14 06:28:06,024 - __main__ - INFO - The current PyTorch install supports CUDA capabilities sm_50 sm_60 sm_70 sm_75 sm_80 sm_86 sm_90. | |
2025-06-14 06:28:06,024 - __main__ - INFO - If you want to use the NVIDIA GeForce RTX 5090 GPU with PyTorch, please check the instructions at https://pytorch.org/get-started/locally/ | |
2025-06-14 06:28:06,024 - __main__ - INFO - | |
2025-06-14 06:28:06,024 - __main__ - INFO - warnings.warn( | |
2025-06-14 06:28:06,131 - __main__ - INFO - [2025-06-14 06:28:06 TP0] Load weight begin. avail mem=27.12 GB | |
2025-06-14 06:28:06,315 - __main__ - INFO - [2025-06-14 06:28:06 TP0] Ignore import error when loading sglang.srt.models.llava. Failed to import transformers.models.clip.modeling_clip because of the following error (look up to see its traceback): | |
2025-06-14 06:28:06,315 - __main__ - INFO - CUDA error: no kernel image is available for execution on the device | |
2025-06-14 06:28:06,315 - __main__ - INFO - CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect. | |
2025-06-14 06:28:06,315 - __main__ - INFO - For debugging consider passing CUDA_LAUNCH_BLOCKING=1 | |
2025-06-14 06:28:06,315 - __main__ - INFO - Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions. | |
2025-06-14 06:28:06,315 - __main__ - INFO - | |
2025-06-14 06:28:06,337 - __main__ - INFO - [2025-06-14 06:28:06 TP0] Scheduler hit an exception: Traceback (most recent call last): | |
2025-06-14 06:28:06,337 - __main__ - INFO - File "/usr/local/lib/python3.11/dist-packages/sglang/srt/managers/scheduler.py", line 1773, in run_scheduler_process | |
2025-06-14 06:28:06,337 - __main__ - INFO - scheduler = Scheduler(server_args, port_args, gpu_id, tp_rank, dp_rank) | |
2025-06-14 06:28:06,337 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
2025-06-14 06:28:06,337 - __main__ - INFO - File "/usr/local/lib/python3.11/dist-packages/sglang/srt/managers/scheduler.py", line 239, in __init__ | |
2025-06-14 06:28:06,337 - __main__ - INFO - self.tp_worker = TpWorkerClass( | |
2025-06-14 06:28:06,337 - __main__ - INFO - ^^^^^^^^^^^^^^ | |
2025-06-14 06:28:06,337 - __main__ - INFO - File "/usr/local/lib/python3.11/dist-packages/sglang/srt/managers/tp_worker.py", line 68, in __init__ | |
2025-06-14 06:28:06,337 - __main__ - INFO - self.model_runner = ModelRunner( | |
2025-06-14 06:28:06,337 - __main__ - INFO - ^^^^^^^^^^^^ | |
2025-06-14 06:28:06,337 - __main__ - INFO - File "/usr/local/lib/python3.11/dist-packages/sglang/srt/model_executor/model_runner.py", line 185, in __init__ | |
2025-06-14 06:28:06,337 - __main__ - INFO - self.load_model() | |
2025-06-14 06:28:06,337 - __main__ - INFO - File "/usr/local/lib/python3.11/dist-packages/sglang/srt/model_executor/model_runner.py", line 306, in load_model | |
2025-06-14 06:28:06,337 - __main__ - INFO - self.model = get_model( | |
2025-06-14 06:28:06,338 - __main__ - INFO - ^^^^^^^^^^ | |
2025-06-14 06:28:06,338 - __main__ - INFO - File "/usr/local/lib/python3.11/dist-packages/sglang/srt/model_loader/__init__.py", line 22, in get_model | |
2025-06-14 06:28:06,338 - __main__ - INFO - return loader.load_model( | |
2025-06-14 06:28:06,338 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^ | |
2025-06-14 06:28:06,338 - __main__ - INFO - File "/usr/local/lib/python3.11/dist-packages/sglang/srt/model_loader/loader.py", line 357, in load_model | |
2025-06-14 06:28:06,338 - __main__ - INFO - model = _initialize_model( | |
2025-06-14 06:28:06,338 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^ | |
2025-06-14 06:28:06,338 - __main__ - INFO - File "/usr/local/lib/python3.11/dist-packages/sglang/srt/model_loader/loader.py", line 138, in _initialize_model | |
2025-06-14 06:28:06,338 - __main__ - INFO - return model_class( | |
2025-06-14 06:28:06,338 - __main__ - INFO - ^^^^^^^^^^^^ | |
2025-06-14 06:28:06,338 - __main__ - INFO - File "/usr/local/lib/python3.11/dist-packages/sglang/srt/models/qwen2_vl.py", line 435, in __init__ | |
2025-06-14 06:28:06,338 - __main__ - INFO - self.visual = Qwen2VisionTransformer( | |
2025-06-14 06:28:06,338 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^ | |
2025-06-14 06:28:06,338 - __main__ - INFO - File "/usr/local/lib/python3.11/dist-packages/sglang/srt/models/qwen2_vl.py", line 279, in __init__ | |
2025-06-14 06:28:06,338 - __main__ - INFO - self.patch_embed = Qwen2VisionPatchEmbed( | |
2025-06-14 06:28:06,338 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^ | |
2025-06-14 06:28:06,338 - __main__ - INFO - File "/usr/local/lib/python3.11/dist-packages/sglang/srt/models/qwen2_vl.py", line 170, in __init__ | |
2025-06-14 06:28:06,338 - __main__ - INFO - self.proj = nn.Conv3d( | |
2025-06-14 06:28:06,338 - __main__ - INFO - ^^^^^^^^^^ | |
2025-06-14 06:28:06,338 - __main__ - INFO - File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/conv.py", line 692, in __init__ | |
2025-06-14 06:28:06,338 - __main__ - INFO - super().__init__( | |
2025-06-14 06:28:06,338 - __main__ - INFO - File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/conv.py", line 176, in __init__ | |
2025-06-14 06:28:06,338 - __main__ - INFO - self.reset_parameters() | |
2025-06-14 06:28:06,338 - __main__ - INFO - File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/conv.py", line 182, in reset_parameters | |
2025-06-14 06:28:06,338 - __main__ - INFO - init.kaiming_uniform_(self.weight, a=math.sqrt(5)) | |
2025-06-14 06:28:06,338 - __main__ - INFO - File "/usr/local/lib/python3.11/dist-packages/torch/nn/init.py", line 500, in kaiming_uniform_ | |
2025-06-14 06:28:06,338 - __main__ - INFO - return torch.overrides.handle_torch_function( | |
2025-06-14 06:28:06,338 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
2025-06-14 06:28:06,338 - __main__ - INFO - File "/usr/local/lib/python3.11/dist-packages/torch/overrides.py", line 1717, in handle_torch_function | |
2025-06-14 06:28:06,338 - __main__ - INFO - result = mode.__torch_function__(public_api, types, args, kwargs) | |
2025-06-14 06:28:06,338 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
2025-06-14 06:28:06,338 - __main__ - INFO - File "/usr/local/lib/python3.11/dist-packages/torch/utils/_device.py", line 106, in __torch_function__ | |
2025-06-14 06:28:06,338 - __main__ - INFO - return func(*args, **kwargs) | |
2025-06-14 06:28:06,338 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^ | |
2025-06-14 06:28:06,338 - __main__ - INFO - File "/usr/local/lib/python3.11/dist-packages/torch/nn/init.py", line 518, in kaiming_uniform_ | |
2025-06-14 06:28:06,338 - __main__ - INFO - return tensor.uniform_(-bound, bound, generator=generator) | |
2025-06-14 06:28:06,338 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
2025-06-14 06:28:06,338 - __main__ - INFO - RuntimeError: CUDA error: no kernel image is available for execution on the device | |
2025-06-14 06:28:06,338 - __main__ - INFO - CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect. | |
2025-06-14 06:28:06,338 - __main__ - INFO - For debugging consider passing CUDA_LAUNCH_BLOCKING=1 | |
2025-06-14 06:28:06,338 - __main__ - INFO - Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions. | |
2025-06-14 06:28:06,338 - __main__ - INFO - | |
2025-06-14 06:28:06,338 - __main__ - INFO - | |
2025-06-14 06:28:06,339 - __main__ - INFO - [2025-06-14 06:28:06] Received sigquit from a child proces. It usually means the child failed. | |
2025-06-14 06:28:06,586 - __main__ - WARNING - SGLang server task ended | |
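After installing a PyTorch build that includes sm_120 kernels (per https://pytorch.org/get-started/locally/, as the warning suggests; the exact wheel to use is not shown in this log), a small sanity check like the following should pass before rerunning the pipeline:

```python
# Sanity check after reinstalling PyTorch: the arch list should now include
# sm_120 (assumed entry name) and a trivial kernel launch should succeed.
import torch

assert "sm_120" in torch.cuda.get_arch_list(), \
    "this wheel still has no sm_120 (Blackwell) kernels"
torch.empty(4, device="cuda").uniform_()  # should no longer raise
print("OK:", torch.cuda.get_device_name(0))
```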
2025-06-14 06:28:06,816 - __main__ - WARNING - Attempt 28: Please wait for sglang server to become ready... | |
2025-06-14 06:28:07,822 - __main__ - WARNING - Attempt 29: Please wait for sglang server to become ready... | |
2025-06-14 06:28:08,828 - __main__ - WARNING - Attempt 30: Please wait for sglang server to become ready... | |
2025-06-14 06:28:09,812 - __main__ - INFO - /usr/local/lib/python3.11/dist-packages/pyairports/airports.py:1: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81. | |
2025-06-14 06:28:09,813 - __main__ - INFO - from pkg_resources import resource_string | |
2025-06-14 06:28:09,833 - __main__ - WARNING - Attempt 31: Please wait for sglang server to become ready... | |
2025-06-14 06:28:09,940 - __main__ - INFO - [2025-06-14 06:28:09] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=8192, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=1004994523, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=160, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None) | |
2025-06-14 06:28:10,182 - __main__ - INFO - /usr/local/lib/python3.11/dist-packages/torch/cuda/__init__.py:235: UserWarning: | |
2025-06-14 06:28:10,182 - __main__ - INFO - NVIDIA GeForce RTX 5090 with CUDA capability sm_120 is not compatible with the current PyTorch installation. | |
2025-06-14 06:28:10,182 - __main__ - INFO - The current PyTorch install supports CUDA capabilities sm_50 sm_60 sm_70 sm_75 sm_80 sm_86 sm_90. | |
2025-06-14 06:28:10,182 - __main__ - INFO - If you want to use the NVIDIA GeForce RTX 5090 GPU with PyTorch, please check the instructions at https://pytorch.org/get-started/locally/ | |
2025-06-14 06:28:10,182 - __main__ - INFO - | |
2025-06-14 06:28:10,182 - __main__ - INFO - warnings.warn( | |
2025-06-14 06:28:10,839 - __main__ - WARNING - Attempt 32: Please wait for sglang server to become ready... | |
2025-06-14 06:28:11,166 - __main__ - INFO - [2025-06-14 06:28:11] Use chat template for the OpenAI-compatible API server: qwen2-vl | |
2025-06-14 06:28:11,845 - __main__ - WARNING - Attempt 33: Please wait for sglang server to become ready... | |
2025-06-14 06:28:12,852 - __main__ - WARNING - Attempt 34: Please wait for sglang server to become ready... | |
2025-06-14 06:28:13,439 - __main__ - INFO - /usr/local/lib/python3.11/dist-packages/pyairports/airports.py:1: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81. | |
2025-06-14 06:28:13,439 - __main__ - INFO - from pkg_resources import resource_string | |
2025-06-14 06:28:13,445 - __main__ - INFO - /usr/local/lib/python3.11/dist-packages/pyairports/airports.py:1: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81. | |
2025-06-14 06:28:13,445 - __main__ - INFO - from pkg_resources import resource_string | |
2025-06-14 06:28:13,777 - __main__ - INFO - /usr/local/lib/python3.11/dist-packages/torch/cuda/__init__.py:235: UserWarning: | |
2025-06-14 06:28:13,777 - __main__ - INFO - NVIDIA GeForce RTX 5090 with CUDA capability sm_120 is not compatible with the current PyTorch installation. | |
2025-06-14 06:28:13,777 - __main__ - INFO - The current PyTorch install supports CUDA capabilities sm_50 sm_60 sm_70 sm_75 sm_80 sm_86 sm_90. | |
2025-06-14 06:28:13,777 - __main__ - INFO - If you want to use the NVIDIA GeForce RTX 5090 GPU with PyTorch, please check the instructions at https://pytorch.org/get-started/locally/ | |
2025-06-14 06:28:13,777 - __main__ - INFO - | |
2025-06-14 06:28:13,777 - __main__ - INFO - warnings.warn( | |
2025-06-14 06:28:13,787 - __main__ - INFO - /usr/local/lib/python3.11/dist-packages/torch/cuda/__init__.py:235: UserWarning: | |
2025-06-14 06:28:13,787 - __main__ - INFO - NVIDIA GeForce RTX 5090 with CUDA capability sm_120 is not compatible with the current PyTorch installation. | |
2025-06-14 06:28:13,787 - __main__ - INFO - The current PyTorch install supports CUDA capabilities sm_50 sm_60 sm_70 sm_75 sm_80 sm_86 sm_90. | |
2025-06-14 06:28:13,787 - __main__ - INFO - If you want to use the NVIDIA GeForce RTX 5090 GPU with PyTorch, please check the instructions at https://pytorch.org/get-started/locally/ | |
2025-06-14 06:28:13,787 - __main__ - INFO - | |
2025-06-14 06:28:13,787 - __main__ - INFO - warnings.warn( | |
2025-06-14 06:28:13,857 - __main__ - WARNING - Attempt 35: Please wait for sglang server to become ready... | |
2025-06-14 06:28:14,760 - __main__ - INFO - [2025-06-14 06:28:14 TP0] Overlap scheduler is disabled for multimodal models. | |
2025-06-14 06:28:14,864 - __main__ - WARNING - Attempt 36: Please wait for sglang server to become ready... | |
2025-06-14 06:28:14,900 - __main__ - INFO - [2025-06-14 06:28:14 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model. | |
2025-06-14 06:28:14,900 - __main__ - INFO - [2025-06-14 06:28:14 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl. | |
2025-06-14 06:28:14,901 - __main__ - INFO - [2025-06-14 06:28:14 TP0] Init torch distributed begin. | |
2025-06-14 06:28:14,941 - __main__ - INFO - /usr/local/lib/python3.11/dist-packages/torch/cuda/__init__.py:235: UserWarning: | |
2025-06-14 06:28:14,941 - __main__ - INFO - NVIDIA GeForce RTX 5090 with CUDA capability sm_120 is not compatible with the current PyTorch installation. | |
2025-06-14 06:28:14,941 - __main__ - INFO - The current PyTorch install supports CUDA capabilities sm_50 sm_60 sm_70 sm_75 sm_80 sm_86 sm_90. | |
2025-06-14 06:28:14,941 - __main__ - INFO - If you want to use the NVIDIA GeForce RTX 5090 GPU with PyTorch, please check the instructions at https://pytorch.org/get-started/locally/ | |
2025-06-14 06:28:14,941 - __main__ - INFO - | |
2025-06-14 06:28:14,941 - __main__ - INFO - warnings.warn( | |
2025-06-14 06:28:15,045 - __main__ - INFO - [2025-06-14 06:28:15 TP0] Load weight begin. avail mem=27.12 GB | |
2025-06-14 06:28:15,219 - __main__ - INFO - [2025-06-14 06:28:15 TP0] Ignore import error when loading sglang.srt.models.llava. Failed to import transformers.models.clip.modeling_clip because of the following error (look up to see its traceback): | |
2025-06-14 06:28:15,219 - __main__ - INFO - CUDA error: no kernel image is available for execution on the device | |
2025-06-14 06:28:15,220 - __main__ - INFO - CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect. | |
2025-06-14 06:28:15,220 - __main__ - INFO - For debugging consider passing CUDA_LAUNCH_BLOCKING=1 | |
2025-06-14 06:28:15,220 - __main__ - INFO - Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions. | |
2025-06-14 06:28:15,220 - __main__ - INFO - | |
2025-06-14 06:28:15,242 - __main__ - INFO - [2025-06-14 06:28:15 TP0] Scheduler hit an exception: Traceback (most recent call last): | |
2025-06-14 06:28:15,242 - __main__ - INFO - File "/usr/local/lib/python3.11/dist-packages/sglang/srt/managers/scheduler.py", line 1773, in run_scheduler_process | |
2025-06-14 06:28:15,242 - __main__ - INFO - scheduler = Scheduler(server_args, port_args, gpu_id, tp_rank, dp_rank) | |
2025-06-14 06:28:15,242 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
2025-06-14 06:28:15,242 - __main__ - INFO - File "/usr/local/lib/python3.11/dist-packages/sglang/srt/managers/scheduler.py", line 239, in __init__ | |
2025-06-14 06:28:15,243 - __main__ - INFO - self.tp_worker = TpWorkerClass( | |
2025-06-14 06:28:15,243 - __main__ - INFO - ^^^^^^^^^^^^^^ | |
2025-06-14 06:28:15,243 - __main__ - INFO - File "/usr/local/lib/python3.11/dist-packages/sglang/srt/managers/tp_worker.py", line 68, in __init__ | |
2025-06-14 06:28:15,243 - __main__ - INFO - self.model_runner = ModelRunner( | |
2025-06-14 06:28:15,243 - __main__ - INFO - ^^^^^^^^^^^^ | |
2025-06-14 06:28:15,243 - __main__ - INFO - File "/usr/local/lib/python3.11/dist-packages/sglang/srt/model_executor/model_runner.py", line 185, in __init__ | |
2025-06-14 06:28:15,243 - __main__ - INFO - self.load_model() | |
2025-06-14 06:28:15,243 - __main__ - INFO - File "/usr/local/lib/python3.11/dist-packages/sglang/srt/model_executor/model_runner.py", line 306, in load_model | |
2025-06-14 06:28:15,243 - __main__ - INFO - self.model = get_model( | |
2025-06-14 06:28:15,243 - __main__ - INFO - ^^^^^^^^^^ | |
2025-06-14 06:28:15,243 - __main__ - INFO - File "/usr/local/lib/python3.11/dist-packages/sglang/srt/model_loader/__init__.py", line 22, in get_model | |
2025-06-14 06:28:15,243 - __main__ - INFO - return loader.load_model( | |
2025-06-14 06:28:15,243 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^ | |
2025-06-14 06:28:15,243 - __main__ - INFO - File "/usr/local/lib/python3.11/dist-packages/sglang/srt/model_loader/loader.py", line 357, in load_model | |
2025-06-14 06:28:15,243 - __main__ - INFO - model = _initialize_model( | |
2025-06-14 06:28:15,243 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^ | |
2025-06-14 06:28:15,243 - __main__ - INFO - File "/usr/local/lib/python3.11/dist-packages/sglang/srt/model_loader/loader.py", line 138, in _initialize_model | |
2025-06-14 06:28:15,243 - __main__ - INFO - return model_class( | |
2025-06-14 06:28:15,243 - __main__ - INFO - ^^^^^^^^^^^^ | |
2025-06-14 06:28:15,243 - __main__ - INFO - File "/usr/local/lib/python3.11/dist-packages/sglang/srt/models/qwen2_vl.py", line 435, in __init__ | |
2025-06-14 06:28:15,243 - __main__ - INFO - self.visual = Qwen2VisionTransformer( | |
2025-06-14 06:28:15,243 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^ | |
2025-06-14 06:28:15,243 - __main__ - INFO - File "/usr/local/lib/python3.11/dist-packages/sglang/srt/models/qwen2_vl.py", line 279, in __init__ | |
2025-06-14 06:28:15,244 - __main__ - INFO - self.patch_embed = Qwen2VisionPatchEmbed( | |
2025-06-14 06:28:15,244 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^ | |
2025-06-14 06:28:15,244 - __main__ - INFO - File "/usr/local/lib/python3.11/dist-packages/sglang/srt/models/qwen2_vl.py", line 170, in __init__ | |
2025-06-14 06:28:15,244 - __main__ - INFO - self.proj = nn.Conv3d( | |
2025-06-14 06:28:15,244 - __main__ - INFO - ^^^^^^^^^^ | |
2025-06-14 06:28:15,244 - __main__ - INFO - File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/conv.py", line 692, in __init__ | |
2025-06-14 06:28:15,244 - __main__ - INFO - super().__init__( | |
2025-06-14 06:28:15,244 - __main__ - INFO - File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/conv.py", line 176, in __init__ | |
2025-06-14 06:28:15,244 - __main__ - INFO - self.reset_parameters() | |
2025-06-14 06:28:15,244 - __main__ - INFO - File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/conv.py", line 182, in reset_parameters | |
2025-06-14 06:28:15,244 - __main__ - INFO - init.kaiming_uniform_(self.weight, a=math.sqrt(5)) | |
2025-06-14 06:28:15,244 - __main__ - INFO - File "/usr/local/lib/python3.11/dist-packages/torch/nn/init.py", line 500, in kaiming_uniform_ | |
2025-06-14 06:28:15,244 - __main__ - INFO - return torch.overrides.handle_torch_function( | |
2025-06-14 06:28:15,244 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
2025-06-14 06:28:15,244 - __main__ - INFO - File "/usr/local/lib/python3.11/dist-packages/torch/overrides.py", line 1717, in handle_torch_function | |
2025-06-14 06:28:15,244 - __main__ - INFO - result = mode.__torch_function__(public_api, types, args, kwargs) | |
2025-06-14 06:28:15,244 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
2025-06-14 06:28:15,244 - __main__ - INFO - File "/usr/local/lib/python3.11/dist-packages/torch/utils/_device.py", line 106, in __torch_function__ | |
2025-06-14 06:28:15,244 - __main__ - INFO - return func(*args, **kwargs) | |
2025-06-14 06:28:15,244 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^ | |
2025-06-14 06:28:15,244 - __main__ - INFO - File "/usr/local/lib/python3.11/dist-packages/torch/nn/init.py", line 518, in kaiming_uniform_ | |
2025-06-14 06:28:15,244 - __main__ - INFO - return tensor.uniform_(-bound, bound, generator=generator) | |
2025-06-14 06:28:15,244 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
2025-06-14 06:28:15,244 - __main__ - INFO - RuntimeError: CUDA error: no kernel image is available for execution on the device | |
2025-06-14 06:28:15,244 - __main__ - INFO - CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect. | |
2025-06-14 06:28:15,244 - __main__ - INFO - For debugging consider passing CUDA_LAUNCH_BLOCKING=1 | |
2025-06-14 06:28:15,244 - __main__ - INFO - Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions. | |
2025-06-14 06:28:15,244 - __main__ - INFO - | |
2025-06-14 06:28:15,244 - __main__ - INFO - | |
2025-06-14 06:28:15,244 - __main__ - INFO - [2025-06-14 06:28:15] Received sigquit from a child proces. It usually means the child failed. | |
2025-06-14 06:28:15,493 - __main__ - WARNING - SGLang server task ended | |
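Note: the scheduler crash above follows directly from the CUDA capability warning. The installed PyTorch wheel ships kernels only up to sm_90, while the RTX 5090 reports sm_120, so the first real GPU kernel launch during weight initialization (the tensor.uniform_() call inside nn.Conv3d.reset_parameters) fails with "no kernel image is available for execution on the device". A minimal sketch that reproduces the same class of failure outside sglang, assuming the same PyTorch build as in this log:

import torch

# Inspect what the wheel was compiled for versus what the GPU reports.
print(torch.__version__, torch.version.cuda)
print("device capability:", torch.cuda.get_device_capability(0))  # (12, 0) on an RTX 5090
print("compiled arch list:", torch.cuda.get_arch_list())          # expected to lack 'sm_120' here

# Allocation usually succeeds, but the first kernel launch raises
# RuntimeError: CUDA error: no kernel image is available for execution on the device
x = torch.empty(8, device="cuda")
x.uniform_()  # same op that fails in nn.Conv3d.reset_parameters above

The retries that follow hit the identical error, since restarting the server does not change the installed PyTorch build.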
2025-06-14 06:28:15,871 - __main__ - WARNING - Attempt 37: Please wait for sglang server to become ready... | |
2025-06-14 06:28:16,877 - __main__ - WARNING - Attempt 38: Please wait for sglang server to become ready... | |
2025-06-14 06:28:17,883 - __main__ - WARNING - Attempt 39: Please wait for sglang server to become ready... | |
2025-06-14 06:28:18,714 - __main__ - INFO - /usr/local/lib/python3.11/dist-packages/pyairports/airports.py:1: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81. | |
2025-06-14 06:28:18,714 - __main__ - INFO - from pkg_resources import resource_string | |
2025-06-14 06:28:18,836 - __main__ - INFO - [2025-06-14 06:28:18] server_args=ServerArgs(model_path='allenai/olmOCR-7B-0225-preview', tokenizer_path='allenai/olmOCR-7B-0225-preview', tokenizer_mode='auto', load_format='auto', trust_remote_code=False, dtype='auto', kv_cache_dtype='auto', quantization_param_path=None, quantization=None, context_length=None, device='cuda', served_model_name='allenai/olmOCR-7B-0225-preview', chat_template='qwen2-vl', is_embedding=False, revision=None, skip_tokenizer_init=False, host='127.0.0.1', port=30024, mem_fraction_static=0.8, max_running_requests=None, max_total_tokens=None, chunked_prefill_size=8192, max_prefill_tokens=16384, schedule_policy='lpm', schedule_conservativeness=1.0, cpu_offload_gb=0, prefill_only_one_req=False, tp_size=1, stream_interval=1, stream_output=False, random_seed=780344672, constrained_json_whitespace_pattern=None, watchdog_timeout=300, download_dir=None, base_gpu_id=0, log_level='info', log_level_http='warning', log_requests=False, show_time_cost=False, enable_metrics=False, decode_log_interval=40, api_key=None, file_storage_pth='sglang_storage', enable_cache_report=False, dp_size=1, load_balance_method='round_robin', ep_size=1, dist_init_addr=None, nnodes=1, node_rank=0, json_model_override_args='{}', lora_paths=None, max_loras_per_batch=8, attention_backend='flashinfer', sampling_backend='flashinfer', grammar_backend='outlines', speculative_draft_model_path=None, speculative_algorithm=None, speculative_num_steps=5, speculative_num_draft_tokens=64, speculative_eagle_topk=8, enable_double_sparsity=False, ds_channel_config_path=None, ds_heavy_channel_num=32, ds_heavy_token_num=256, ds_heavy_channel_type='qk', ds_sparse_decode_threshold=4096, disable_radix_cache=False, disable_jump_forward=False, disable_cuda_graph=False, disable_cuda_graph_padding=False, disable_outlines_disk_cache=False, disable_custom_all_reduce=False, disable_mla=False, disable_overlap_schedule=False, enable_mixed_chunk=False, enable_dp_attention=False, enable_ep_moe=False, enable_torch_compile=False, torch_compile_max_bs=32, cuda_graph_max_bs=160, cuda_graph_bs=None, torchao_config='', enable_nan_detection=False, enable_p2p_check=False, triton_attention_reduce_in_fp32=False, triton_attention_num_kv_splits=8, num_continuous_decode_steps=1, delete_ckpt_after_loading=False, enable_memory_saver=False, allow_auto_truncate=False, enable_custom_logit_processor=False, tool_call_parser=None) | |
2025-06-14 06:28:18,916 - __main__ - WARNING - Attempt 40: Please wait for sglang server to become ready... | |
2025-06-14 06:28:19,111 - __main__ - INFO - /usr/local/lib/python3.11/dist-packages/torch/cuda/__init__.py:235: UserWarning: | |
2025-06-14 06:28:19,111 - __main__ - INFO - NVIDIA GeForce RTX 5090 with CUDA capability sm_120 is not compatible with the current PyTorch installation. | |
2025-06-14 06:28:19,111 - __main__ - INFO - The current PyTorch install supports CUDA capabilities sm_50 sm_60 sm_70 sm_75 sm_80 sm_86 sm_90. | |
2025-06-14 06:28:19,111 - __main__ - INFO - If you want to use the NVIDIA GeForce RTX 5090 GPU with PyTorch, please check the instructions at https://pytorch.org/get-started/locally/ | |
2025-06-14 06:28:19,111 - __main__ - INFO - | |
2025-06-14 06:28:19,111 - __main__ - INFO - warnings.warn( | |
2025-06-14 06:28:19,924 - __main__ - WARNING - Attempt 41: Please wait for sglang server to become ready... | |
2025-06-14 06:28:20,020 - __main__ - INFO - [2025-06-14 06:28:20] Use chat template for the OpenAI-compatible API server: qwen2-vl | |
2025-06-14 06:28:20,930 - __main__ - WARNING - Attempt 42: Please wait for sglang server to become ready... | |
2025-06-14 06:28:21,935 - __main__ - WARNING - Attempt 43: Please wait for sglang server to become ready... | |
2025-06-14 06:28:22,322 - __main__ - INFO - /usr/local/lib/python3.11/dist-packages/pyairports/airports.py:1: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81. | |
2025-06-14 06:28:22,322 - __main__ - INFO - from pkg_resources import resource_string | |
2025-06-14 06:28:22,325 - __main__ - INFO - /usr/local/lib/python3.11/dist-packages/pyairports/airports.py:1: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81. | |
2025-06-14 06:28:22,325 - __main__ - INFO - from pkg_resources import resource_string | |
2025-06-14 06:28:22,723 - __main__ - INFO - /usr/local/lib/python3.11/dist-packages/torch/cuda/__init__.py:235: UserWarning: | |
2025-06-14 06:28:22,723 - __main__ - INFO - NVIDIA GeForce RTX 5090 with CUDA capability sm_120 is not compatible with the current PyTorch installation. | |
2025-06-14 06:28:22,723 - __main__ - INFO - The current PyTorch install supports CUDA capabilities sm_50 sm_60 sm_70 sm_75 sm_80 sm_86 sm_90. | |
2025-06-14 06:28:22,723 - __main__ - INFO - If you want to use the NVIDIA GeForce RTX 5090 GPU with PyTorch, please check the instructions at https://pytorch.org/get-started/locally/ | |
2025-06-14 06:28:22,723 - __main__ - INFO - | |
2025-06-14 06:28:22,723 - __main__ - INFO - warnings.warn( | |
2025-06-14 06:28:22,734 - __main__ - INFO - /usr/local/lib/python3.11/dist-packages/torch/cuda/__init__.py:235: UserWarning: | |
2025-06-14 06:28:22,734 - __main__ - INFO - NVIDIA GeForce RTX 5090 with CUDA capability sm_120 is not compatible with the current PyTorch installation. | |
2025-06-14 06:28:22,734 - __main__ - INFO - The current PyTorch install supports CUDA capabilities sm_50 sm_60 sm_70 sm_75 sm_80 sm_86 sm_90. | |
2025-06-14 06:28:22,734 - __main__ - INFO - If you want to use the NVIDIA GeForce RTX 5090 GPU with PyTorch, please check the instructions at https://pytorch.org/get-started/locally/ | |
2025-06-14 06:28:22,734 - __main__ - INFO - | |
2025-06-14 06:28:22,734 - __main__ - INFO - warnings.warn( | |
2025-06-14 06:28:22,941 - __main__ - WARNING - Attempt 44: Please wait for sglang server to become ready... | |
2025-06-14 06:28:23,947 - __main__ - WARNING - Attempt 45: Please wait for sglang server to become ready... | |
2025-06-14 06:28:24,057 - __main__ - INFO - [2025-06-14 06:28:24 TP0] Overlap scheduler is disabled for multimodal models. | |
2025-06-14 06:28:24,227 - __main__ - INFO - [2025-06-14 06:28:24 TP0] Automatically reduce --mem-fraction-static to 0.760 because this is a multimodal model. | |
2025-06-14 06:28:24,227 - __main__ - INFO - [2025-06-14 06:28:24 TP0] Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl. | |
2025-06-14 06:28:24,227 - __main__ - INFO - [2025-06-14 06:28:24 TP0] Init torch distributed begin. | |
2025-06-14 06:28:24,267 - __main__ - INFO - /usr/local/lib/python3.11/dist-packages/torch/cuda/__init__.py:235: UserWarning: | |
2025-06-14 06:28:24,267 - __main__ - INFO - NVIDIA GeForce RTX 5090 with CUDA capability sm_120 is not compatible with the current PyTorch installation. | |
2025-06-14 06:28:24,267 - __main__ - INFO - The current PyTorch install supports CUDA capabilities sm_50 sm_60 sm_70 sm_75 sm_80 sm_86 sm_90. | |
2025-06-14 06:28:24,267 - __main__ - INFO - If you want to use the NVIDIA GeForce RTX 5090 GPU with PyTorch, please check the instructions at https://pytorch.org/get-started/locally/ | |
2025-06-14 06:28:24,267 - __main__ - INFO - | |
2025-06-14 06:28:24,267 - __main__ - INFO - warnings.warn( | |
2025-06-14 06:28:24,407 - __main__ - INFO - [2025-06-14 06:28:24 TP0] Load weight begin. avail mem=27.12 GB | |
2025-06-14 06:28:24,578 - __main__ - INFO - [2025-06-14 06:28:24 TP0] Ignore import error when loading sglang.srt.models.llava. Failed to import transformers.models.clip.modeling_clip because of the following error (look up to see its traceback): | |
2025-06-14 06:28:24,578 - __main__ - INFO - CUDA error: no kernel image is available for execution on the device | |
2025-06-14 06:28:24,578 - __main__ - INFO - CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect. | |
2025-06-14 06:28:24,578 - __main__ - INFO - For debugging consider passing CUDA_LAUNCH_BLOCKING=1 | |
2025-06-14 06:28:24,578 - __main__ - INFO - Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions. | |
2025-06-14 06:28:24,578 - __main__ - INFO - | |
2025-06-14 06:28:24,599 - __main__ - INFO - [2025-06-14 06:28:24 TP0] Scheduler hit an exception: Traceback (most recent call last): | |
2025-06-14 06:28:24,599 - __main__ - INFO - File "/usr/local/lib/python3.11/dist-packages/sglang/srt/managers/scheduler.py", line 1773, in run_scheduler_process | |
2025-06-14 06:28:24,599 - __main__ - INFO - scheduler = Scheduler(server_args, port_args, gpu_id, tp_rank, dp_rank) | |
2025-06-14 06:28:24,599 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
2025-06-14 06:28:24,599 - __main__ - INFO - File "/usr/local/lib/python3.11/dist-packages/sglang/srt/managers/scheduler.py", line 239, in __init__ | |
2025-06-14 06:28:24,599 - __main__ - INFO - self.tp_worker = TpWorkerClass( | |
2025-06-14 06:28:24,599 - __main__ - INFO - ^^^^^^^^^^^^^^ | |
2025-06-14 06:28:24,599 - __main__ - INFO - File "/usr/local/lib/python3.11/dist-packages/sglang/srt/managers/tp_worker.py", line 68, in __init__ | |
2025-06-14 06:28:24,599 - __main__ - INFO - self.model_runner = ModelRunner( | |
2025-06-14 06:28:24,600 - __main__ - INFO - ^^^^^^^^^^^^ | |
2025-06-14 06:28:24,600 - __main__ - INFO - File "/usr/local/lib/python3.11/dist-packages/sglang/srt/model_executor/model_runner.py", line 185, in __init__ | |
2025-06-14 06:28:24,600 - __main__ - INFO - self.load_model() | |
2025-06-14 06:28:24,600 - __main__ - INFO - File "/usr/local/lib/python3.11/dist-packages/sglang/srt/model_executor/model_runner.py", line 306, in load_model | |
2025-06-14 06:28:24,600 - __main__ - INFO - self.model = get_model( | |
2025-06-14 06:28:24,600 - __main__ - INFO - ^^^^^^^^^^ | |
2025-06-14 06:28:24,600 - __main__ - INFO - File "/usr/local/lib/python3.11/dist-packages/sglang/srt/model_loader/__init__.py", line 22, in get_model | |
2025-06-14 06:28:24,600 - __main__ - INFO - return loader.load_model( | |
2025-06-14 06:28:24,600 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^ | |
2025-06-14 06:28:24,600 - __main__ - INFO - File "/usr/local/lib/python3.11/dist-packages/sglang/srt/model_loader/loader.py", line 357, in load_model | |
2025-06-14 06:28:24,600 - __main__ - INFO - model = _initialize_model( | |
2025-06-14 06:28:24,600 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^ | |
2025-06-14 06:28:24,600 - __main__ - INFO - File "/usr/local/lib/python3.11/dist-packages/sglang/srt/model_loader/loader.py", line 138, in _initialize_model | |
2025-06-14 06:28:24,600 - __main__ - INFO - return model_class( | |
2025-06-14 06:28:24,600 - __main__ - INFO - ^^^^^^^^^^^^ | |
2025-06-14 06:28:24,600 - __main__ - INFO - File "/usr/local/lib/python3.11/dist-packages/sglang/srt/models/qwen2_vl.py", line 435, in __init__ | |
2025-06-14 06:28:24,600 - __main__ - INFO - self.visual = Qwen2VisionTransformer( | |
2025-06-14 06:28:24,600 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^ | |
2025-06-14 06:28:24,600 - __main__ - INFO - File "/usr/local/lib/python3.11/dist-packages/sglang/srt/models/qwen2_vl.py", line 279, in __init__ | |
2025-06-14 06:28:24,600 - __main__ - INFO - self.patch_embed = Qwen2VisionPatchEmbed( | |
2025-06-14 06:28:24,600 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^ | |
2025-06-14 06:28:24,600 - __main__ - INFO - File "/usr/local/lib/python3.11/dist-packages/sglang/srt/models/qwen2_vl.py", line 170, in __init__ | |
2025-06-14 06:28:24,600 - __main__ - INFO - self.proj = nn.Conv3d( | |
2025-06-14 06:28:24,600 - __main__ - INFO - ^^^^^^^^^^ | |
2025-06-14 06:28:24,600 - __main__ - INFO - File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/conv.py", line 692, in __init__ | |
2025-06-14 06:28:24,600 - __main__ - INFO - super().__init__( | |
2025-06-14 06:28:24,600 - __main__ - INFO - File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/conv.py", line 176, in __init__ | |
2025-06-14 06:28:24,600 - __main__ - INFO - self.reset_parameters() | |
2025-06-14 06:28:24,600 - __main__ - INFO - File "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/conv.py", line 182, in reset_parameters | |
2025-06-14 06:28:24,600 - __main__ - INFO - init.kaiming_uniform_(self.weight, a=math.sqrt(5)) | |
2025-06-14 06:28:24,600 - __main__ - INFO - File "/usr/local/lib/python3.11/dist-packages/torch/nn/init.py", line 500, in kaiming_uniform_ | |
2025-06-14 06:28:24,600 - __main__ - INFO - return torch.overrides.handle_torch_function( | |
2025-06-14 06:28:24,600 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
2025-06-14 06:28:24,600 - __main__ - INFO - File "/usr/local/lib/python3.11/dist-packages/torch/overrides.py", line 1717, in handle_torch_function | |
2025-06-14 06:28:24,600 - __main__ - INFO - result = mode.__torch_function__(public_api, types, args, kwargs) | |
2025-06-14 06:28:24,600 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
2025-06-14 06:28:24,600 - __main__ - INFO - File "/usr/local/lib/python3.11/dist-packages/torch/utils/_device.py", line 106, in __torch_function__ | |
2025-06-14 06:28:24,601 - __main__ - INFO - return func(*args, **kwargs) | |
2025-06-14 06:28:24,601 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^ | |
2025-06-14 06:28:24,601 - __main__ - INFO - File "/usr/local/lib/python3.11/dist-packages/torch/nn/init.py", line 518, in kaiming_uniform_ | |
2025-06-14 06:28:24,601 - __main__ - INFO - return tensor.uniform_(-bound, bound, generator=generator) | |
2025-06-14 06:28:24,601 - __main__ - INFO - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
2025-06-14 06:28:24,601 - __main__ - INFO - RuntimeError: CUDA error: no kernel image is available for execution on the device | |
2025-06-14 06:28:24,601 - __main__ - INFO - CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect. | |
2025-06-14 06:28:24,601 - __main__ - INFO - For debugging consider passing CUDA_LAUNCH_BLOCKING=1 | |
2025-06-14 06:28:24,601 - __main__ - INFO - Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions. | |
2025-06-14 06:28:24,601 - __main__ - INFO - | |
2025-06-14 06:28:24,601 - __main__ - INFO - | |
2025-06-14 06:28:24,601 - __main__ - INFO - [2025-06-14 06:28:24] Received sigquit from a child proces. It usually means the child failed. | |
2025-06-14 06:28:24,844 - __main__ - WARNING - SGLang server task ended | |
2025-06-14 06:28:24,844 - __main__ - ERROR - Ended up starting the sglang server more than 5 times, cancelling pipeline | |
2025-06-14 06:28:24,844 - __main__ - ERROR - | |
2025-06-14 06:28:24,844 - __main__ - ERROR - Please make sure sglang is installed according to the latest instructions here: https://docs.sglang.ai/start/install.html | |
Exception ignored in atexit callback: <function sglang_server_task.<locals>._kill_proc at 0x7c90c4ec0ea0> | |
Traceback (most recent call last): | |
File "/root/olmocr/pipeline.py", line 593, in _kill_proc | |
proc.terminate() | |
File "/usr/lib/python3.11/asyncio/subprocess.py", line 143, in terminate | |
self._transport.terminate() | |
File "/usr/lib/python3.11/asyncio/base_subprocess.py", line 149, in terminate | |
self._check_proc() | |
File "/usr/lib/python3.11/asyncio/base_subprocess.py", line 142, in _check_proc | |
raise ProcessLookupError() | |
ProcessLookupError: | |
Exception ignored in atexit callback: <function sglang_server_task.<locals>._kill_proc at 0x7c90c4ec3ce0> | |
Traceback (most recent call last): | |
File "/root/olmocr/pipeline.py", line 593, in _kill_proc | |
proc.terminate() | |
File "/usr/lib/python3.11/asyncio/subprocess.py", line 143, in terminate | |
self._transport.terminate() | |
File "/usr/lib/python3.11/asyncio/base_subprocess.py", line 149, in terminate | |
self._check_proc() | |
File "/usr/lib/python3.11/asyncio/base_subprocess.py", line 142, in _check_proc | |
raise ProcessLookupError() | |
ProcessLookupError: | |
Exception ignored in atexit callback: <function sglang_server_task.<locals>._kill_proc at 0x7c90c4021300> | |
Traceback (most recent call last): | |
File "/root/olmocr/pipeline.py", line 593, in _kill_proc | |
proc.terminate() | |
File "/usr/lib/python3.11/asyncio/subprocess.py", line 143, in terminate | |
self._transport.terminate() | |
File "/usr/lib/python3.11/asyncio/base_subprocess.py", line 149, in terminate | |
self._check_proc() | |
File "/usr/lib/python3.11/asyncio/base_subprocess.py", line 142, in _check_proc | |
raise ProcessLookupError() | |
ProcessLookupError: | |
Exception ignored in atexit callback: <function sglang_server_task.<locals>._kill_proc at 0x7c90c41d3c40> | |
Traceback (most recent call last): | |
File "/root/olmocr/pipeline.py", line 593, in _kill_proc | |
proc.terminate() | |
File "/usr/lib/python3.11/asyncio/subprocess.py", line 143, in terminate | |
self._transport.terminate() | |
File "/usr/lib/python3.11/asyncio/base_subprocess.py", line 149, in terminate | |
self._check_proc() | |
File "/usr/lib/python3.11/asyncio/base_subprocess.py", line 142, in _check_proc | |
raise ProcessLookupError() | |
ProcessLookupError: | |
Exception ignored in atexit callback: <function sglang_server_task.<locals>._kill_proc at 0x7c90c41d3e20> | |
Traceback (most recent call last): | |
File "/root/olmocr/pipeline.py", line 593, in _kill_proc | |
proc.terminate() | |
File "/usr/lib/python3.11/asyncio/subprocess.py", line 143, in terminate | |
self._transport.terminate() | |
File "/usr/lib/python3.11/asyncio/base_subprocess.py", line 149, in terminate | |
self._check_proc() | |
File "/usr/lib/python3.11/asyncio/base_subprocess.py", line 142, in _check_proc | |
raise ProcessLookupError() | |
ProcessLookupError: | |
ERROR:asyncio:Task exception was never retrieved | |
future: <Task finished name='Task-2' coro=<sglang_server_host() done, defined at /root/olmocr/pipeline.py:672> exception=SystemExit(1)> | |
Traceback (most recent call last): | |
File "/usr/lib/python3.11/asyncio/runners.py", line 190, in run | |
return runner.run(main) | |
^^^^^^^^^^^^^^^^ | |
File "/usr/lib/python3.11/asyncio/runners.py", line 118, in run | |
return self._loop.run_until_complete(task) | |
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
File "/usr/lib/python3.11/asyncio/base_events.py", line 641, in run_until_complete | |
self.run_forever() | |
File "/usr/lib/python3.11/asyncio/base_events.py", line 608, in run_forever | |
self._run_once() | |
File "/usr/lib/python3.11/asyncio/base_events.py", line 1936, in _run_once | |
handle._run() | |
File "/usr/lib/python3.11/asyncio/events.py", line 84, in _run | |
self._context.run(self._callback, *self._args) | |
File "/root/olmocr/pipeline.py", line 685, in sglang_server_host | |
sys.exit(1) | |
SystemExit: 1 | |
root@2fdffe8b8e20:~# |
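In short, every sglang launch attempt dies at model-load time for the same reason, so the pipeline gives up after five restarts; the sglang install link printed above is likely not the real issue here. The root cause, as the repeated UserWarning states, is that this PyTorch build has no sm_120 (Blackwell) kernels. At the time of writing, PyTorch wheels built against CUDA 12.8 (torch 2.7+ or a recent nightly from the cu128 index) are the ones that advertise sm_120; treat that as an assumption and follow the pytorch.org link from the warning for current install instructions. A small pre-flight check, using only public torch.cuda APIs, that could be run before retrying the pipeline:

import torch

# Passes only once the installed wheel actually ships kernels for this GPU's architecture.
cap = torch.cuda.get_device_capability(0)      # (12, 0) for an RTX 5090
arch = f"sm_{cap[0]}{cap[1]}"                  # -> "sm_120"
supported = torch.cuda.get_arch_list()
assert arch in supported, (
    f"{torch.cuda.get_device_name(0)} needs {arch}, "
    f"but this PyTorch build only ships {supported}"
)
print("PyTorch", torch.__version__, "supports", arch, "- safe to start sglang")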