@cheeyeo
Created April 25, 2025 16:18
Example Triton Inference Server startup output in a terminal, running on localhost
root@8423ac911e05:/opt/tritonserver# tritonserver --model-repository=model_repository
I0425 10:08:43.682014 1236 pinned_memory_manager.cc:241] Pinned memory pool is created at '0x77fa88000000' with size 268435456
I0425 10:08:43.682247 1236 cuda_memory_manager.cc:107] CUDA memory pool is created on device 0 with size 67108864
I0425 10:08:43.684950 1236 model_lifecycle.cc:461] loading: detection_postprocessing:1
W0425 10:08:43.685049 1236 model_lifecycle.cc:108] ignore version directory 'hf_cache' which fails to convert to integral number
I0425 10:08:43.685063 1236 model_lifecycle.cc:461] loading: sam_vit:1
W0425 10:08:43.685125 1236 model_lifecycle.cc:108] ignore version directory 'hf_cache' which fails to convert to integral number
I0425 10:08:43.685138 1236 model_lifecycle.cc:461] loading: grounding_dino_tiny:1
I0425 10:08:45.251843 1236 python_be.cc:2363] TRITONBACKEND_ModelInstanceInitialize: detection_postprocessing_0 (GPU device 0)
I0425 10:08:45.654267 1236 model_lifecycle.cc:818] successfully loaded 'detection_postprocessing'
I0425 10:08:48.698877 1236 python_be.cc:2363] TRITONBACKEND_ModelInstanceInitialize: sam_vit_0 (GPU device 0)
I0425 10:08:49.068813 1236 python_be.cc:2363] TRITONBACKEND_ModelInstanceInitialize: grounding_dino_tiny_0 (GPU device 0)
I0425 10:08:52.116161 1236 model.py:24] Loading HuggingFace model: facebook/sam-vit-base
I0425 10:08:52.436684 1236 model.py:23] Loading HuggingFace model: IDEA-Research/grounding-dino-tiny
I0425 10:08:53.158430 1236 model_lifecycle.cc:818] successfully loaded 'sam_vit'
I0425 10:08:54.966951 1236 model_lifecycle.cc:818] successfully loaded 'grounding_dino_tiny'
I0425 10:08:54.967230 1236 model_lifecycle.cc:461] loading: ensemble_model:1
I0425 10:08:54.967405 1236 model_lifecycle.cc:818] successfully loaded 'ensemble_model'
I0425 10:08:54.967505 1236 server.cc:606]
+------------------+------+
| Repository Agent | Path |
+------------------+------+
+------------------+------+
I0425 10:08:54.967548 1236 server.cc:633]
+---------+-------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------+
| Backend | Path | Config |
+---------+-------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------+
| python | /opt/tritonserver/backends/python/libtriton_python.so | {"cmdline":{"auto-complete-config":"true","backend-directory":"/opt/tritonserver/backends","min-compute-capability":"6.000000","defa |
| | | ult-max-batch-size":"4"}} |
+---------+-------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------+
I0425 10:08:54.967597 1236 server.cc:676]
+--------------------------+---------+--------+
| Model | Version | Status |
+--------------------------+---------+--------+
| detection_postprocessing | 1 | READY |
| ensemble_model | 1 | READY |
| grounding_dino_tiny | 1 | READY |
| sam_vit | 1 | READY |
+--------------------------+---------+--------+
I0425 10:08:55.009942 1236 metrics.cc:817] Collecting metrics for GPU 0: NVIDIA GeForce GTX 1060 with Max-Q Design
I0425 10:08:55.011949 1236 metrics.cc:710] Collecting CPU metrics
I0425 10:08:55.012109 1236 tritonserver.cc:2483]
+----------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Option | Value |
+----------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| server_id | triton |
| server_version | 2.41.0 |
| server_extensions | classification sequence model_repository model_repository(unload_dependents) schedule_policy model_configuration system_shared_memory cuda_shared_memory binary_te |
| | nsor_data parameters statistics trace logging |
| model_repository_path[0] | model_repository |
| model_control_mode | MODE_NONE |
| strict_model_config | 0 |
| rate_limit | OFF |
| pinned_memory_pool_byte_size | 268435456 |
| cuda_memory_pool_byte_size{0} | 67108864 |
| min_supported_compute_capability | 6.0 |
| strict_readiness | 1 |
| exit_timeout | 30 |
| cache_enabled | 0 |
+----------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------+
I0425 10:08:55.013241 1236 grpc_server.cc:2495] Started GRPCInferenceService at 0.0.0.0:8001
I0425 10:08:55.013667 1236 http_server.cc:4619] Started HTTPService at 0.0.0.0:8000
I0425 10:08:55.056199 1236 http_server.cc:282] Started Metrics Service at 0.0.0.0:8002
W0425 10:08:56.018599 1236 metrics.cc:582] Unable to get power limit for GPU 0. Status:Success, value:0.000000
W0425 10:08:57.027643 1236 metrics.cc:582] Unable to get power limit for GPU 0. Status:Success, value:0.000000
W0425 10:08:58.028778 1236 metrics.cc:582] Unable to get power limit for GPU 0. Status:Success, value:0.000000
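Once the GRPC (8001), HTTP (8000), and metrics (8002) services in the log above are up, the loaded models can be queried over the KServe-v2 HTTP API that Triton exposes on port 8000. A minimal sketch of the relevant endpoints — the helper function names are my own, but the URL paths follow the KServe v2 REST protocol Triton implements:

```python
# Helpers for building the KServe-v2 HTTP endpoints exposed by the server above.
# The host/port match the "Started HTTPService at 0.0.0.0:8000" log line.

def health_ready_url(base: str = "http://localhost:8000") -> str:
    """URL that returns 200 once the server satisfies strict readiness."""
    return f"{base}/v2/health/ready"

def model_ready_url(model: str, base: str = "http://localhost:8000") -> str:
    """URL reporting whether a single model (e.g. 'ensemble_model') is READY."""
    return f"{base}/v2/models/{model}/ready"

def infer_url(model: str, base: str = "http://localhost:8000") -> str:
    """URL accepting POSTed JSON inference requests for the given model."""
    return f"{base}/v2/models/{model}/infer"

if __name__ == "__main__":
    # With the server from the log running, each URL can be probed with
    # e.g. `curl -v <url>`; here we only print the readiness endpoints
    # for the four models the log reports as READY.
    for m in ("detection_postprocessing", "ensemble_model",
              "grounding_dino_tiny", "sam_vit"):
        print(model_ready_url(m))
```

With the server from the log running, `curl http://localhost:8000/v2/health/ready` should return HTTP 200, since `strict_readiness` is enabled and all four models report READY.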