# vllm_monkey_patch.py -- vllm_monkey_patch
#
# Source: GitHub gist by @flishwang (flishwang / vllm_monkey_patch.py)
# Created: July 4, 2025 04:14
import torch
from vllm.v1.worker.gpu_worker import Worker, logger
from vllm.device_allocator.cumem import CuMemAllocator
from vllm.v1.kv_cache_interface import KVCacheConfig
from vllm.utils import GiB_bytes
from typing import Optional
import gc
from vllm.device_allocator.cumem import libcudart, is_pin_memory_available, unmap_and_release