Here are the full supported engine configurations:
model_id: <HF model ID or local model path>
llm_engine: vllm
accelerator_type: <GPU type>
from time import perf_counter
from time import sleep
from contextlib import contextmanager
from typing import Callable


@contextmanager
def catchtime() -> Callable[[], float]:
    """Context manager that measures wall-clock time spent inside the block.

    Yields a zero-argument callable; after the ``with`` block exits, calling
    it returns the elapsed seconds. Before exit it returns 0.0 (t1 == t2).

    Example:
        with catchtime() as elapsed:
            do_work()
        print(elapsed())
    """
    t1 = t2 = perf_counter()
    # The lambda closes over t2's cell, so it observes the reassignment below.
    yield lambda: t2 - t1
    t2 = perf_counter()
import torch
import ray.data

class DataGenerator:
    def __init__(self, permute_config):
        device = torch.device("cuda")
        self.model = Model().to(device)
        self.config = permute_config

    def __call__(self, input):
import ray.data

class DataGenerator:
    def __init__(self, permute_config):
        device = torch.device("cuda")
        self.model = Model().to(device)
        self.config = permute_config

    def __call__(self, input):
        for test_input in self.permute(self.config, input):
import ray.data

class DataGenerator:
    def __init__(self, permute_config):
        device = torch.device("cuda")
        self.model = Model().to(device)
        self.config = permute_config

    def __call__(self, input):
        for test_input in self.permute(self.config, input):
import requests
from datetime import datetime

def get_issues_with_ray_in_title(repo_name):
    issues = []
    page = 1
    # headers = {'Authorization': 'token YOUR_GITHUB_TOKEN'}
    while True:
        issues_url = f"https://api.github.com/repos/{repo_name}/issues?page={page}&per_page=100&state=open"
        response = requests.get(issues_url)  # pass headers=headers to authenticate
        if response.status_code == 200:
import requests
from datetime import datetime

def get_issues_with_ray_in_title(repo_name):
    issues = []
    page = 1
    # headers = {'Authorization': 'token YOUR_GITHUB_TOKEN'}
    while True:
        issues_url = f"https://api.github.com/repos/{repo_name}/issues?page={page}&per_page=100&state=open"
        response = requests.get(issues_url)  # pass headers=headers to authenticate
        if response.status_code == 200: