serve run queue_proxy:app
python client.py
(base) ➜ tmp python client.py
# Minimal Ray Serve app: a single function deployment.
from ray import serve


@serve.deployment
def app():
    """Serve handler; replies to every request with the constant string "hi"."""
    return "hi"
In this example, we will deploy a Stable Diffusion model on Anyscale using [Anyscale Production Services](https://docs.anyscale.com/user-guide/run-and-monitor/production-services).
Once you have onboarded to Anyscale and set up your local environment, you can download the content of this gist to your laptop.
" Sensible setup | |
set nocompatible | |
filetype plugin on | |
" Install Vim Plug | |
if empty(glob('~/.vim/autoload/plug.vim')) | |
silent !curl -fLo ~/.vim/autoload/plug.vim --create-dirs | |
\ https://raw.githubusercontent.com/junegunn/vim-plug/master/plug.vim | |
endif |
# ConfigMap carrying a small FastAPI script; the `|` literal block scalar
# preserves the embedded Python verbatim (indentation reconstructed —
# the source paste had lost it).
kind: ConfigMap
apiVersion: v1
metadata:
  name: app-py-script
data:
  app.py: |
    from fastapi import FastAPI
    import asyncio

    app = FastAPI()
from ray import serve


# Deployment with 20 replicas, mounted at the HTTP root.
@serve.deployment(route_prefix="/", num_replicas=20)
def f():
    """Serve handler; replies to every request with the constant string "Hello"."""
    return "Hello"


# detached=True keeps Serve alive after this driver script exits;
# host 0.0.0.0 makes the endpoint reachable from outside the head node.
serve.start(detached=True, http_options={"host": "0.0.0.0"})
f.deploy()
This snippet implements a simple strategy for model multiplexing in Ray Serve.
pip install "ray[serve]"
app.py
serve run app:entrypoint
from ray import serve
# DAG-related imports; presumably wired up later in the original gist —
# kept as-is since this chunk may be truncated.
from ray.serve.drivers import DAGDriver
from ray.serve.dag import InputNode
from ray.serve.http_adapters import json_request


@serve.deployment
class A:
    """Placeholder model deployment: echoes its input back unchanged."""

    def predict(self, inp):
        # Identity "prediction" — stands in for a real model call.
        return inp
import asyncio
import types

from scanner import _PyObjScanner

# NOTE(review): "corotinue" is a typo for "coroutine"; name kept as-is in
# case code beyond this chunk references it.
corotinue = types.CoroutineType


async def main():
    # Scanner from the project-local `scanner` module; its use appears to
    # continue past this visible chunk.
    scanner = _PyObjScanner()

    async def f():
        pass
# Standard library
from io import BytesIO
from pprint import pprint
import random
import threading
import time

# Third party
import requests
import torch
import torchvision.models as models
from pydantic import BaseModel