Skip to content

Instantly share code, notes, and snippets.

@edoakes
Last active April 15, 2020 16:34
Show Gist options
  • Save edoakes/58903277172880e5386ca2af9c8027d2 to your computer and use it in GitHub Desktop.
Save edoakes/58903277172880e5386ca2af9c8027d2 to your computer and use it in GitHub Desktop.
import requests
from ray import serve
# Start the Serve instance (legacy pre-1.0 Ray Serve API); must run before
# any endpoints/backends are created below.
serve.init()
# Main concepts
## Endpoints
# Endpoints define the HTTP paths that your serve application exposes as an API.
# They can be created as follows:
serve.create_endpoint("endpoint_identifier", "/route", methods=["GET", "POST"])
# We can query serve for the registered endpoints as follows:
# NOTE(review): assumes the HTTP proxy listens on 127.0.0.1:8000 (serve's
# default); the short timeout keeps the demo from hanging if it isn't up.
routes = requests.get("http://127.0.0.1:8000/-/routes", timeout=0.5).json()
print("Available routes after create_endpoint:", routes)
# Note that an endpoint alone cannot actually serve requests, because we haven't
# defined the serving logic! We can do that with a *backend* (see below).
## Backends
# Backends are how you specify the python code that will run to handle requests
# to a given endpoint.
# They can be created as follows:
# The actual python code to run in the handler.
def handler(flask_request):
    """Function backend: return a constant greeting for every request.

    ``flask_request`` is the incoming Flask request object (unused here).
    """
    # FIX: the pasted original lost the indentation on this line (SyntaxError).
    return "hello, world!"
# Registering the backend.
# Note the (callable, identifier) argument order of this legacy API.
serve.create_backend(handler, "backend_identifier")
# Now that this backend has been registered, we can link it to a previously-created endpoint:
serve.link("endpoint_identifier", "backend_identifier")
# Now that the backend and endpoint are linked, we can invoke the defined handler via HTTP:
# (This re-queries the route table; hitting /route itself would invoke handler.)
print(requests.get("http://127.0.0.1:8000/-/routes", timeout=0.5).text)
# Serve system components
# - Master Actor: manages the serve control plane, enacting changes specified via the API.
# - HTTP Proxy: listens for inbound HTTP requests, proxies them to the router
# - Router: routes requests from HTTP proxy to the workers
# Advanced:
## Using classes as backends:
class Handler:
    """Class-based backend: per-instance state persists across requests."""

    def __init__(self):
        # Message returned to every caller.
        self.msg = "hello, world!"

    def __call__(self, flask_request):
        # Serve invokes the instance like a function for each request.
        # FIX: method bodies lost their indentation in the pasted original.
        return self.msg
# Register the class itself; serve constructs the instance(s) for us.
serve.create_backend(Handler, "backend_identifier_class")
## Multiple replicas for a backend.
def scaled_handler(flask_request):
    """Function backend used to demonstrate running multiple replicas."""
    # FIX: the pasted original lost the indentation on this line (SyntaxError).
    return "hello"
# Run two replica workers for this backend.
config = serve.BackendConfig(num_replicas=2)
# BUG FIX: the original reused "backend_identifier_class", which was already
# registered for the Handler class above; give this backend its own identifier.
serve.create_backend(scaled_handler, "backend_identifier_scaled", backend_config=config)
## Splitting traffic for an endpoint.
def handler1(flask_request):
    """First backend for the traffic-splitting example."""
    # FIX: the pasted original lost the indentation on this line (SyntaxError).
    return "hello1"
def handler2(flask_request):
    """Second backend for the traffic-splitting example."""
    # FIX: the pasted original lost the indentation on this line (SyntaxError).
    return "hello2"
# Create a second endpoint and register both handlers as backends for it.
serve.create_endpoint("endpoint_identifier_split", "/split", methods=["GET", "POST"])
serve.create_backend(handler1, "backend_identifier_1")
serve.create_backend(handler2, "backend_identifier_2")
# 70% of requests go to handler1, 30% to handler2 (weights must sum to 1).
serve.split("endpoint_identifier_split", {"backend_identifier_1": 0.7, "backend_identifier_2": 0.3})
## Batching
class BatchingExample:
    """Backend that opts into request batching via ``@serve.accept_batch``.

    ``__call__`` receives a whole batch and must return one result per
    request in the batch (hence the list of length ``batch_size``).
    """

    def __init__(self):
        # Number of batches handled so far by this replica.
        self.count = 0

    @serve.accept_batch
    def __call__(self, flask_request):
        # FIX: method bodies lost their indentation in the pasted original.
        self.count += 1
        # serve.context.batch_size is set by serve for the current batch.
        batch_size = serve.context.batch_size
        return [self.count] * batch_size
# Wire the batching backend to an endpoint, allowing up to 5 requests per batch.
serve.create_endpoint("counter1", "/increment")
# BUG FIX: the original called bare ``BackendConfig`` (NameError); it is
# accessed as ``serve.BackendConfig`` everywhere else in this script.
config = serve.BackendConfig(max_batch_size=5)
serve.create_backend(BatchingExample, "counter:v11", backend_config=config)
serve.link("counter1", "counter:v11")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment