Last active
April 15, 2020 16:34
-
-
Save edoakes/58903277172880e5386ca2af9c8027d2 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
from ray import serve | |
# Start (or connect to) the serve system on this Ray cluster.
serve.init()
# Main concepts
## Endpoints
# Endpoints define the HTTP paths that your serve application exposes as an API.
# They can be created as follows:
serve.create_endpoint("endpoint_identifier", "/route", methods=["GET", "POST"])
# We can query serve for the registered endpoints as follows.
# NOTE(review): the HTTP proxy is assumed to listen on 127.0.0.1:8000 and expose
# its route table at /-/routes — confirm against the serve.init() defaults.
routes = requests.get("http://127.0.0.1:8000/-/routes", timeout=0.5).json()
print("Available routes after create_endpoint:", routes)
# Note that an endpoint alone cannot actually serve requests, because we haven't
# defined the serving logic! We can do that with a *backend* (see below).
## Backends
# Backends are how you specify the python code that will run to handle requests
# to a given endpoint.
# They can be created as follows:
# The actual python code to run in the handler.
def handler(flask_request):
    """Simplest possible backend: ignore the request, return a fixed greeting."""
    greeting = "hello, world!"
    return greeting
# Registering the backend under a name that endpoints can refer to.
serve.create_backend(handler, "backend_identifier")
# Now that this backend has been registered, we can link it to a previously-created endpoint:
serve.link("endpoint_identifier", "backend_identifier")
# Now that the backend and endpoint are linked, we can invoke the defined handler via HTTP.
# NOTE(review): this GET hits the /-/routes listing again, not the handler's
# "/route" path — to actually invoke the handler, GET http://127.0.0.1:8000/route.
print(requests.get("http://127.0.0.1:8000/-/routes", timeout=0.5).text)
# Serve system components
# - Master Actor: manages the serve control plane, enacting changes specified via the API.
# - HTTP Proxy: listens for inbound HTTP requests, proxies them to the router
# - Router: routes requests from HTTP proxy to the workers
# Advanced:
## Using classes as backends:
class Handler:
    """Class-based backend: serve calls the instance itself for each request."""

    def __init__(self):
        # Fixed response text, set up once per replica.
        self.msg = "hello, world!"

    def __call__(self, flask_request):
        # The request object is accepted but not consulted.
        return self.msg
# Register the class itself; serve instantiates it and routes requests to __call__.
serve.create_backend(Handler, "backend_identifier_class")
## Multiple replicas for a backend.
def scaled_handler(flask_request):
    """Handler intended to run on multiple replicas; always replies "hello"."""
    reply = "hello"
    return reply
# Ask serve to run two replicas of this backend so requests are load-balanced
# across them.
config = serve.BackendConfig(num_replicas=2)
# FIX: the original re-used "backend_identifier_class" (copy-pasted from the
# class example above), colliding with the already-registered class backend.
# Register the scaled function backend under its own identifier.
serve.create_backend(scaled_handler, "backend_identifier_scaled", backend_config=config)
## Splitting traffic for an endpoint.
def handler1(flask_request):
    """First of the two backends used in the traffic-splitting example."""
    response = "hello1"
    return response
def handler2(flask_request):
    """Second of the two backends used in the traffic-splitting example."""
    response = "hello2"
    return response
# Create a second endpoint and register both backends so traffic can be split.
serve.create_endpoint("endpoint_identifier_split", "/split", methods=["GET", "POST"])
serve.create_backend(handler1, "backend_identifier_1")
serve.create_backend(handler2, "backend_identifier_2")
# 70% of requests go to backend_identifier_1 (handler1),
# 30% to backend_identifier_2 (handler2).
serve.split("endpoint_identifier_split", {"backend_identifier_1": 0.7, "backend_identifier_2": 0.3})
## Batching
class BatchingExample:
    """Batched backend: one __call__ handles a whole batch of requests and
    replies with this replica's running call count, once per batched request."""

    def __init__(self):
        # Number of batches this replica has processed so far.
        self.count = 0

    @serve.accept_batch
    def __call__(self, flask_request):
        # A single invocation covers the entire batch; serve.context.batch_size
        # tells us how many requests it contains.
        self.count = self.count + 1
        return [self.count] * serve.context.batch_size
# Wire the batching backend up to its own endpoint.
serve.create_endpoint("counter1", "/increment")
# FIX: the original called bare `BackendConfig`, which raises NameError — the
# class is accessed as `serve.BackendConfig` elsewhere in this file (see the
# replica example above).
config = serve.BackendConfig(max_batch_size=5)
serve.create_backend(
    BatchingExample, "counter:v11", backend_config=config)
serve.link("counter1", "counter:v11")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment