Created
January 1, 2022 05:16
-
-
Save bakfoo/faa0efdc9b2730c5f65ce2ee29fa80c5 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from collections import Counter | |
import sys | |
import time | |
import ray | |
""" This script is meant to be run from a pod in the same Kubernetes namespace | |
as your Ray cluster. | |
""" | |
@ray.remote
def gethostname(x):
    """Return ``x`` with the executing machine's hostname appended.

    Args:
        x: Tuple of hostnames collected so far (``()`` for the first hop).

    Returns:
        A new tuple: ``x`` followed by ``platform.node()`` of the machine
        that ran this task.
    """
    # Imported inside the task body rather than relying on module globals.
    from platform import node
    from time import sleep

    # Short pause before answering; presumably keeps tasks from completing so
    # fast that they never spread across workers -- TODO confirm.
    sleep(0.01)
    hostname = node()
    return x + (hostname,)
def wait_for_nodes(expected):
    """Block until at least ``expected`` nodes have joined the Ray cluster.

    Polls the cluster once per second and prints a progress line while the
    cluster is still short of the target.

    Args:
        expected: Minimum number of live nodes required before returning.
    """
    while True:
        # Count live nodes directly. Inferring the count from resource names
        # that contain "node" over-counts as soon as any other resource name
        # contains that substring, which would end the wait too early.
        num_nodes = sum(1 for node in ray.nodes() if node.get("Alive"))
        if num_nodes >= expected:
            return
        print(
            "{} nodes have joined so far, waiting for {} more.".format(
                num_nodes, expected - num_nodes
            ),
            flush=True,
        )
        time.sleep(1)
def main():
    """Wait for a three-node cluster, then run chained hostname tasks.

    Each of the 10 iterations submits 100 two-hop task chains and prints a
    ``Counter`` of the resulting hostname tuples.
    """
    wait_for_nodes(3)

    # Check that objects can be transferred from each node to each other node.
    for iteration in range(10):
        print("Iteration {}".format(iteration))

        chained_refs = []
        for _ in range(100):
            # The first hop's result is passed by reference into the second
            # hop, so the final tuple holds two hostnames.
            first_hop = gethostname.remote(())
            chained_refs.append(gethostname.remote(first_hop))

        hostname_pairs = ray.get(chained_refs)
        print(Counter(hostname_pairs))
        sys.stdout.flush()

    print("Success!")
    sys.stdout.flush()
if __name__ == "__main__":
    # Address of the Ray head to connect to; the host name is presumably the
    # head service's name inside the cluster's Kubernetes namespace -- verify
    # against your deployment.
    head_address = "ray://raynodes-ray-head:10001"
    ray.init(head_address)
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment