Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save aribornstein/3a4ce90c6e4a78ba87fdfe2118efdf27 to your computer and use it in GitHub Desktop.
Save aribornstein/3a4ce90c6e4a78ba87fdfe2118efdf27 to your computer and use it in GitHub Desktop.
set_environment_variables_for_nccl_backend
def set_environment_variables_for_nccl_backend(
    single_node: bool = False, master_port: int = 6105
) -> None:
    """Populate the rendezvous environment variables for an NCCL job.

    Reads the variables provided by the Azure Batch / Open MPI launcher and
    writes ``MASTER_ADDR``, ``MASTER_PORT``, ``NCCL_SOCKET_IFNAME`` and
    ``NODE_RANK`` into ``os.environ`` of the current process.

    Args:
        single_node: When false (the default), the master address is the
            host part of ``AZ_BATCH_MASTER_NODE`` (``host:port``). When
            true, the address comes from ``AZ_BATCHAI_MPI_MASTER_NODE`` and
            the port is fixed at ``"54965"``.
        master_port: Port stored in ``MASTER_PORT`` in multi-node mode, and
            only if ``MASTER_PORT`` is not already set. Ignored in
            single-node mode.

    Raises:
        KeyError: If a variable this function reads
            (``AZ_BATCH_MASTER_NODE`` or ``AZ_BATCHAI_MPI_MASTER_NODE``,
            and ``OMPI_COMM_WORLD_RANK``) is missing from the environment.
    """
    # Local import keeps the function usable even when the surrounding
    # module does not import ``os`` itself.
    import os

    if not single_node:
        master_node_params = os.environ["AZ_BATCH_MASTER_NODE"].split(":")
        os.environ["MASTER_ADDR"] = master_node_params[0]
        # Do not overwrite master port with that defined in
        # AZ_BATCH_MASTER_NODE; an explicitly exported MASTER_PORT wins over
        # the ``master_port`` argument as well.
        if "MASTER_PORT" not in os.environ:
            os.environ["MASTER_PORT"] = str(master_port)
    else:
        os.environ["MASTER_ADDR"] = os.environ["AZ_BATCHAI_MPI_MASTER_NODE"]
        # Single-node runs always use this fixed port, replacing any
        # MASTER_PORT that was already set.
        os.environ["MASTER_PORT"] = "54965"

    # The leading "^" is NCCL's exclusion syntax: skip the docker bridge and
    # the loopback interface when choosing a socket interface.
    os.environ["NCCL_SOCKET_IFNAME"] = "^docker0,lo"
    # Node rank is the world rank assigned by the MPI launcher.
    os.environ["NODE_RANK"] = os.environ["OMPI_COMM_WORLD_RANK"]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment