Skip to content

Instantly share code, notes, and snippets.

@muellerzr
Created August 28, 2025 00:47
Show Gist options
  • Save muellerzr/62291eb5eb9f1948de40889dd702c046 to your computer and use it in GitHub Desktop.
Save muellerzr/62291eb5eb9f1948de40889dd702c046 to your computer and use it in GitHub Desktop.
mlx issue part 2
# Copyright © 2024 Apple Inc.
"""
Run with:
```
OMPI_MCA_mpi_abort_print_stack=1 mlx.launch --hostfile hosts.json --backend mpi --mpi-arg "--mca btl_tcp_if_exclude lo0,169.254.0.0/16,fe80::/10 --mca mpi_show_mca_params all --mca coll_base_verbose 100 --mca btl_base_verbose 100" script.py
```
Make sure you can run MLX over MPI on two hosts. For more information see the
documentation:
https://ml-explore.github.io/mlx/build/html/usage/distributed.html
"""
import argparse
import json
import resource
from pathlib import Path
import mlx.core as mx
from huggingface_hub import snapshot_download
from mlx.utils import tree_flatten
from mlx_lm import load, stream_generate
from mlx_lm.utils import load_model, load_tokenizer
# Needed for 8 bit model
# Raise the soft open-file limit to at least 2048 while leaving the hard limit
# alone. Requesting a fixed (2048, 4096) pair raises ValueError when the
# current hard limit is below 4096, and lowers the limits when they are
# already higher than that.
_soft_nofile, _hard_nofile = resource.getrlimit(resource.RLIMIT_NOFILE)
_wanted_nofile = (
    2048
    if _hard_nofile == resource.RLIM_INFINITY
    else min(2048, _hard_nofile)
)
if _soft_nofile != resource.RLIM_INFINITY and _soft_nofile < _wanted_nofile:
    resource.setrlimit(resource.RLIMIT_NOFILE, (_wanted_nofile, _hard_nofile))
def download(repo: str, allow_patterns: list[str]) -> Path:
    """Fetch the files of a Hugging Face Hub repo that match the patterns.

    Args:
        repo: Repo id forwarded unchanged to ``snapshot_download``.
        allow_patterns: Patterns forwarded as ``allow_patterns`` to select
            which files of the repo are fetched.

    Returns:
        The value returned by ``snapshot_download``, wrapped in a ``Path``.
    """
    return Path(snapshot_download(repo, allow_patterns=allow_patterns))
def shard_and_load(repo):
    """Load the pipeline-sharded model and its tokenizer for this process.

    ``repo`` is first looked up as a local directory, either exactly as given
    or under the previously hard-coded checkout directory. If neither exists
    it is treated as a Hugging Face repo id: the metadata files are
    downloaded, the model is lazily sharded to find out which weight files
    this process needs, and only those weight files are downloaded.

    Args:
        repo: Path to a local model directory or a Hugging Face repo id.

    Returns:
        A ``(model, tokenizer)`` tuple. The model has had
        ``model.model.pipeline(group)`` applied and its parameters evaluated.
    """
    # Prefer an existing local directory. The second candidate keeps the
    # formerly hard-coded location working for the default ``--model`` value.
    candidates = (
        Path(repo).expanduser(),
        Path("/Users/muellerzr/mlx_stuff") / repo,
    )
    model_path = next((path for path in candidates if path.is_dir()), None)
    is_local = model_path is not None

    if not is_local:
        # Get model path with everything but weight safetensors
        model_path = download(
            repo,
            allow_patterns=[
                "*.json",
                "*.py",
                "tokenizer.model",
                "*.tiktoken",
                "*.txt",
            ],
        )

    # Lazy load and shard model to figure out which weights we need
    model, config = load_model(model_path, lazy=True, strict=False)

    group = mx.distributed.init()
    model.model.pipeline(group)

    if not is_local:
        # Figure out which files we need for the local shard
        with open(model_path / "model.safetensors.index.json", "r") as fid:
            weight_index = json.load(fid)["weight_map"]
        local_files = {
            weight_index[name] for name, _ in tree_flatten(model.parameters())
        }

        # Download weights for local shard
        download(repo, allow_patterns=sorted(local_files))

    # Load and shard the model, and load the weights
    tokenizer = load_tokenizer(
        model_path,
        {"trust_remote_code": True},
        eos_token_ids=config.get("eos_token_id", None),
    )
    # Second load: in the download case the weight files are only present
    # on disk from this point on.
    model, _ = load_model(model_path, lazy=True, strict=False)
    model.model.pipeline(group)
    mx.eval(model.parameters())

    # Synchronize processes before generation to avoid timeout if downloading
    # model for the first time.
    mx.eval(mx.distributed.all_sum(mx.array(1.0), stream=mx.cpu))
    return model, tokenizer
# Script entry point: parse the CLI options, load this process's shard of the
# model, stream a completion for the prompt, and report generation statistics.
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="LLM pipelined inference example")
    parser.add_argument(
        "--model",
        default="DeepSeek-Coder-V2-Lite-Instruct-4bit-mlx",
        help="HF repo or path to local model.",
    )
    # NOTE(review): the help text advertises '-' for stdin, but nothing below
    # reads stdin; the value is used verbatim as the prompt.
    parser.add_argument(
        "--prompt",
        "-p",
        default="What is the square root of 4",
        help="Message to be processed by the model ('-' reads from stdin)",
    )
    parser.add_argument(
        "--max-tokens",
        "-m",
        type=int,
        default=256,
        help="Maximum number of tokens to generate",
    )
    args = parser.parse_args()

    group = mx.distributed.init()
    rank = group.rank()

    def rprint(*values, **kwargs):
        """Forward to ``print`` on rank 0 only; do nothing on other ranks."""
        if rank == 0:
            print(*values, **kwargs)

    model, tokenizer = shard_and_load(args.model)

    messages = [{"role": "user", "content": args.prompt}]
    prompt = tokenizer.apply_chat_template(messages, add_generation_prompt=True)

    # Pre-bind so the summary below cannot hit a NameError when the generator
    # yields nothing.
    response = None
    for response in stream_generate(
        model, tokenizer, prompt, max_tokens=args.max_tokens
    ):
        rprint(response.text, end="", flush=True)

    rprint()
    rprint("=" * 10)
    if response is None:
        rprint("No tokens were generated.")
    else:
        # The statistics are read from the last response yielded by the loop.
        rprint(
            f"Prompt: {response.prompt_tokens} tokens, "
            f"{response.prompt_tps:.3f} tokens-per-sec"
        )
        rprint(
            f"Generation: {response.generation_tokens} tokens, "
            f"{response.generation_tps:.3f} tokens-per-sec"
        )
        rprint(f"Peak memory: {response.peak_memory:.3f} GB")
[balthasar.local:04077] SET DYLD_LIBRARY_PATH=/opt/homebrew/lib
[balthasar.local:04079] mca: base: components_register: registering framework btl components
[balthasar.local:04079] mca: base: components_register: found loaded component self
[balthasar.local:04079] mca: base: components_register: component self register function successful
[balthasar.local:04079] mca: base: components_register: found loaded component sm
[balthasar.local:04079] mca: base: components_register: component sm register function successful
[balthasar.local:04079] mca: base: components_register: found loaded component tcp
[balthasar.local:04079] mca: base: components_register: component tcp register function successful
[balthasar.local:04079] mca: base: components_open: opening btl components
[balthasar.local:04079] mca: base: components_open: found loaded component self
[balthasar.local:04079] mca: base: components_open: component self open function successful
[balthasar.local:04079] mca: base: components_open: found loaded component sm
[balthasar.local:04079] mca: base: components_open: component sm open function successful
[balthasar.local:04079] mca: base: components_open: found loaded component tcp
[balthasar.local:04079] mca: base: components_open: component tcp open function successful
[balthasar.local:04079] mca: base: components_register: registering framework coll components
[balthasar.local:04079] mca: base: components_register: found loaded component adapt
[balthasar.local:04079] mca: base: components_register: component adapt register function successful
[balthasar.local:04079] mca: base: components_register: found loaded component basic
[balthasar.local:04079] mca: base: components_register: component basic register function successful
[balthasar.local:04079] mca: base: components_register: found loaded component han
[balthasar.local:04079] mca: base: components_register: component han register function successful
[balthasar.local:04079] mca: base: components_register: found loaded component inter
[balthasar.local:04079] mca: base: components_register: component inter register function successful
[balthasar.local:04079] mca: base: components_register: found loaded component libnbc
[balthasar.local:04079] mca: base: components_register: component libnbc register function successful
[balthasar.local:04079] mca: base: components_register: found loaded component self
[balthasar.local:04079] mca: base: components_register: component self register function successful
[balthasar.local:04079] mca: base: components_register: found loaded component sync
[balthasar.local:04079] mca: base: components_register: component sync register function successful
[balthasar.local:04079] mca: base: components_register: found loaded component tuned
[balthasar.local:04079] mca: base: components_register: component tuned register function successful
[balthasar.local:04079] mca: base: components_register: found loaded component ftagree
[balthasar.local:04079] mca: base: components_register: component ftagree register function successful
[balthasar.local:04079] mca: base: components_register: found loaded component monitoring
[balthasar.local:04079] mca: base: components_register: component monitoring register function successful
[balthasar.local:04079] mca: base: components_register: found loaded component sm
[balthasar.local:04079] mca: base: components_register: component sm register function successful
[balthasar.local:04079] mca: base: components_open: opening coll components
[balthasar.local:04079] mca: base: components_open: found loaded component adapt
[balthasar.local:04079] mca: base: components_open: component adapt open function successful
[balthasar.local:04079] mca: base: components_open: found loaded component basic
[balthasar.local:04079] mca: base: components_open: found loaded component han
[balthasar.local:04079] mca: base: components_open: component han open function successful
[balthasar.local:04079] mca: base: components_open: found loaded component inter
[balthasar.local:04079] mca: base: components_open: found loaded component libnbc
[balthasar.local:04079] mca: base: components_open: component libnbc open function successful
[balthasar.local:04079] mca: base: components_open: found loaded component self
[balthasar.local:04079] mca: base: components_open: found loaded component sync
[balthasar.local:04079] mca: base: components_open: found loaded component tuned
[balthasar.local:04079] mca: base: components_open: component tuned open function successful
[balthasar.local:04079] mca: base: components_open: found loaded component ftagree
[balthasar.local:04079] mca: base: components_open: found loaded component monitoring
[balthasar.local:04079] mca: base: components_open: component monitoring open function successful
[balthasar.local:04079] mca: base: components_open: found loaded component sm
[balthasar.local:04079] select: initializing btl component self
[balthasar.local:04079] select: init of component self returned success
[balthasar.local:04079] select: initializing btl component sm
[balthasar.local:04079] select: init of component sm returned failure
[balthasar.local:04079] mca: base: close: component sm closed
[balthasar.local:04079] mca: base: close: unloading component sm
[balthasar.local:04079] select: initializing btl component tcp
[balthasar.local:04079] btl: tcp: Using interface: lo0
[balthasar.local:04079] btl: tcp: Searching for exclude address+prefix: 169.254.0.0 / 16
[balthasar.local:04079] btl: tcp: Found match: 169.254.175.10 (en12)
[balthasar.local:04079] btl: tcp: Found match: 169.254.195.121 (bridge0)
[balthasar.local:04079] btl: tcp: Using interface: fe80::/10
[balthasar.local:04079] btl:tcp: 0x13c8a2e40: if en0 kidx 24 cnt 0 addr fdcd:ee7b:759e:1744:42b:9477:f1a9:15b5 IPv6 bw 100 lt 100
[balthasar.local:04079] btl:tcp: 0x13c8a31a0: if en0 kidx 4 cnt 0 addr 192.168.68.80 IPv4 bw 100 lt 100
[balthasar.local:04079] btl:tcp: Attempting to bind to AF_INET port 1024
[balthasar.local:04079] btl:tcp: Successfully bound to AF_INET port 1024
[balthasar.local:04079] btl:tcp: my listening v4 socket is 0.0.0.0:1024
[balthasar.local:04079] btl:tcp: Attempting to bind to AF_INET6 port 1024
[balthasar.local:04079] btl:tcp: Successfully bound to AF_INET6 port 1024
[balthasar.local:04079] btl:tcp: my listening v6 socket port is 1024
[balthasar.local:04079] btl: tcp: exchange: 0 24 IPv6 fdcd:ee7b:759e:1744:42b:9477:f1a9:15b5
[balthasar.local:04079] btl: tcp: exchange: 1 4 IPv4 192.168.68.80
[balthasar.local:04079] select: init of component tcp returned success
[melchior.local:03126] mca: base: components_register: registering framework btl components
[melchior.local:03126] mca: base: components_register: found loaded component self
[melchior.local:03126] mca: base: components_register: component self register function successful
[melchior.local:03126] mca: base: components_register: found loaded component sm
[melchior.local:03126] mca: base: components_register: component sm register function successful
[melchior.local:03126] mca: base: components_register: found loaded component tcp
[melchior.local:03126] mca: base: components_register: component tcp register function successful
[melchior.local:03126] mca: base: components_open: opening btl components
[melchior.local:03126] mca: base: components_open: found loaded component self
[melchior.local:03126] mca: base: components_open: component self open function successful
[melchior.local:03126] mca: base: components_open: found loaded component sm
[melchior.local:03126] mca: base: components_open: component sm open function successful
[melchior.local:03126] mca: base: components_open: found loaded component tcp
[melchior.local:03126] mca: base: components_open: component tcp open function successful
[melchior.local:03126] mca: base: components_register: registering framework coll components
[melchior.local:03126] mca: base: components_register: found loaded component adapt
[melchior.local:03126] mca: base: components_register: component adapt register function successful
[melchior.local:03126] mca: base: components_register: found loaded component basic
[melchior.local:03126] mca: base: components_register: component basic register function successful
[melchior.local:03126] mca: base: components_register: found loaded component han
[melchior.local:03126] mca: base: components_register: component han register function successful
[melchior.local:03126] mca: base: components_register: found loaded component inter
[melchior.local:03126] mca: base: components_register: component inter register function successful
[melchior.local:03126] mca: base: components_register: found loaded component libnbc
[melchior.local:03126] mca: base: components_register: component libnbc register function successful
[melchior.local:03126] mca: base: components_register: found loaded component self
[melchior.local:03126] mca: base: components_register: component self register function successful
[melchior.local:03126] mca: base: components_register: found loaded component sync
[melchior.local:03126] mca: base: components_register: component sync register function successful
[melchior.local:03126] mca: base: components_register: found loaded component tuned
[melchior.local:03126] mca: base: components_register: component tuned register function successful
[melchior.local:03126] mca: base: components_register: found loaded component ftagree
[melchior.local:03126] mca: base: components_register: component ftagree register function successful
[melchior.local:03126] mca: base: components_register: found loaded component monitoring
[melchior.local:03126] mca: base: components_register: component monitoring register function successful
[melchior.local:03126] mca: base: components_register: found loaded component sm
[melchior.local:03126] mca: base: components_register: component sm register function successful
[melchior.local:03126] mca: base: components_open: opening coll components
[melchior.local:03126] mca: base: components_open: found loaded component adapt
[melchior.local:03126] mca: base: components_open: component adapt open function successful
[melchior.local:03126] mca: base: components_open: found loaded component basic
[melchior.local:03126] mca: base: components_open: found loaded component han
[melchior.local:03126] mca: base: components_open: component han open function successful
[melchior.local:03126] mca: base: components_open: found loaded component inter
[melchior.local:03126] mca: base: components_open: found loaded component libnbc
[melchior.local:03126] mca: base: components_open: component libnbc open function successful
[melchior.local:03126] mca: base: components_open: found loaded component self
[melchior.local:03126] mca: base: components_open: found loaded component sync
[melchior.local:03126] mca: base: components_open: found loaded component tuned
[melchior.local:03126] mca: base: components_open: component tuned open function successful
[melchior.local:03126] mca: base: components_open: found loaded component ftagree
[melchior.local:03126] mca: base: components_open: found loaded component monitoring
[melchior.local:03126] mca: base: components_open: component monitoring open function successful
[melchior.local:03126] mca: base: components_open: found loaded component sm
[melchior.local:03126] select: initializing btl component self
[melchior.local:03126] select: init of component self returned success
[melchior.local:03126] select: initializing btl component sm
[melchior.local:03126] select: init of component sm returned failure
[melchior.local:03126] mca: base: close: component sm closed
[melchior.local:03126] mca: base: close: unloading component sm
[melchior.local:03126] select: initializing btl component tcp
[melchior.local:03126] btl: tcp: Using interface: lo0
[melchior.local:03126] btl: tcp: Searching for exclude address+prefix: 169.254.0.0 / 16
[melchior.local:03126] btl: tcp: Found match: 169.254.71.220 (bridge0)
[melchior.local:03126] btl: tcp: Found match: 169.254.222.63 (en14)
[melchior.local:03126] btl: tcp: Using interface: fe80::/10
[melchior.local:03126] btl:tcp: 0x12753c8f0: if en0 kidx 24 cnt 0 addr fdcd:ee7b:759e:1744:8:4538:670e:89cb IPv6 bw 100 lt 100
[melchior.local:03126] btl:tcp: 0x12753cc50: if en0 kidx 3 cnt 0 addr 192.168.68.53 IPv4 bw 100 lt 100
[melchior.local:03126] btl:tcp: Attempting to bind to AF_INET port 1024
[melchior.local:03126] btl:tcp: Successfully bound to AF_INET port 1024
[melchior.local:03126] btl:tcp: my listening v4 socket is 0.0.0.0:1024
[melchior.local:03126] btl:tcp: Attempting to bind to AF_INET6 port 1024
[melchior.local:03126] btl:tcp: Successfully bound to AF_INET6 port 1024
[melchior.local:03126] btl:tcp: my listening v6 socket port is 1024
[melchior.local:03126] btl: tcp: exchange: 0 24 IPv6 fdcd:ee7b:759e:1744:8:4538:670e:89cb
[melchior.local:03126] btl: tcp: exchange: 1 3 IPv4 192.168.68.53
[melchior.local:03126] select: init of component tcp returned success
[balthasar.local:04079] coll:find_available: querying coll component adapt
[balthasar.local:04079] coll:find_available: coll component adapt is available
[balthasar.local:04079] coll:find_available: querying coll component basic
[balthasar.local:04079] coll:find_available: coll component basic is available
[balthasar.local:04079] coll:find_available: querying coll component han
[balthasar.local:04079] coll:han:init_query: pick me! pick me!
[balthasar.local:04079] coll:find_available: coll component han is available
[balthasar.local:04079] coll:find_available: querying coll component inter
[balthasar.local:04079] coll:find_available: coll component inter is available
[balthasar.local:04079] coll:find_available: querying coll component libnbc
[balthasar.local:04079] coll:find_available: coll component libnbc is available
[balthasar.local:04079] coll:find_available: querying coll component self
[balthasar.local:04079] coll:find_available: coll component self is available
[balthasar.local:04079] coll:find_available: querying coll component sync
[balthasar.local:04079] coll:find_available: coll component sync is available
[balthasar.local:04079] coll:find_available: querying coll component tuned
[balthasar.local:04079] coll:find_available: coll component tuned is available
[balthasar.local:04079] coll:find_available: querying coll component ftagree
[balthasar.local:04079] coll:find_available: coll component ftagree is available
[balthasar.local:04079] coll:find_available: querying coll component monitoring
[balthasar.local:04079] coll:find_available: coll component monitoring is not available
[balthasar.local:04079] mca: base: close: component monitoring closed
[balthasar.local:04079] mca: base: close: unloading component monitoring
[balthasar.local:04079] coll:find_available: querying coll component sm
[balthasar.local:04079] coll:sm:init_query: pick me! pick me!
[balthasar.local:04079] coll:find_available: coll component sm is available
[balthasar.local:04079] mca: bml: Using self btl for send to [[40704,1],0] on node balthasar
[melchior.local:03126] coll:find_available: querying coll component adapt
[melchior.local:03126] coll:find_available: coll component adapt is available
[melchior.local:03126] coll:find_available: querying coll component basic
[melchior.local:03126] coll:find_available: coll component basic is available
[melchior.local:03126] coll:find_available: querying coll component han
[melchior.local:03126] coll:han:init_query: pick me! pick me!
[melchior.local:03126] coll:find_available: coll component han is available
[melchior.local:03126] coll:find_available: querying coll component inter
[melchior.local:03126] coll:find_available: coll component inter is available
[melchior.local:03126] coll:find_available: querying coll component libnbc
[melchior.local:03126] coll:find_available: coll component libnbc is available
[melchior.local:03126] coll:find_available: querying coll component self
[melchior.local:03126] coll:find_available: coll component self is available
[melchior.local:03126] coll:find_available: querying coll component sync
[melchior.local:03126] coll:find_available: coll component sync is available
[melchior.local:03126] coll:find_available: querying coll component tuned
[melchior.local:03126] coll:find_available: coll component tuned is available
[melchior.local:03126] coll:find_available: querying coll component ftagree
[melchior.local:03126] coll:find_available: coll component ftagree is available
[melchior.local:03126] coll:find_available: querying coll component monitoring
[melchior.local:03126] coll:find_available: coll component monitoring is not available
[melchior.local:03126] mca: base: close: component monitoring closed
[melchior.local:03126] mca: base: close: unloading component monitoring
[melchior.local:03126] coll:find_available: querying coll component sm
[melchior.local:03126] coll:sm:init_query: pick me! pick me!
[melchior.local:03126] coll:find_available: coll component sm is available
[melchior.local:03126] mca: bml: Using self btl for send to [[40704,1],1] on node melchior
[balthasar.local:04079] base_help_aggregate=true (default)
[balthasar.local:04079] mca_base_param_files=/opt/homebrew/etc/openmpi-mca-params.conf (default)
[balthasar.local:04079] mca_param_files=/opt/homebrew/etc/openmpi-mca-params.conf (default)
[balthasar.local:04079] mca_base_override_param_file=/opt/homebrew/etc/openmpi-mca-params-override.conf (default)
[balthasar.local:04079] mca_base_suppress_override_warning=false (default)
[balthasar.local:04079] mca_base_param_file_prefix= (default)
[balthasar.local:04079] mca_base_envar_file_prefix= (default)
[balthasar.local:04079] mca_base_param_file_path=/opt/homebrew/Cellar/open-mpi/5.0.8/share/openmpi/amca-param-sets:/Users/muellerzr/mlx_stuff (default)
[balthasar.local:04079] mca_base_param_file_path_force= (default)
[balthasar.local:04079] opal_signal=6,10,8,11 (default)
[balthasar.local:04079] opal_stacktrace_output=stderr (default)
[balthasar.local:04079] opal_net_private_ipv4=10.0.0.0/8;172.16.0.0/12;192.168.0.0/16;169.254.0.0/16 (default)
[balthasar.local:04079] opal_set_max_sys_limits= (default)
[balthasar.local:04079] opal_var_dump_color=name=34,value=32,valid_values=36 (default)
[balthasar.local:04079] opal_built_with_cuda_support=false (default)
[balthasar.local:04079] opal_cuda_support=false (default)
[balthasar.local:04079] opal_warn_on_missing_libcuda=true (default)
[balthasar.local:04079] mpi_leave_pinned=auto (default)
[balthasar.local:04079] opal_leave_pinned=auto (default)
[balthasar.local:04079] mpi_leave_pinned_pipeline=false (default)
[balthasar.local:04079] opal_leave_pinned_pipeline=false (default)
[balthasar.local:04079] mpi_warn_on_fork=true (default)
[balthasar.local:04079] opal_abort_delay=0 (default)
[balthasar.local:04079] opal_abort_print_stack=true (environment)
[balthasar.local:04079] mca_base_env_list= (default)
[balthasar.local:04079] mca_base_env_list_delimiter=; (default)
[balthasar.local:04079] opal_max_thread_in_progress=1 (default)
[balthasar.local:04079] mca_base_component_path=/opt/homebrew/Cellar/open-mpi/5.0.8/lib/openmpi:/Users/muellerzr/.openmpi/components (default)
[balthasar.local:04079] mca_component_path=/opt/homebrew/Cellar/open-mpi/5.0.8/lib/openmpi:/Users/muellerzr/.openmpi/components (default)
[balthasar.local:04079] mca_base_component_show_load_errors=all (default)
[balthasar.local:04079] mca_component_show_load_errors=all (default)
[balthasar.local:04079] mca_base_component_track_load_errors=false (default)
[balthasar.local:04079] mca_base_component_disable_dlopen=false (default)
[balthasar.local:04079] mca_component_disable_dlopen=false (default)
[balthasar.local:04079] mca_base_verbose=stderr (default)
[balthasar.local:04079] mca_verbose=stderr (default)
[balthasar.local:04079] dl= (default)
[balthasar.local:04079] dl_base_verbose=error (default)
[balthasar.local:04079] dl_dlopen_filename_suffixes=.so,.dylib,.dll,.sl (default)
[balthasar.local:04079] mpi_ft_enable=false (default)
[balthasar.local:04079] mpi_ft_verbose=0 (default)
[balthasar.local:04079] mpi_ft_reliable_bcast=1 (default)
[balthasar.local:04079] mpi_ft_propagator_with_rbcast=false (default)
[balthasar.local:04079] mpi_ft_detector=false (default)
[balthasar.local:04079] mpi_ft_detector_thread=false (default)
[balthasar.local:04079] mpi_ft_detector_period=3.000000 (default)
[balthasar.local:04079] mpi_ft_detector_timeout=10.000000 (default)
[balthasar.local:04079] mpi_ft_detector_rdma_heartbeat=false (default)
[balthasar.local:04079] mpi_param_check=true (default)
[balthasar.local:04079] mpi_yield_when_idle=false (default)
[balthasar.local:04079] mpi_event_tick_rate=-1 (default)
[balthasar.local:04079] mpi_show_handle_leaks=false (default)
[balthasar.local:04079] mpi_no_free_handles=false (default)
[balthasar.local:04079] mpi_show_mpi_alloc_mem_leaks=0 (default)
[balthasar.local:04079] mpi_show_mca_params=all (environment)
[balthasar.local:04079] mpi_show_mca_params_file= (default)
[balthasar.local:04079] mpi_preconnect_all=false (default)
[balthasar.local:04079] mpi_have_sparse_group_storage=false (default)
[balthasar.local:04079] mpi_use_sparse_group_storage=false (default)
[balthasar.local:04079] mpi_cuda_support=false (default)
[balthasar.local:04079] mpi_built_with_cuda_support=false (default)
[balthasar.local:04079] mpi_add_procs_cutoff=0 (default)
[balthasar.local:04079] mpi_dynamics_enabled=true (default)
[balthasar.local:04079] async_mpi_init=false (default)
[balthasar.local:04079] async_mpi_finalize=false (default)
[balthasar.local:04079] mpi_abort_delay=0 (default)
[balthasar.local:04079] mpi_abort_print_stack=true (default)
[balthasar.local:04079] mpi_compat_mpi3=true (default)
[balthasar.local:04079] mpi_pmix_connect_timeout=0 (default)
[balthasar.local:04079] ompi_timing=false (default)
[balthasar.local:04079] ompi_stream_buffering=-1 (default)
[balthasar.local:04079] mpi_comm_verbose=0 (default)
[balthasar.local:04079] if= (default)
[balthasar.local:04079] if_base_verbose=error (default)
[balthasar.local:04079] if_base_do_not_resolve=false (default)
[balthasar.local:04079] if_base_retain_loopback=false (default)
[balthasar.local:04079] threads= (default)
[balthasar.local:04079] threads_base_verbose=error (default)
[balthasar.local:04079] threads_pthreads_yield_strategy=sched_yield (default)
[balthasar.local:04079] threads_pthreads_nanosleep_time=1 (default)
[balthasar.local:04079] hwloc= (default)
[balthasar.local:04079] hwloc_base_verbose=error (default)
[balthasar.local:04079] hwloc_base_mem_bind_failure_action=warn (default)
[balthasar.local:04079] memcpy= (default)
[balthasar.local:04079] memcpy_base_verbose=error (default)
[balthasar.local:04079] memchecker= (default)
[balthasar.local:04079] memchecker_base_verbose=error (default)
[balthasar.local:04079] backtrace= (default)
[balthasar.local:04079] backtrace_base_verbose=error (default)
[balthasar.local:04079] timer= (default)
[balthasar.local:04079] timer_base_verbose=error (default)
[balthasar.local:04079] timer_require_monotonic=true (default)
[balthasar.local:04079] shmem= (default)
[balthasar.local:04079] shmem_base_verbose=error (default)
[balthasar.local:04079] shmem_mmap_priority=50 (default)
[balthasar.local:04079] shmem_mmap_enable_nfs_warning=true (default)
[balthasar.local:04079] shmem_mmap_relocate_backing_file=0 (default)
[balthasar.local:04079] shmem_mmap_backing_file_base_dir=/dev/shm (default)
[balthasar.local:04079] reachable= (default)
[balthasar.local:04079] reachable_base_verbose=error (default)
[balthasar.local:04079] pmix= (default)
[balthasar.local:04079] pmix_base_verbose=error (default)
[balthasar.local:04079] pmix_base_async_modex=false (default)
[balthasar.local:04079] pmix_base_collect_data=true (default)
[balthasar.local:04079] pmix_base_exchange_timeout=-1 (default)
[balthasar.local:04079] accelerator= (default)
[balthasar.local:04079] accelerator_base_verbose=error (default)
[balthasar.local:04079] opal_event_include=select (default)
[balthasar.local:04079] event_external_include=select (default)
[balthasar.local:04079] opal_event_verbose=error (default)
[balthasar.local:04079] event_base_verbose=error (default)
[balthasar.local:04079] hook= (default)
[balthasar.local:04079] hook_base_verbose=error (default)
[balthasar.local:04079] hook_comm_method_verbose=0 (default)
[balthasar.local:04079] hook_comm_method_display= (default)
[balthasar.local:04079] hook_comm_method_max=12 (default)
[balthasar.local:04079] hook_comm_method_brief=false (default)
[balthasar.local:04079] hook_comm_method_fakefile= (default)
[balthasar.local:04079] op= (default)
[balthasar.local:04079] op_base_verbose=error (default)
[balthasar.local:04079] op_aarch64_hardware_available=1 (default)
[balthasar.local:04079] op_aarch64_double_supported=false (default)
[balthasar.local:04079] allocator= (default)
[balthasar.local:04079] allocator_base_verbose=error (default)
[balthasar.local:04079] allocator_bucket_num_buckets=30 (default)
[balthasar.local:04079] rcache= (default)
[balthasar.local:04079] rcache_base_verbose=error (default)
[balthasar.local:04079] rcache_grdma_print_stats=false (default)
[balthasar.local:04079] mpool= (default)
[balthasar.local:04079] mpool_base_verbose=error (default)
[balthasar.local:04079] mpool_hugepage_priority=50 (default)
[balthasar.local:04079] mpool_hugepage_page_size=2097152 (default)
[balthasar.local:04079] smsc= (default)
[balthasar.local:04079] smsc_base_verbose=error (default)
[balthasar.local:04079] bml= (default)
[balthasar.local:04079] bml_base_verbose=error (default)
[balthasar.local:04079] bml_r2_show_unreach_errors=true (default)
[balthasar.local:04079] btl= (default)
[balthasar.local:04079] btl_base_verbose=max (environment)
[balthasar.local:04079] btl_base_include= (default)
[balthasar.local:04079] btl_base_exclude= (default)
[balthasar.local:04079] btl_base_warn_peer_error=true (default)
[balthasar.local:04079] btl_base_warn_component_unused=1 (default)
[balthasar.local:04079] btl_self_free_list_num=0 (default)
[balthasar.local:04079] btl_self_free_list_max=64 (default)
[balthasar.local:04079] btl_self_free_list_inc=8 (default)
[balthasar.local:04079] btl_self_exclusivity=65536 (default)
[balthasar.local:04079] btl_self_atomic_flags= (default)
[balthasar.local:04079] btl_self_rndv_eager_limit=131072 (default)
[balthasar.local:04079] btl_self_eager_limit=1024 (default)
[balthasar.local:04079] btl_self_get_limit=18446744073709551615 (default)
[balthasar.local:04079] btl_self_get_alignment=0 (default)
[balthasar.local:04079] btl_self_put_limit=18446744073709551615 (default)
[balthasar.local:04079] btl_self_put_alignment=0 (default)
[balthasar.local:04079] btl_self_accelerator_max_send_size=0 (default)
[balthasar.local:04079] btl_self_max_send_size=16384 (default)
[balthasar.local:04079] btl_self_rdma_pipeline_send_length=2147483647 (default)
[balthasar.local:04079] btl_self_rdma_pipeline_frag_size=2147483647 (default)
[balthasar.local:04079] btl_self_min_rdma_pipeline_size=2147484671 (default)
[balthasar.local:04079] btl_self_latency=0 (default)
[balthasar.local:04079] btl_self_bandwidth=100 (default)
[balthasar.local:04079] btl_tcp_links=1 (default)
[balthasar.local:04079] btl_tcp_if_include= (default)
[balthasar.local:04079] btl_tcp_if_exclude=lo0,en12,bridge0,fe80::/10 (environment)
[balthasar.local:04079] btl_tcp_free_list_num=8 (default)
[balthasar.local:04079] btl_tcp_free_list_max=-1 (default)
[balthasar.local:04079] btl_tcp_free_list_inc=32 (default)
[balthasar.local:04079] btl_tcp_sndbuf=0 (default)
[balthasar.local:04079] btl_tcp_rcvbuf=0 (default)
[balthasar.local:04079] btl_tcp_endpoint_cache=30720 (default)
[balthasar.local:04079] btl_tcp_use_nagle=0 (default)
[balthasar.local:04079] btl_tcp_port_min_v4=1024 (default)
[balthasar.local:04079] btl_tcp_port_range_v4=64511 (default)
[balthasar.local:04079] btl_tcp_port_min_v6=1024 (default)
[balthasar.local:04079] btl_tcp_port_range_v6=64511 (default)
[balthasar.local:04079] btl_tcp_progress_thread=0 (default)
[balthasar.local:04079] btl_tcp_warn_all_unfound_interfaces=false (default)
[balthasar.local:04079] btl_tcp_exclusivity=100 (default)
[balthasar.local:04079] btl_tcp_flags=send,put,inplace,need-ack,need-csum,hetero-rdma (default)
[balthasar.local:04079] btl_tcp_atomic_flags= (default)
[balthasar.local:04079] btl_tcp_rndv_eager_limit=65536 (default)
[balthasar.local:04079] btl_tcp_eager_limit=65536 (default)
[balthasar.local:04079] btl_tcp_put_limit=18446744073709551615 (default)
[balthasar.local:04079] btl_tcp_put_alignment=0 (default)
[balthasar.local:04079] btl_tcp_accelerator_max_send_size=0 (default)
[balthasar.local:04079] btl_tcp_max_send_size=131072 (default)
[balthasar.local:04079] btl_tcp_rdma_pipeline_send_length=131072 (default)
[balthasar.local:04079] btl_tcp_rdma_pipeline_frag_size=2147482624 (default)
[balthasar.local:04079] btl_tcp_min_rdma_pipeline_size=196608 (default)
[balthasar.local:04079] btl_tcp_latency=0 (default)
[balthasar.local:04079] btl_tcp_bandwidth=0 (default)
[balthasar.local:04079] btl_tcp_disable_family=0 (default)
[balthasar.local:04079] pml= (default)
[balthasar.local:04079] pml_base_verbose=error (default)
[balthasar.local:04079] pml_base_bsend_allocator=basic (default)
[balthasar.local:04079] pml_base_wrapper= (default)
[balthasar.local:04079] pml_wrapper= (default)
[balthasar.local:04079] pml_base_check_pml=true (default)
[balthasar.local:04079] pml_ob1_verbose=0 (default)
[balthasar.local:04079] pml_ob1_free_list_num=4 (default)
[balthasar.local:04079] pml_ob1_free_list_max=-1 (default)
[balthasar.local:04079] pml_ob1_free_list_inc=64 (default)
[balthasar.local:04079] pml_ob1_priority=20 (default)
[balthasar.local:04079] pml_ob1_send_pipeline_depth=3 (default)
[balthasar.local:04079] pml_ob1_recv_pipeline_depth=4 (default)
[balthasar.local:04079] pml_ob1_max_rdma_per_request=4 (default)
[balthasar.local:04079] pml_ob1_max_send_per_range=4 (default)
[balthasar.local:04079] pml_ob1_unexpected_limit=128 (default)
[balthasar.local:04079] pml_ob1_use_all_rdma=false (default)
[balthasar.local:04079] pml_ob1_allocator=bucket (default)
[balthasar.local:04079] pml_ob1_accelerator_events_max=400 (default)
[balthasar.local:04079] coll= (default)
[balthasar.local:04079] coll_base_verbose=max (environment)
[balthasar.local:04079] coll_adapt_priority=0 (default)
[balthasar.local:04079] coll_adapt_verbose=100 (default)
[balthasar.local:04079] coll_adapt_context_free_list_min=64 (default)
[balthasar.local:04079] coll_adapt_context_free_list_max=1024 (default)
[balthasar.local:04079] coll_adapt_context_free_list_inc=32 (default)
[balthasar.local:04079] coll_adapt_bcast_algorithm=1 (default)
[balthasar.local:04079] coll_adapt_bcast_segment_size=0 (default)
[balthasar.local:04079] coll_adapt_bcast_max_send_requests=2 (default)
[balthasar.local:04079] coll_adapt_bcast_max_recv_requests=3 (default)
[balthasar.local:04079] coll_adapt_bcast_synchronous_send=true (default)
[balthasar.local:04079] coll_adapt_reduce_algorithm=1 (default)
[balthasar.local:04079] coll_adapt_reduce_segment_size=163740 (default)
[balthasar.local:04079] coll_adapt_reduce_max_send_requests=2 (default)
[balthasar.local:04079] coll_adapt_reduce_max_recv_requests=3 (default)
[balthasar.local:04079] coll_adapt_inbuf_free_list_min=10 (default)
[balthasar.local:04079] coll_adapt_inbuf_free_list_max=10000 (default)
[balthasar.local:04079] coll_adapt_inbuf_free_list_inc=10 (default)
[balthasar.local:04079] coll_adapt_reduce_synchronous_send=true (default)
[balthasar.local:04079] coll_basic_priority=10 (default)
[balthasar.local:04079] coll_basic_crossover=4 (default)
[balthasar.local:04079] coll_han_priority=35 (default)
[balthasar.local:04079] coll_han_verbose=0 (default)
[balthasar.local:04079] coll_han_bcast_segsize=65536 (default)
[balthasar.local:04079] coll_han_bcast_up_module=self (default)
[balthasar.local:04079] coll_han_bcast_low_module=self (default)
[balthasar.local:04079] coll_han_reduce_segsize=65536 (default)
[balthasar.local:04079] coll_han_reduce_up_module=self (default)
[balthasar.local:04079] coll_han_reduce_low_module=self (default)
[balthasar.local:04079] coll_han_allreduce_segsize=65536 (default)
[balthasar.local:04079] coll_han_allreduce_up_module=self (default)
[balthasar.local:04079] coll_han_allreduce_low_module=self (default)
[balthasar.local:04079] coll_han_allgather_up_module=self (default)
[balthasar.local:04079] coll_han_allgather_low_module=self (default)
[balthasar.local:04079] coll_han_gather_up_module=self (default)
[balthasar.local:04079] coll_han_gather_low_module=self (default)
[balthasar.local:04079] coll_han_scatter_up_module=self (default)
[balthasar.local:04079] coll_han_scatter_low_module=self (default)
[balthasar.local:04079] coll_han_reproducible=false (default)
[balthasar.local:04079] coll_han_use_allgather_algorithm=default (default)
[balthasar.local:04079] coll_han_use_allreduce_algorithm=default (default)
[balthasar.local:04079] coll_han_use_barrier_algorithm=default (default)
[balthasar.local:04079] coll_han_use_bcast_algorithm=default (default)
[balthasar.local:04079] coll_han_use_gather_algorithm=default (default)
[balthasar.local:04079] coll_han_use_reduce_algorithm=default (default)
[balthasar.local:04079] coll_han_use_scatter_algorithm=default (default)
[balthasar.local:04079] coll_han_use_simple_allgather=false (default)
[balthasar.local:04079] coll_han_use_simple_allreduce=false (default)
[balthasar.local:04079] coll_han_use_simple_bcast=false (default)
[balthasar.local:04079] coll_han_use_simple_gather=true (default)
[balthasar.local:04079] coll_han_use_simple_reduce=false (default)
[balthasar.local:04079] coll_han_use_simple_scatter=false (default)
[balthasar.local:04079] coll_han_allgather_dynamic_intra_node_module=3 (default)
[balthasar.local:04079] coll_han_allgather_dynamic_inter_node_module=3 (default)
[balthasar.local:04079] coll_han_allgather_dynamic_global_communicator_module=6 (default)
[balthasar.local:04079] coll_han_allgatherv_dynamic_intra_node_module=3 (default)
[balthasar.local:04079] coll_han_allgatherv_dynamic_inter_node_module=3 (default)
[balthasar.local:04079] coll_han_allgatherv_dynamic_global_communicator_module=6 (default)
[balthasar.local:04079] coll_han_allreduce_dynamic_intra_node_module=3 (default)
[balthasar.local:04079] coll_han_allreduce_dynamic_inter_node_module=3 (default)
[balthasar.local:04079] coll_han_allreduce_dynamic_global_communicator_module=6 (default)
[balthasar.local:04079] coll_han_barrier_dynamic_intra_node_module=3 (default)
[balthasar.local:04079] coll_han_barrier_dynamic_inter_node_module=3 (default)
[balthasar.local:04079] coll_han_barrier_dynamic_global_communicator_module=6 (default)
[balthasar.local:04079] coll_han_bcast_dynamic_intra_node_module=3 (default)
[balthasar.local:04079] coll_han_bcast_dynamic_inter_node_module=3 (default)
[balthasar.local:04079] coll_han_bcast_dynamic_global_communicator_module=6 (default)
[balthasar.local:04079] coll_han_gather_dynamic_intra_node_module=3 (default)
[balthasar.local:04079] coll_han_gather_dynamic_inter_node_module=3 (default)
[balthasar.local:04079] coll_han_gather_dynamic_global_communicator_module=6 (default)
[balthasar.local:04079] coll_han_reduce_dynamic_intra_node_module=3 (default)
[balthasar.local:04079] coll_han_reduce_dynamic_inter_node_module=3 (default)
[balthasar.local:04079] coll_han_reduce_dynamic_global_communicator_module=6 (default)
[balthasar.local:04079] coll_han_scatter_dynamic_intra_node_module=3 (default)
[balthasar.local:04079] coll_han_scatter_dynamic_inter_node_module=3 (default)
[balthasar.local:04079] coll_han_scatter_dynamic_global_communicator_module=6 (default)
[balthasar.local:04079] coll_han_use_dynamic_file_rules=false (default)
[balthasar.local:04079] coll_han_dynamic_rules_filename= (default)
[balthasar.local:04079] coll_han_dump_dynamic_rules=false (default)
[balthasar.local:04079] coll_han_max_dynamic_errors=10 (default)
[balthasar.local:04079] coll_inter_priority=40 (default)
[balthasar.local:04079] coll_inter_verbose=0 (default)
[balthasar.local:04079] coll_libnbc_priority=10 (default)
[balthasar.local:04079] coll_libnbc_ibcast_skip_dt_decision=true (default)
[balthasar.local:04079] coll_libnbc_iallgather_algorithm=ignore (default)
[balthasar.local:04079] coll_libnbc_iallreduce_algorithm=ignore (default)
[balthasar.local:04079] coll_libnbc_ibcast_algorithm=ignore (default)
[balthasar.local:04079] coll_libnbc_ibcast_knomial_radix=4 (default)
[balthasar.local:04079] coll_libnbc_iexscan_algorithm=ignore (default)
[balthasar.local:04079] coll_libnbc_ireduce_algorithm=ignore (default)
[balthasar.local:04079] coll_libnbc_iscan_algorithm=ignore (default)
[balthasar.local:04079] coll_self_priority=75 (default)
[balthasar.local:04079] coll_sync_priority=50 (default)
[balthasar.local:04079] coll_sync_barrier_before=0 (default)
[balthasar.local:04079] coll_sync_barrier_after=0 (default)
[balthasar.local:04079] coll_tuned_priority=30 (default)
[balthasar.local:04079] coll_tuned_init_tree_fanout=4 (default)
[balthasar.local:04079] coll_tuned_init_chain_fanout=4 (default)
[balthasar.local:04079] coll_tuned_alltoall_small_msg=200 (default)
[balthasar.local:04079] coll_tuned_alltoall_intermediate_msg=3000 (default)
[balthasar.local:04079] coll_tuned_use_dynamic_rules=false (default)
[balthasar.local:04079] coll_tuned_dynamic_rules_filename= (default)
[balthasar.local:04079] coll_tuned_allreduce_algorithm_count=7 (default)
[balthasar.local:04079] coll_tuned_allreduce_algorithm=ignore (default)
[balthasar.local:04079] coll_tuned_allreduce_algorithm_segmentsize=0 (default)
[balthasar.local:04079] coll_tuned_allreduce_algorithm_tree_fanout=4 (default)
[balthasar.local:04079] coll_tuned_allreduce_algorithm_chain_fanout=4 (default)
[balthasar.local:04079] coll_tuned_alltoall_algorithm_count=6 (default)
[balthasar.local:04079] coll_tuned_alltoall_algorithm=ignore (default)
[balthasar.local:04079] coll_tuned_alltoall_algorithm_segmentsize=0 (default)
[balthasar.local:04079] coll_tuned_alltoall_algorithm_tree_fanout=4 (default)
[balthasar.local:04079] coll_tuned_alltoall_algorithm_chain_fanout=4 (default)
[balthasar.local:04079] coll_tuned_alltoall_large_msg=3000 (default)
[balthasar.local:04079] coll_tuned_alltoall_min_procs=0 (default)
[balthasar.local:04079] coll_tuned_alltoall_algorithm_max_requests=0 (default)
[balthasar.local:04079] coll_tuned_allgather_algorithm_count=8 (default)
[balthasar.local:04079] coll_tuned_allgather_algorithm=ignore (default)
[balthasar.local:04079] coll_tuned_allgather_algorithm_segmentsize=0 (default)
[balthasar.local:04079] coll_tuned_allgather_algorithm_tree_fanout=4 (default)
[balthasar.local:04079] coll_tuned_allgather_algorithm_chain_fanout=4 (default)
[balthasar.local:04079] coll_tuned_allgatherv_algorithm_count=7 (default)
[balthasar.local:04079] coll_tuned_allgatherv_algorithm=ignore (default)
[balthasar.local:04079] coll_tuned_allgatherv_algorithm_segmentsize=0 (default)
[balthasar.local:04079] coll_tuned_allgatherv_algorithm_tree_fanout=4 (default)
[balthasar.local:04079] coll_tuned_allgatherv_algorithm_chain_fanout=4 (default)
[balthasar.local:04079] coll_tuned_alltoallv_algorithm_count=3 (default)
[balthasar.local:04079] coll_tuned_alltoallv_algorithm=ignore (default)
[balthasar.local:04079] coll_tuned_barrier_algorithm_count=7 (default)
[balthasar.local:04079] coll_tuned_barrier_algorithm=ignore (default)
[balthasar.local:04079] coll_tuned_bcast_algorithm_count=10 (default)
[balthasar.local:04079] coll_tuned_bcast_algorithm=ignore (default)
[balthasar.local:04079] coll_tuned_bcast_algorithm_segmentsize=0 (default)
[balthasar.local:04079] coll_tuned_bcast_algorithm_tree_fanout=4 (default)
[balthasar.local:04079] coll_tuned_bcast_algorithm_chain_fanout=4 (default)
[balthasar.local:04079] coll_tuned_bcast_algorithm_knomial_radix=4 (default)
[balthasar.local:04079] coll_tuned_reduce_algorithm_count=8 (default)
[balthasar.local:04079] coll_tuned_reduce_algorithm=ignore (default)
[balthasar.local:04079] coll_tuned_reduce_algorithm_segmentsize=0 (default)
[balthasar.local:04079] coll_tuned_reduce_algorithm_tree_fanout=4 (default)
[balthasar.local:04079] coll_tuned_reduce_algorithm_chain_fanout=4 (default)
[balthasar.local:04079] coll_tuned_reduce_algorithm_max_requests=0 (default)
[balthasar.local:04079] coll_tuned_reduce_scatter_algorithm_count=5 (default)
[balthasar.local:04079] coll_tuned_reduce_scatter_algorithm=ignore (default)
[balthasar.local:04079] coll_tuned_reduce_scatter_algorithm_segmentsize=0 (default)
[balthasar.local:04079] coll_tuned_reduce_scatter_algorithm_tree_fanout=4 (default)
[balthasar.local:04079] coll_tuned_reduce_scatter_algorithm_chain_fanout=4 (default)
[balthasar.local:04079] coll_tuned_reduce_scatter_block_algorithm_count=5 (default)
[balthasar.local:04079] coll_tuned_reduce_scatter_block_algorithm=ignore (default)
[balthasar.local:04079] coll_tuned_reduce_scatter_block_algorithm_segmentsize=0 (default)
[balthasar.local:04079] coll_tuned_reduce_scatter_block_algorithm_tree_fanout=4 (default)
[balthasar.local:04079] coll_tuned_gather_algorithm_count=4 (default)
[balthasar.local:04079] coll_tuned_gather_algorithm=ignore (default)
[balthasar.local:04079] coll_tuned_gather_algorithm_segmentsize=0 (default)
[balthasar.local:04079] coll_tuned_gather_algorithm_tree_fanout=4 (default)
[balthasar.local:04079] coll_tuned_gather_algorithm_chain_fanout=4 (default)
[balthasar.local:04079] coll_tuned_scatter_algorithm_count=4 (default)
[balthasar.local:04079] coll_tuned_scatter_algorithm=ignore (default)
[balthasar.local:04079] coll_tuned_scatter_algorithm_segmentsize=0 (default)
[balthasar.local:04079] coll_tuned_scatter_algorithm_tree_fanout=4 (default)
[balthasar.local:04079] coll_tuned_scatter_algorithm_chain_fanout=4 (default)
[balthasar.local:04079] coll_tuned_scatter_min_procs=0 (default)
[balthasar.local:04079] coll_tuned_scatter_algorithm_max_requests=0 (default)
[balthasar.local:04079] coll_tuned_scatter_intermediate_msg=0 (default)
[balthasar.local:04079] coll_tuned_scatter_large_msg=0 (default)
[balthasar.local:04079] coll_tuned_exscan_algorithm_count=3 (default)
[balthasar.local:04079] coll_tuned_exscan_algorithm=ignore (default)
[balthasar.local:04079] coll_tuned_scan_algorithm_count=3 (default)
[balthasar.local:04079] coll_tuned_scan_algorithm=ignore (default)
[balthasar.local:04079] coll_ftagree_priority=30 (default)
[balthasar.local:04079] coll_ftagree_agreement=1 (default)
[balthasar.local:04079] coll_ftagree_era_topology=1 (default)
[balthasar.local:04079] coll_ftagree_era_rebuild=0 (default)
[balthasar.local:04079] coll_sm_priority=0 (default)
[balthasar.local:04079] coll_sm_control_size=4096 (default)
[balthasar.local:04079] coll_sm_fragment_size=8192 (default)
[balthasar.local:04079] coll_sm_comm_in_use_flags=2 (default)
[balthasar.local:04079] coll_sm_comm_num_segments=8 (default)
[balthasar.local:04079] coll_sm_tree_degree=4 (default)
[balthasar.local:04079] coll_sm_info_num_procs=4 (default)
[balthasar.local:04079] coll_sm_shared_mem_used_data=548864 (default)
[balthasar.local:04079] osc= (default)
[balthasar.local:04079] osc_base_verbose=error (default)
[balthasar.local:04079] osc_sm_backing_directory=/var/folders/t6/dkzc3gr14b34jmq69r00103m0000gn/T//prterun.balthasar.4077.501/1/0 (default)
[balthasar.local:04079] osc_sm_priority=100 (default)
[balthasar.local:04079] osc_rdma_no_locks=false (default)
[balthasar.local:04079] osc_rdma_acc_single_intrinsic=false (default)
[balthasar.local:04079] osc_rdma_acc_use_amo=true (default)
[balthasar.local:04079] osc_rdma_buffer_size=32768 (default)
[balthasar.local:04079] osc_rdma_max_attach=64 (default)
[balthasar.local:04079] osc_rdma_priority=20 (default)
[balthasar.local:04079] osc_rdma_locking_mode=two_level (default)
[balthasar.local:04079] osc_rdma_btls=ugni,uct,ofi (default)
[balthasar.local:04079] osc_rdma_backing_directory=/var/folders/t6/dkzc3gr14b34jmq69r00103m0000gn/T//prterun.balthasar.4077.501/1/0 (default)
[balthasar.local:04079] osc_rdma_network_max_amo=32 (default)
[balthasar.local:04079] osc_rdma_minimum_memory_alignment=16384 (default)
[balthasar.local:04079] btl_tcp_bandwidth_en0=100 (default)
[balthasar.local:04079] btl_tcp_latency_en0=100 (default)
[balthasar.local:04079] btl_tcp_bandwidth_en0:0=100 (default)
[balthasar.local:04079] btl_tcp_latency_en0:0=100 (default)
[balthasar.local:04079] part= (default)
[balthasar.local:04079] part_base_verbose=error (default)
[balthasar.local:04079] part_persist_free_list_num=4 (default)
[balthasar.local:04079] part_persist_free_list_max=-1 (default)
[balthasar.local:04079] part_persist_free_list_inc=64 (default)
[balthasar.local:04079] coll:base:comm_select: new communicator: MPI_COMM_WORLD (cid 0)
[balthasar.local:04079] coll:base:comm_select: Checking all available modules
[balthasar.local:04079] coll:adapt:comm_query (0/MPI_COMM_WORLD): pick me! pick me!
[balthasar.local:04079] coll:base:comm_select: component available: adapt, priority: 0
[balthasar.local:04079] coll:base:comm_select: component available: basic, priority: 10
[balthasar.local:04079] coll:han:comm_query (0/MPI_COMM_WORLD): pick me! pick me!
[balthasar.local:04079] coll:base:comm_select: component available: han, priority: 35
[balthasar.local:04079] coll:base:comm_select: component not available: inter
[balthasar.local:04079] coll:base:comm_select: component disqualified: inter (priority -1 < 0)
[balthasar.local:04079] coll:base:comm_select: component available: libnbc, priority: 10
[balthasar.local:04079] coll:base:comm_select: component not available: self
[balthasar.local:04079] coll:base:comm_select: component disqualified: self (priority -1 < 0)
[balthasar.local:04079] coll:base:comm_select: component not available: sync
[balthasar.local:04079] coll:base:comm_select: component disqualified: sync (priority -1 < 0)
[balthasar.local:04079] coll:base:comm_select: component available: tuned, priority: 30
[balthasar.local:04079] coll:base:comm_select: component not available: ftagree
[balthasar.local:04079] coll:base:comm_select: component disqualified: ftagree (priority -1 < 0)
[balthasar.local:04079] coll:sm:comm_query (0/MPI_COMM_WORLD): intercomm, comm is too small, or not all peers local; disqualifying myself
[balthasar.local:04079] coll:base:comm_select: component not available: sm
[balthasar.local:04079] coll:base:comm_select: component disqualified: sm (priority -1 < 0)
[balthasar.local:04079] (0/MPI_COMM_WORLD): no underlying reduce; disqualifying myself
[balthasar.local:04079] coll:base:comm_select: selecting adapt, priority 0, Disabled
[balthasar.local:04079] coll:base:comm_select: selecting basic, priority 10, Enabled
[balthasar.local:04079] coll:base:comm_select: selecting libnbc, priority 10, Enabled
[balthasar.local:04079] coll:base:comm_select: selecting tuned, priority 30, Enabled
[balthasar.local:04079] coll:han:get_all_coll_modules HAN found module basic with id 1 for topological level 2 (global_communicator) for communicator (0/MPI_COMM_WORLD)
[balthasar.local:04079] coll:han:get_all_coll_modules HAN found module libnbc with id 2 for topological level 2 (global_communicator) for communicator (0/MPI_COMM_WORLD)
[balthasar.local:04079] coll:han:get_all_coll_modules HAN found module tuned with id 3 for topological level 2 (global_communicator) for communicator (0/MPI_COMM_WORLD)
[balthasar.local:04079] coll:han:get_all_coll_modules HAN sub-communicator modules storage for topological level 2 (global_communicator) gets 4 modules for communicator (0/MPI_COMM_WORLD)
[balthasar.local:04079] coll:han:reduce_reproducible: fallback on tuned
[balthasar.local:04079] coll:han:allreduce_reproducible: fallback on tuned
[balthasar.local:04079] coll:base:comm_select: selecting han, priority 35, Enabled
[balthasar.local:04079] coll:base:comm_select: new communicator: MPI_COMM_SELF (cid 1)
[balthasar.local:04079] coll:base:comm_select: Checking all available modules
[balthasar.local:04079] coll:adapt:comm_query (1/MPI_COMM_SELF): intercomm, comm is too small; disqualifying myself
[balthasar.local:04079] coll:base:comm_select: component not available: adapt
[balthasar.local:04079] coll:base:comm_select: component disqualified: adapt (priority -1 < 0)
[balthasar.local:04079] coll:base:comm_select: component available: basic, priority: 10
[balthasar.local:04079] coll:han:comm_query (1/MPI_COMM_SELF): comm is too small; disqualifying myself
[balthasar.local:04079] coll:base:comm_select: component not available: han
[balthasar.local:04079] coll:base:comm_select: component disqualified: han (priority -1 < 0)
[balthasar.local:04079] coll:base:comm_select: component not available: inter
[balthasar.local:04079] coll:base:comm_select: component disqualified: inter (priority -1 < 0)
[balthasar.local:04079] coll:base:comm_select: component available: libnbc, priority: 10
[balthasar.local:04079] coll:base:comm_select: component available: self, priority: 75
[balthasar.local:04079] coll:base:comm_select: component not available: sync
[balthasar.local:04079] coll:base:comm_select: component disqualified: sync (priority -1 < 0)
[balthasar.local:04079] coll:base:comm_select: component not available: tuned
[balthasar.local:04079] coll:base:comm_select: component disqualified: tuned (priority -1 < 0)
[balthasar.local:04079] coll:base:comm_select: component not available: ftagree
[balthasar.local:04079] coll:base:comm_select: component disqualified: ftagree (priority -1 < 0)
[balthasar.local:04079] coll:sm:comm_query (1/MPI_COMM_SELF): intercomm, comm is too small, or not all peers local; disqualifying myself
[balthasar.local:04079] coll:base:comm_select: component not available: sm
[balthasar.local:04079] coll:base:comm_select: component disqualified: sm (priority -1 < 0)
[balthasar.local:04079] coll:base:comm_select: selecting basic, priority 10, Enabled
[balthasar.local:04079] coll:base:comm_select: selecting libnbc, priority 10, Enabled
[balthasar.local:04079] coll:base:comm_select: selecting self, priority 75, Enabled
[melchior.local:03126] coll:base:comm_select: new communicator: MPI_COMM_WORLD (cid 0)
[melchior.local:03126] coll:base:comm_select: Checking all available modules
[melchior.local:03126] coll:adapt:comm_query (0/MPI_COMM_WORLD): pick me! pick me!
[melchior.local:03126] coll:base:comm_select: component available: adapt, priority: 0
[melchior.local:03126] coll:base:comm_select: component available: basic, priority: 10
[melchior.local:03126] coll:han:comm_query (0/MPI_COMM_WORLD): pick me! pick me!
[melchior.local:03126] coll:base:comm_select: component available: han, priority: 35
[melchior.local:03126] coll:base:comm_select: component not available: inter
[melchior.local:03126] coll:base:comm_select: component disqualified: inter (priority -1 < 0)
[melchior.local:03126] coll:base:comm_select: component available: libnbc, priority: 10
[melchior.local:03126] coll:base:comm_select: component not available: self
[melchior.local:03126] coll:base:comm_select: component disqualified: self (priority -1 < 0)
[melchior.local:03126] coll:base:comm_select: component not available: sync
[melchior.local:03126] coll:base:comm_select: component disqualified: sync (priority -1 < 0)
[melchior.local:03126] coll:base:comm_select: component available: tuned, priority: 30
[melchior.local:03126] coll:base:comm_select: component not available: ftagree
[melchior.local:03126] coll:base:comm_select: component disqualified: ftagree (priority -1 < 0)
[melchior.local:03126] coll:sm:comm_query (0/MPI_COMM_WORLD): intercomm, comm is too small, or not all peers local; disqualifying myself
[melchior.local:03126] coll:base:comm_select: component not available: sm
[melchior.local:03126] coll:base:comm_select: component disqualified: sm (priority -1 < 0)
[melchior.local:03126] (0/MPI_COMM_WORLD): no underlying reduce; disqualifying myself
[melchior.local:03126] coll:base:comm_select: selecting adapt, priority 0, Disabled
[melchior.local:03126] coll:base:comm_select: selecting basic, priority 10, Enabled
[melchior.local:03126] coll:base:comm_select: selecting libnbc, priority 10, Enabled
[melchior.local:03126] coll:base:comm_select: selecting tuned, priority 30, Enabled
[melchior.local:03126] coll:han:get_all_coll_modules HAN found module basic with id 1 for topological level 2 (global_communicator) for communicator (0/MPI_COMM_WORLD)
[melchior.local:03126] coll:han:get_all_coll_modules HAN found module libnbc with id 2 for topological level 2 (global_communicator) for communicator (0/MPI_COMM_WORLD)
[melchior.local:03126] coll:han:get_all_coll_modules HAN found module tuned with id 3 for topological level 2 (global_communicator) for communicator (0/MPI_COMM_WORLD)
[melchior.local:03126] coll:han:get_all_coll_modules HAN sub-communicator modules storage for topological level 2 (global_communicator) gets 4 modules for communicator (0/MPI_COMM_WORLD)
[melchior.local:03126] coll:base:comm_select: selecting han, priority 35, Enabled
[melchior.local:03126] coll:base:comm_select: new communicator: MPI_COMM_SELF (cid 1)
[melchior.local:03126] coll:base:comm_select: Checking all available modules
[melchior.local:03126] coll:adapt:comm_query (1/MPI_COMM_SELF): intercomm, comm is too small; disqualifying myself
[melchior.local:03126] coll:base:comm_select: component not available: adapt
[melchior.local:03126] coll:base:comm_select: component disqualified: adapt (priority -1 < 0)
[melchior.local:03126] coll:base:comm_select: component available: basic, priority: 10
[melchior.local:03126] coll:han:comm_query (1/MPI_COMM_SELF): comm is too small; disqualifying myself
[melchior.local:03126] coll:base:comm_select: component not available: han
[melchior.local:03126] coll:base:comm_select: component disqualified: han (priority -1 < 0)
[melchior.local:03126] coll:base:comm_select: component not available: inter
[melchior.local:03126] coll:base:comm_select: component disqualified: inter (priority -1 < 0)
[melchior.local:03126] coll:base:comm_select: component available: libnbc, priority: 10
[melchior.local:03126] coll:base:comm_select: component available: self, priority: 75
[melchior.local:03126] coll:base:comm_select: component not available: sync
[melchior.local:03126] coll:base:comm_select: component disqualified: sync (priority -1 < 0)
[melchior.local:03126] coll:base:comm_select: component not available: tuned
[melchior.local:03126] coll:base:comm_select: component disqualified: tuned (priority -1 < 0)
[melchior.local:03126] coll:base:comm_select: component not available: ftagree
[melchior.local:03126] coll:base:comm_select: component disqualified: ftagree (priority -1 < 0)
[melchior.local:03126] coll:sm:comm_query (1/MPI_COMM_SELF): intercomm, comm is too small, or not all peers local; disqualifying myself
[melchior.local:03126] coll:base:comm_select: component not available: sm
[melchior.local:03126] coll:base:comm_select: component disqualified: sm (priority -1 < 0)
[melchior.local:03126] coll:base:comm_select: selecting basic, priority 10, Enabled
[melchior.local:03126] coll:base:comm_select: selecting libnbc, priority 10, Enabled
[melchior.local:03126] coll:base:comm_select: selecting self, priority 75, Enabled
[balthasar.local:04079] coll:han:get_dynamic_rule HAN searched for collective 2 (allreduce) but did not find any rule for this collective
[balthasar.local:04079] coll:han:get_dynamic_rule HAN searched for collective 2 (allreduce) but did not find any rule for this collective
[balthasar.local:04079] coll:han:get_algorithm allreduce size:4 algorithm:0 default
[balthasar.local:04079] mca: bml: Using tcp btl for send to [[40704,1],1] on node melchior
[balthasar.local:04079] mca: bml: Using tcp btl for send to [[40704,1],1] on node melchior
[balthasar.local:04079] btl: tcp: attempting to connect() to [[40704,1],1] address 192.168.68.53 on port 1024
[balthasar.local:04079] btl:tcp: would block, so allowing background progress
[balthasar.local:04079] btl:tcp: connect() to 192.168.68.53:1024 completed (complete_connect), sending connect ACK
[melchior.local:03126] coll:han:get_dynamic_rule HAN searched for collective 2 (allreduce) but did not find any rule for this collective
[melchior.local:03126] coll:han:get_dynamic_rule HAN searched for collective 2 (allreduce) but did not find any rule for this collective
[melchior.local:03126] mca: bml: Using tcp btl for send to [[40704,1],0] on node balthasar
[melchior.local:03126] mca: bml: Using tcp btl for send to [[40704,1],0] on node balthasar
[melchior.local:03126] btl: tcp: attempting to connect() to [[40704,1],0] address 192.168.68.80 on port 1024
[melchior.local:03126] btl:tcp: would block, so allowing background progress
[melchior.local:03126] btl:tcp: now connected to 192.168.68.80, process [[40704,1],0]
[melchior.local:03126] btl: tcp: attempting to connect() to [[40704,1],0] address fdcd:ee7b:759e:1744:42b:9477:f1a9:15b5 on port 1024
[melchior.local:03126] btl:tcp: would block, so allowing background progress
[balthasar.local:04079] btl:tcp: now connected to fdcd:ee7b:759e:1744:a200:7dd3:e2d:7f69, process [[40704,1],1]
[melchior.local:03126] btl:tcp: connect() to fdcd:ee7b:759e:1744:42b:9477:f1a9:15b5:1024 completed (complete_connect), sending connect ACK
[balthasar][[40704,1],0][btl_tcp_frag.c:228:mca_btl_tcp_frag_recv] mca_btl_tcp_frag_recv: readv error (0x17ffffb38, 58568)
Bad address(1)
[balthasar:00000] *** An error occurred in Socket closed
[balthasar:00000] *** reported by process [2667577345,0]
[balthasar:00000] *** on a NULL communicator
[balthasar:00000] *** Unknown error
[balthasar:00000] *** MPI_ERRORS_ARE_FATAL (processes in this communicator will now abort,
[balthasar:00000] *** and MPI will try to terminate your MPI job as well)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment