Created
August 28, 2025 00:47
-
-
Save muellerzr/62291eb5eb9f1948de40889dd702c046 to your computer and use it in GitHub Desktop.
mlx issue part 2
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Copyright © 2024 Apple Inc. | |
| """ | |
| Run with: | |
| ``` | |
| OMPI_MCA_mpi_abort_print_stack=1 mlx.launch --hostfile hosts.json --backend mpi --mpi-arg "--mca btl_tcp_if_exclude lo0,169.254.0.0/16,fe80::/10 --mca mpi_show_mca_params all --mca coll_base_verbose 100 --mca btl_base_verbose 100" script.py | |
| ``` | |
| Make sure you can run MLX over MPI on two hosts. For more information see the | |
| documentation: | |
| https://ml-explore.github.io/mlx/build/html/usage/distributed.html | |
| """ | |
| import argparse | |
| import json | |
| import resource | |
| from pathlib import Path | |
| import mlx.core as mx | |
| from huggingface_hub import snapshot_download | |
| from mlx.utils import tree_flatten | |
| from mlx_lm import load, stream_generate | |
| from mlx_lm.utils import load_model, load_tokenizer | |
# Needed for 8 bit model: make sure the soft limit on open file descriptors
# is at least 2048. The soft limit is only ever raised (never lowered), it is
# capped at the existing hard limit, and the hard limit itself is left alone,
# so this cannot fail with ValueError on hosts whose hard limit is below the
# requested value.
_MIN_OPEN_FILES = 2048
_soft_nofile, _hard_nofile = resource.getrlimit(resource.RLIMIT_NOFILE)
if _soft_nofile != resource.RLIM_INFINITY and _soft_nofile < _MIN_OPEN_FILES:
    if _hard_nofile == resource.RLIM_INFINITY:
        _wanted_nofile = _MIN_OPEN_FILES
    else:
        _wanted_nofile = min(_MIN_OPEN_FILES, _hard_nofile)
    resource.setrlimit(resource.RLIMIT_NOFILE, (_wanted_nofile, _hard_nofile))
def download(repo: str, allow_patterns: list[str]) -> Path:
    """Fetch a snapshot of ``repo`` and return its location as a ``Path``.

    Args:
        repo: Repository identifier forwarded to ``snapshot_download``.
        allow_patterns: File patterns forwarded to ``snapshot_download`` to
            select which files of the repository are fetched.

    Returns:
        The value returned by ``snapshot_download`` wrapped in a ``Path``.
    """
    snapshot_dir = snapshot_download(repo, allow_patterns=allow_patterns)
    return Path(snapshot_dir)
def shard_and_load(repo):
    """Load this rank's pipeline shard of a model together with its tokenizer.

    Args:
        repo: Either a path to a local model directory or a Hugging Face
            repo id. An existing local directory is used as-is; anything
            else is fetched with ``download``.

    Returns:
        A ``(model, tokenizer)`` tuple. The model has been pipelined across
        the distributed group and its parameters have been evaluated.
    """
    local_dir = Path(repo).expanduser()
    use_local = local_dir.is_dir()

    if use_local:
        model_path = local_dir
    else:
        # Get model path with everything but weight safetensors
        model_path = download(
            repo,
            allow_patterns=["*.json", "*.py", "tokenizer.model", "*.tiktoken", "*.txt"],
        )

    # Lazy load and shard model to figure out
    # which weights we need
    model, config = load_model(model_path, lazy=True, strict=False)

    group = mx.distributed.init()
    model.model.pipeline(group)

    if not use_local:
        # Figure out which files we need for the local shard
        with open(model_path / "model.safetensors.index.json", "r") as fid:
            weight_index = json.load(fid)["weight_map"]

        local_files = sorted(
            {weight_index[name] for name, _ in tree_flatten(model.parameters())}
        )

        # Download weights for local shard
        download(repo, allow_patterns=local_files)

    # Load and shard the model, and load the weights
    tokenizer = load_tokenizer(
        model_path,
        {"trust_remote_code": True},
        eos_token_ids=config.get("eos_token_id", None),
    )
    # Reload so the shard picks up weight files that may only have been
    # fetched after the first (strict=False) load above.
    model, _ = load_model(model_path, lazy=True, strict=False)
    model.model.pipeline(group)
    mx.eval(model.parameters())

    # Synchronize processes before generation to avoid timeout if downloading
    # model for the first time.
    mx.eval(mx.distributed.all_sum(mx.array(1.0), stream=mx.cpu))
    return model, tokenizer
# Script entry point: parse CLI options, load this rank's model shard, run
# streamed generation for a single chat prompt and report statistics.
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="LLM pipelined inference example")
    parser.add_argument(
        "--model",
        default="DeepSeek-Coder-V2-Lite-Instruct-4bit-mlx",
        help="HF repo or path to local model.",
    )
    parser.add_argument(
        "--prompt",
        "-p",
        default="What is the square root of 4",
        # The previous help text advertised "'-' reads from stdin", but no
        # stdin handling exists in this script, so the claim was dropped.
        help="Message to be processed by the model",
    )
    parser.add_argument(
        "--max-tokens",
        "-m",
        type=int,
        default=256,
        help="Maximum number of tokens to generate",
    )
    args = parser.parse_args()

    group = mx.distributed.init()
    rank = group.rank()

    def rprint(*values, **kwargs):
        """Print only on rank 0 so output is not duplicated per process."""
        if rank == 0:
            print(*values, **kwargs)

    model, tokenizer = shard_and_load(args.model)

    # The main generation loop
    messages = [{"role": "user", "content": args.prompt}]
    prompt = tokenizer.apply_chat_template(messages, add_generation_prompt=True)

    # Stays None if the generator yields nothing, so the summary below does
    # not fail with NameError.
    response = None
    for response in stream_generate(
        model, tokenizer, prompt, max_tokens=args.max_tokens
    ):
        rprint(response.text, end="", flush=True)

    rprint()
    rprint("=" * 10)
    if response is None:
        rprint("No tokens were generated.")
    else:
        rprint(
            f"Prompt: {response.prompt_tokens} tokens, "
            f"{response.prompt_tps:.3f} tokens-per-sec"
        )
        rprint(
            f"Generation: {response.generation_tokens} tokens, "
            f"{response.generation_tps:.3f} tokens-per-sec"
        )
        rprint(f"Peak memory: {response.peak_memory:.3f} GB")
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| [balthasar.local:04077] SET DYLD_LIBRARY_PATH=/opt/homebrew/lib | |
| [balthasar.local:04079] mca: base: components_register: registering framework btl components | |
| [balthasar.local:04079] mca: base: components_register: found loaded component self | |
| [balthasar.local:04079] mca: base: components_register: component self register function successful | |
| [balthasar.local:04079] mca: base: components_register: found loaded component sm | |
| [balthasar.local:04079] mca: base: components_register: component sm register function successful | |
| [balthasar.local:04079] mca: base: components_register: found loaded component tcp | |
| [balthasar.local:04079] mca: base: components_register: component tcp register function successful | |
| [balthasar.local:04079] mca: base: components_open: opening btl components | |
| [balthasar.local:04079] mca: base: components_open: found loaded component self | |
| [balthasar.local:04079] mca: base: components_open: component self open function successful | |
| [balthasar.local:04079] mca: base: components_open: found loaded component sm | |
| [balthasar.local:04079] mca: base: components_open: component sm open function successful | |
| [balthasar.local:04079] mca: base: components_open: found loaded component tcp | |
| [balthasar.local:04079] mca: base: components_open: component tcp open function successful | |
| [balthasar.local:04079] mca: base: components_register: registering framework coll components | |
| [balthasar.local:04079] mca: base: components_register: found loaded component adapt | |
| [balthasar.local:04079] mca: base: components_register: component adapt register function successful | |
| [balthasar.local:04079] mca: base: components_register: found loaded component basic | |
| [balthasar.local:04079] mca: base: components_register: component basic register function successful | |
| [balthasar.local:04079] mca: base: components_register: found loaded component han | |
| [balthasar.local:04079] mca: base: components_register: component han register function successful | |
| [balthasar.local:04079] mca: base: components_register: found loaded component inter | |
| [balthasar.local:04079] mca: base: components_register: component inter register function successful | |
| [balthasar.local:04079] mca: base: components_register: found loaded component libnbc | |
| [balthasar.local:04079] mca: base: components_register: component libnbc register function successful | |
| [balthasar.local:04079] mca: base: components_register: found loaded component self | |
| [balthasar.local:04079] mca: base: components_register: component self register function successful | |
| [balthasar.local:04079] mca: base: components_register: found loaded component sync | |
| [balthasar.local:04079] mca: base: components_register: component sync register function successful | |
| [balthasar.local:04079] mca: base: components_register: found loaded component tuned | |
| [balthasar.local:04079] mca: base: components_register: component tuned register function successful | |
| [balthasar.local:04079] mca: base: components_register: found loaded component ftagree | |
| [balthasar.local:04079] mca: base: components_register: component ftagree register function successful | |
| [balthasar.local:04079] mca: base: components_register: found loaded component monitoring | |
| [balthasar.local:04079] mca: base: components_register: component monitoring register function successful | |
| [balthasar.local:04079] mca: base: components_register: found loaded component sm | |
| [balthasar.local:04079] mca: base: components_register: component sm register function successful | |
| [balthasar.local:04079] mca: base: components_open: opening coll components | |
| [balthasar.local:04079] mca: base: components_open: found loaded component adapt | |
| [balthasar.local:04079] mca: base: components_open: component adapt open function successful | |
| [balthasar.local:04079] mca: base: components_open: found loaded component basic | |
| [balthasar.local:04079] mca: base: components_open: found loaded component han | |
| [balthasar.local:04079] mca: base: components_open: component han open function successful | |
| [balthasar.local:04079] mca: base: components_open: found loaded component inter | |
| [balthasar.local:04079] mca: base: components_open: found loaded component libnbc | |
| [balthasar.local:04079] mca: base: components_open: component libnbc open function successful | |
| [balthasar.local:04079] mca: base: components_open: found loaded component self | |
| [balthasar.local:04079] mca: base: components_open: found loaded component sync | |
| [balthasar.local:04079] mca: base: components_open: found loaded component tuned | |
| [balthasar.local:04079] mca: base: components_open: component tuned open function successful | |
| [balthasar.local:04079] mca: base: components_open: found loaded component ftagree | |
| [balthasar.local:04079] mca: base: components_open: found loaded component monitoring | |
| [balthasar.local:04079] mca: base: components_open: component monitoring open function successful | |
| [balthasar.local:04079] mca: base: components_open: found loaded component sm | |
| [balthasar.local:04079] select: initializing btl component self | |
| [balthasar.local:04079] select: init of component self returned success | |
| [balthasar.local:04079] select: initializing btl component sm | |
| [balthasar.local:04079] select: init of component sm returned failure | |
| [balthasar.local:04079] mca: base: close: component sm closed | |
| [balthasar.local:04079] mca: base: close: unloading component sm | |
| [balthasar.local:04079] select: initializing btl component tcp | |
| [balthasar.local:04079] btl: tcp: Using interface: lo0 | |
| [balthasar.local:04079] btl: tcp: Searching for exclude address+prefix: 169.254.0.0 / 16 | |
| [balthasar.local:04079] btl: tcp: Found match: 169.254.175.10 (en12) | |
| [balthasar.local:04079] btl: tcp: Found match: 169.254.195.121 (bridge0) | |
| [balthasar.local:04079] btl: tcp: Using interface: fe80::/10 | |
| [balthasar.local:04079] btl:tcp: 0x13c8a2e40: if en0 kidx 24 cnt 0 addr fdcd:ee7b:759e:1744:42b:9477:f1a9:15b5 IPv6 bw 100 lt 100 | |
| [balthasar.local:04079] btl:tcp: 0x13c8a31a0: if en0 kidx 4 cnt 0 addr 192.168.68.80 IPv4 bw 100 lt 100 | |
| [balthasar.local:04079] btl:tcp: Attempting to bind to AF_INET port 1024 | |
| [balthasar.local:04079] btl:tcp: Successfully bound to AF_INET port 1024 | |
| [balthasar.local:04079] btl:tcp: my listening v4 socket is 0.0.0.0:1024 | |
| [balthasar.local:04079] btl:tcp: Attempting to bind to AF_INET6 port 1024 | |
| [balthasar.local:04079] btl:tcp: Successfully bound to AF_INET6 port 1024 | |
| [balthasar.local:04079] btl:tcp: my listening v6 socket port is 1024 | |
| [balthasar.local:04079] btl: tcp: exchange: 0 24 IPv6 fdcd:ee7b:759e:1744:42b:9477:f1a9:15b5 | |
| [balthasar.local:04079] btl: tcp: exchange: 1 4 IPv4 192.168.68.80 | |
| [balthasar.local:04079] select: init of component tcp returned success | |
| [melchior.local:03126] mca: base: components_register: registering framework btl components | |
| [melchior.local:03126] mca: base: components_register: found loaded component self | |
| [melchior.local:03126] mca: base: components_register: component self register function successful | |
| [melchior.local:03126] mca: base: components_register: found loaded component sm | |
| [melchior.local:03126] mca: base: components_register: component sm register function successful | |
| [melchior.local:03126] mca: base: components_register: found loaded component tcp | |
| [melchior.local:03126] mca: base: components_register: component tcp register function successful | |
| [melchior.local:03126] mca: base: components_open: opening btl components | |
| [melchior.local:03126] mca: base: components_open: found loaded component self | |
| [melchior.local:03126] mca: base: components_open: component self open function successful | |
| [melchior.local:03126] mca: base: components_open: found loaded component sm | |
| [melchior.local:03126] mca: base: components_open: component sm open function successful | |
| [melchior.local:03126] mca: base: components_open: found loaded component tcp | |
| [melchior.local:03126] mca: base: components_open: component tcp open function successful | |
| [melchior.local:03126] mca: base: components_register: registering framework coll components | |
| [melchior.local:03126] mca: base: components_register: found loaded component adapt | |
| [melchior.local:03126] mca: base: components_register: component adapt register function successful | |
| [melchior.local:03126] mca: base: components_register: found loaded component basic | |
| [melchior.local:03126] mca: base: components_register: component basic register function successful | |
| [melchior.local:03126] mca: base: components_register: found loaded component han | |
| [melchior.local:03126] mca: base: components_register: component han register function successful | |
| [melchior.local:03126] mca: base: components_register: found loaded component inter | |
| [melchior.local:03126] mca: base: components_register: component inter register function successful | |
| [melchior.local:03126] mca: base: components_register: found loaded component libnbc | |
| [melchior.local:03126] mca: base: components_register: component libnbc register function successful | |
| [melchior.local:03126] mca: base: components_register: found loaded component self | |
| [melchior.local:03126] mca: base: components_register: component self register function successful | |
| [melchior.local:03126] mca: base: components_register: found loaded component sync | |
| [melchior.local:03126] mca: base: components_register: component sync register function successful | |
| [melchior.local:03126] mca: base: components_register: found loaded component tuned | |
| [melchior.local:03126] mca: base: components_register: component tuned register function successful | |
| [melchior.local:03126] mca: base: components_register: found loaded component ftagree | |
| [melchior.local:03126] mca: base: components_register: component ftagree register function successful | |
| [melchior.local:03126] mca: base: components_register: found loaded component monitoring | |
| [melchior.local:03126] mca: base: components_register: component monitoring register function successful | |
| [melchior.local:03126] mca: base: components_register: found loaded component sm | |
| [melchior.local:03126] mca: base: components_register: component sm register function successful | |
| [melchior.local:03126] mca: base: components_open: opening coll components | |
| [melchior.local:03126] mca: base: components_open: found loaded component adapt | |
| [melchior.local:03126] mca: base: components_open: component adapt open function successful | |
| [melchior.local:03126] mca: base: components_open: found loaded component basic | |
| [melchior.local:03126] mca: base: components_open: found loaded component han | |
| [melchior.local:03126] mca: base: components_open: component han open function successful | |
| [melchior.local:03126] mca: base: components_open: found loaded component inter | |
| [melchior.local:03126] mca: base: components_open: found loaded component libnbc | |
| [melchior.local:03126] mca: base: components_open: component libnbc open function successful | |
| [melchior.local:03126] mca: base: components_open: found loaded component self | |
| [melchior.local:03126] mca: base: components_open: found loaded component sync | |
| [melchior.local:03126] mca: base: components_open: found loaded component tuned | |
| [melchior.local:03126] mca: base: components_open: component tuned open function successful | |
| [melchior.local:03126] mca: base: components_open: found loaded component ftagree | |
| [melchior.local:03126] mca: base: components_open: found loaded component monitoring | |
| [melchior.local:03126] mca: base: components_open: component monitoring open function successful | |
| [melchior.local:03126] mca: base: components_open: found loaded component sm | |
| [melchior.local:03126] select: initializing btl component self | |
| [melchior.local:03126] select: init of component self returned success | |
| [melchior.local:03126] select: initializing btl component sm | |
| [melchior.local:03126] select: init of component sm returned failure | |
| [melchior.local:03126] mca: base: close: component sm closed | |
| [melchior.local:03126] mca: base: close: unloading component sm | |
| [melchior.local:03126] select: initializing btl component tcp | |
| [melchior.local:03126] btl: tcp: Using interface: lo0 | |
| [melchior.local:03126] btl: tcp: Searching for exclude address+prefix: 169.254.0.0 / 16 | |
| [melchior.local:03126] btl: tcp: Found match: 169.254.71.220 (bridge0) | |
| [melchior.local:03126] btl: tcp: Found match: 169.254.222.63 (en14) | |
| [melchior.local:03126] btl: tcp: Using interface: fe80::/10 | |
| [melchior.local:03126] btl:tcp: 0x12753c8f0: if en0 kidx 24 cnt 0 addr fdcd:ee7b:759e:1744:8:4538:670e:89cb IPv6 bw 100 lt 100 | |
| [melchior.local:03126] btl:tcp: 0x12753cc50: if en0 kidx 3 cnt 0 addr 192.168.68.53 IPv4 bw 100 lt 100 | |
| [melchior.local:03126] btl:tcp: Attempting to bind to AF_INET port 1024 | |
| [melchior.local:03126] btl:tcp: Successfully bound to AF_INET port 1024 | |
| [melchior.local:03126] btl:tcp: my listening v4 socket is 0.0.0.0:1024 | |
| [melchior.local:03126] btl:tcp: Attempting to bind to AF_INET6 port 1024 | |
| [melchior.local:03126] btl:tcp: Successfully bound to AF_INET6 port 1024 | |
| [melchior.local:03126] btl:tcp: my listening v6 socket port is 1024 | |
| [melchior.local:03126] btl: tcp: exchange: 0 24 IPv6 fdcd:ee7b:759e:1744:8:4538:670e:89cb | |
| [melchior.local:03126] btl: tcp: exchange: 1 3 IPv4 192.168.68.53 | |
| [melchior.local:03126] select: init of component tcp returned success | |
| [balthasar.local:04079] coll:find_available: querying coll component adapt | |
| [balthasar.local:04079] coll:find_available: coll component adapt is available | |
| [balthasar.local:04079] coll:find_available: querying coll component basic | |
| [balthasar.local:04079] coll:find_available: coll component basic is available | |
| [balthasar.local:04079] coll:find_available: querying coll component han | |
| [balthasar.local:04079] coll:han:init_query: pick me! pick me! | |
| [balthasar.local:04079] coll:find_available: coll component han is available | |
| [balthasar.local:04079] coll:find_available: querying coll component inter | |
| [balthasar.local:04079] coll:find_available: coll component inter is available | |
| [balthasar.local:04079] coll:find_available: querying coll component libnbc | |
| [balthasar.local:04079] coll:find_available: coll component libnbc is available | |
| [balthasar.local:04079] coll:find_available: querying coll component self | |
| [balthasar.local:04079] coll:find_available: coll component self is available | |
| [balthasar.local:04079] coll:find_available: querying coll component sync | |
| [balthasar.local:04079] coll:find_available: coll component sync is available | |
| [balthasar.local:04079] coll:find_available: querying coll component tuned | |
| [balthasar.local:04079] coll:find_available: coll component tuned is available | |
| [balthasar.local:04079] coll:find_available: querying coll component ftagree | |
| [balthasar.local:04079] coll:find_available: coll component ftagree is available | |
| [balthasar.local:04079] coll:find_available: querying coll component monitoring | |
| [balthasar.local:04079] coll:find_available: coll component monitoring is not available | |
| [balthasar.local:04079] mca: base: close: component monitoring closed | |
| [balthasar.local:04079] mca: base: close: unloading component monitoring | |
| [balthasar.local:04079] coll:find_available: querying coll component sm | |
| [balthasar.local:04079] coll:sm:init_query: pick me! pick me! | |
| [balthasar.local:04079] coll:find_available: coll component sm is available | |
| [balthasar.local:04079] mca: bml: Using self btl for send to [[40704,1],0] on node balthasar | |
| [melchior.local:03126] coll:find_available: querying coll component adapt | |
| [melchior.local:03126] coll:find_available: coll component adapt is available | |
| [melchior.local:03126] coll:find_available: querying coll component basic | |
| [melchior.local:03126] coll:find_available: coll component basic is available | |
| [melchior.local:03126] coll:find_available: querying coll component han | |
| [melchior.local:03126] coll:han:init_query: pick me! pick me! | |
| [melchior.local:03126] coll:find_available: coll component han is available | |
| [melchior.local:03126] coll:find_available: querying coll component inter | |
| [melchior.local:03126] coll:find_available: coll component inter is available | |
| [melchior.local:03126] coll:find_available: querying coll component libnbc | |
| [melchior.local:03126] coll:find_available: coll component libnbc is available | |
| [melchior.local:03126] coll:find_available: querying coll component self | |
| [melchior.local:03126] coll:find_available: coll component self is available | |
| [melchior.local:03126] coll:find_available: querying coll component sync | |
| [melchior.local:03126] coll:find_available: coll component sync is available | |
| [melchior.local:03126] coll:find_available: querying coll component tuned | |
| [melchior.local:03126] coll:find_available: coll component tuned is available | |
| [melchior.local:03126] coll:find_available: querying coll component ftagree | |
| [melchior.local:03126] coll:find_available: coll component ftagree is available | |
| [melchior.local:03126] coll:find_available: querying coll component monitoring | |
| [melchior.local:03126] coll:find_available: coll component monitoring is not available | |
| [melchior.local:03126] mca: base: close: component monitoring closed | |
| [melchior.local:03126] mca: base: close: unloading component monitoring | |
| [melchior.local:03126] coll:find_available: querying coll component sm | |
| [melchior.local:03126] coll:sm:init_query: pick me! pick me! | |
| [melchior.local:03126] coll:find_available: coll component sm is available | |
| [melchior.local:03126] mca: bml: Using self btl for send to [[40704,1],1] on node melchior | |
| [balthasar.local:04079] base_help_aggregate=true (default) | |
| [balthasar.local:04079] mca_base_param_files=/opt/homebrew/etc/openmpi-mca-params.conf (default) | |
| [balthasar.local:04079] mca_param_files=/opt/homebrew/etc/openmpi-mca-params.conf (default) | |
| [balthasar.local:04079] mca_base_override_param_file=/opt/homebrew/etc/openmpi-mca-params-override.conf (default) | |
| [balthasar.local:04079] mca_base_suppress_override_warning=false (default) | |
| [balthasar.local:04079] mca_base_param_file_prefix= (default) | |
| [balthasar.local:04079] mca_base_envar_file_prefix= (default) | |
| [balthasar.local:04079] mca_base_param_file_path=/opt/homebrew/Cellar/open-mpi/5.0.8/share/openmpi/amca-param-sets:/Users/muellerzr/mlx_stuff (default) | |
| [balthasar.local:04079] mca_base_param_file_path_force= (default) | |
| [balthasar.local:04079] opal_signal=6,10,8,11 (default) | |
| [balthasar.local:04079] opal_stacktrace_output=stderr (default) | |
| [balthasar.local:04079] opal_net_private_ipv4=10.0.0.0/8;172.16.0.0/12;192.168.0.0/16;169.254.0.0/16 (default) | |
| [balthasar.local:04079] opal_set_max_sys_limits= (default) | |
| [balthasar.local:04079] opal_var_dump_color=name=34,value=32,valid_values=36 (default) | |
| [balthasar.local:04079] opal_built_with_cuda_support=false (default) | |
| [balthasar.local:04079] opal_cuda_support=false (default) | |
| [balthasar.local:04079] opal_warn_on_missing_libcuda=true (default) | |
| [balthasar.local:04079] mpi_leave_pinned=auto (default) | |
| [balthasar.local:04079] opal_leave_pinned=auto (default) | |
| [balthasar.local:04079] mpi_leave_pinned_pipeline=false (default) | |
| [balthasar.local:04079] opal_leave_pinned_pipeline=false (default) | |
| [balthasar.local:04079] mpi_warn_on_fork=true (default) | |
| [balthasar.local:04079] opal_abort_delay=0 (default) | |
| [balthasar.local:04079] opal_abort_print_stack=true (environment) | |
| [balthasar.local:04079] mca_base_env_list= (default) | |
| [balthasar.local:04079] mca_base_env_list_delimiter=; (default) | |
| [balthasar.local:04079] opal_max_thread_in_progress=1 (default) | |
| [balthasar.local:04079] mca_base_component_path=/opt/homebrew/Cellar/open-mpi/5.0.8/lib/openmpi:/Users/muellerzr/.openmpi/components (default) | |
| [balthasar.local:04079] mca_component_path=/opt/homebrew/Cellar/open-mpi/5.0.8/lib/openmpi:/Users/muellerzr/.openmpi/components (default) | |
| [balthasar.local:04079] mca_base_component_show_load_errors=all (default) | |
| [balthasar.local:04079] mca_component_show_load_errors=all (default) | |
| [balthasar.local:04079] mca_base_component_track_load_errors=false (default) | |
| [balthasar.local:04079] mca_base_component_disable_dlopen=false (default) | |
| [balthasar.local:04079] mca_component_disable_dlopen=false (default) | |
| [balthasar.local:04079] mca_base_verbose=stderr (default) | |
| [balthasar.local:04079] mca_verbose=stderr (default) | |
| [balthasar.local:04079] dl= (default) | |
| [balthasar.local:04079] dl_base_verbose=error (default) | |
| [balthasar.local:04079] dl_dlopen_filename_suffixes=.so,.dylib,.dll,.sl (default) | |
| [balthasar.local:04079] mpi_ft_enable=false (default) | |
| [balthasar.local:04079] mpi_ft_verbose=0 (default) | |
| [balthasar.local:04079] mpi_ft_reliable_bcast=1 (default) | |
| [balthasar.local:04079] mpi_ft_propagator_with_rbcast=false (default) | |
| [balthasar.local:04079] mpi_ft_detector=false (default) | |
| [balthasar.local:04079] mpi_ft_detector_thread=false (default) | |
| [balthasar.local:04079] mpi_ft_detector_period=3.000000 (default) | |
| [balthasar.local:04079] mpi_ft_detector_timeout=10.000000 (default) | |
| [balthasar.local:04079] mpi_ft_detector_rdma_heartbeat=false (default) | |
| [balthasar.local:04079] mpi_param_check=true (default) | |
| [balthasar.local:04079] mpi_yield_when_idle=false (default) | |
| [balthasar.local:04079] mpi_event_tick_rate=-1 (default) | |
| [balthasar.local:04079] mpi_show_handle_leaks=false (default) | |
| [balthasar.local:04079] mpi_no_free_handles=false (default) | |
| [balthasar.local:04079] mpi_show_mpi_alloc_mem_leaks=0 (default) | |
| [balthasar.local:04079] mpi_show_mca_params=all (environment) | |
| [balthasar.local:04079] mpi_show_mca_params_file= (default) | |
| [balthasar.local:04079] mpi_preconnect_all=false (default) | |
| [balthasar.local:04079] mpi_have_sparse_group_storage=false (default) | |
| [balthasar.local:04079] mpi_use_sparse_group_storage=false (default) | |
| [balthasar.local:04079] mpi_cuda_support=false (default) | |
| [balthasar.local:04079] mpi_built_with_cuda_support=false (default) | |
| [balthasar.local:04079] mpi_add_procs_cutoff=0 (default) | |
| [balthasar.local:04079] mpi_dynamics_enabled=true (default) | |
| [balthasar.local:04079] async_mpi_init=false (default) | |
| [balthasar.local:04079] async_mpi_finalize=false (default) | |
| [balthasar.local:04079] mpi_abort_delay=0 (default) | |
| [balthasar.local:04079] mpi_abort_print_stack=true (default) | |
| [balthasar.local:04079] mpi_compat_mpi3=true (default) | |
| [balthasar.local:04079] mpi_pmix_connect_timeout=0 (default) | |
| [balthasar.local:04079] ompi_timing=false (default) | |
| [balthasar.local:04079] ompi_stream_buffering=-1 (default) | |
| [balthasar.local:04079] mpi_comm_verbose=0 (default) | |
| [balthasar.local:04079] if= (default) | |
| [balthasar.local:04079] if_base_verbose=error (default) | |
| [balthasar.local:04079] if_base_do_not_resolve=false (default) | |
| [balthasar.local:04079] if_base_retain_loopback=false (default) | |
| [balthasar.local:04079] threads= (default) | |
| [balthasar.local:04079] threads_base_verbose=error (default) | |
| [balthasar.local:04079] threads_pthreads_yield_strategy=sched_yield (default) | |
| [balthasar.local:04079] threads_pthreads_nanosleep_time=1 (default) | |
| [balthasar.local:04079] hwloc= (default) | |
| [balthasar.local:04079] hwloc_base_verbose=error (default) | |
| [balthasar.local:04079] hwloc_base_mem_bind_failure_action=warn (default) | |
| [balthasar.local:04079] memcpy= (default) | |
| [balthasar.local:04079] memcpy_base_verbose=error (default) | |
| [balthasar.local:04079] memchecker= (default) | |
| [balthasar.local:04079] memchecker_base_verbose=error (default) | |
| [balthasar.local:04079] backtrace= (default) | |
| [balthasar.local:04079] backtrace_base_verbose=error (default) | |
| [balthasar.local:04079] timer= (default) | |
| [balthasar.local:04079] timer_base_verbose=error (default) | |
| [balthasar.local:04079] timer_require_monotonic=true (default) | |
| [balthasar.local:04079] shmem= (default) | |
| [balthasar.local:04079] shmem_base_verbose=error (default) | |
| [balthasar.local:04079] shmem_mmap_priority=50 (default) | |
| [balthasar.local:04079] shmem_mmap_enable_nfs_warning=true (default) | |
| [balthasar.local:04079] shmem_mmap_relocate_backing_file=0 (default) | |
| [balthasar.local:04079] shmem_mmap_backing_file_base_dir=/dev/shm (default) | |
| [balthasar.local:04079] reachable= (default) | |
| [balthasar.local:04079] reachable_base_verbose=error (default) | |
| [balthasar.local:04079] pmix= (default) | |
| [balthasar.local:04079] pmix_base_verbose=error (default) | |
| [balthasar.local:04079] pmix_base_async_modex=false (default) | |
| [balthasar.local:04079] pmix_base_collect_data=true (default) | |
| [balthasar.local:04079] pmix_base_exchange_timeout=-1 (default) | |
| [balthasar.local:04079] accelerator= (default) | |
| [balthasar.local:04079] accelerator_base_verbose=error (default) | |
| [balthasar.local:04079] opal_event_include=select (default) | |
| [balthasar.local:04079] event_external_include=select (default) | |
| [balthasar.local:04079] opal_event_verbose=error (default) | |
| [balthasar.local:04079] event_base_verbose=error (default) | |
| [balthasar.local:04079] hook= (default) | |
| [balthasar.local:04079] hook_base_verbose=error (default) | |
| [balthasar.local:04079] hook_comm_method_verbose=0 (default) | |
| [balthasar.local:04079] hook_comm_method_display= (default) | |
| [balthasar.local:04079] hook_comm_method_max=12 (default) | |
| [balthasar.local:04079] hook_comm_method_brief=false (default) | |
| [balthasar.local:04079] hook_comm_method_fakefile= (default) | |
| [balthasar.local:04079] op= (default) | |
| [balthasar.local:04079] op_base_verbose=error (default) | |
| [balthasar.local:04079] op_aarch64_hardware_available=1 (default) | |
| [balthasar.local:04079] op_aarch64_double_supported=false (default) | |
| [balthasar.local:04079] allocator= (default) | |
| [balthasar.local:04079] allocator_base_verbose=error (default) | |
| [balthasar.local:04079] allocator_bucket_num_buckets=30 (default) | |
| [balthasar.local:04079] rcache= (default) | |
| [balthasar.local:04079] rcache_base_verbose=error (default) | |
| [balthasar.local:04079] rcache_grdma_print_stats=false (default) | |
| [balthasar.local:04079] mpool= (default) | |
| [balthasar.local:04079] mpool_base_verbose=error (default) | |
| [balthasar.local:04079] mpool_hugepage_priority=50 (default) | |
| [balthasar.local:04079] mpool_hugepage_page_size=2097152 (default) | |
| [balthasar.local:04079] smsc= (default) | |
| [balthasar.local:04079] smsc_base_verbose=error (default) | |
| [balthasar.local:04079] bml= (default) | |
| [balthasar.local:04079] bml_base_verbose=error (default) | |
| [balthasar.local:04079] bml_r2_show_unreach_errors=true (default) | |
| [balthasar.local:04079] btl= (default) | |
| [balthasar.local:04079] btl_base_verbose=max (environment) | |
| [balthasar.local:04079] btl_base_include= (default) | |
| [balthasar.local:04079] btl_base_exclude= (default) | |
| [balthasar.local:04079] btl_base_warn_peer_error=true (default) | |
| [balthasar.local:04079] btl_base_warn_component_unused=1 (default) | |
| [balthasar.local:04079] btl_self_free_list_num=0 (default) | |
| [balthasar.local:04079] btl_self_free_list_max=64 (default) | |
| [balthasar.local:04079] btl_self_free_list_inc=8 (default) | |
| [balthasar.local:04079] btl_self_exclusivity=65536 (default) | |
| [balthasar.local:04079] btl_self_atomic_flags= (default) | |
| [balthasar.local:04079] btl_self_rndv_eager_limit=131072 (default) | |
| [balthasar.local:04079] btl_self_eager_limit=1024 (default) | |
| [balthasar.local:04079] btl_self_get_limit=18446744073709551615 (default) | |
| [balthasar.local:04079] btl_self_get_alignment=0 (default) | |
| [balthasar.local:04079] btl_self_put_limit=18446744073709551615 (default) | |
| [balthasar.local:04079] btl_self_put_alignment=0 (default) | |
| [balthasar.local:04079] btl_self_accelerator_max_send_size=0 (default) | |
| [balthasar.local:04079] btl_self_max_send_size=16384 (default) | |
| [balthasar.local:04079] btl_self_rdma_pipeline_send_length=2147483647 (default) | |
| [balthasar.local:04079] btl_self_rdma_pipeline_frag_size=2147483647 (default) | |
| [balthasar.local:04079] btl_self_min_rdma_pipeline_size=2147484671 (default) | |
| [balthasar.local:04079] btl_self_latency=0 (default) | |
| [balthasar.local:04079] btl_self_bandwidth=100 (default) | |
| [balthasar.local:04079] btl_tcp_links=1 (default) | |
| [balthasar.local:04079] btl_tcp_if_include= (default) | |
| [balthasar.local:04079] btl_tcp_if_exclude=lo0,en12,bridge0,fe80::/10 (environment) | |
| [balthasar.local:04079] btl_tcp_free_list_num=8 (default) | |
| [balthasar.local:04079] btl_tcp_free_list_max=-1 (default) | |
| [balthasar.local:04079] btl_tcp_free_list_inc=32 (default) | |
| [balthasar.local:04079] btl_tcp_sndbuf=0 (default) | |
| [balthasar.local:04079] btl_tcp_rcvbuf=0 (default) | |
| [balthasar.local:04079] btl_tcp_endpoint_cache=30720 (default) | |
| [balthasar.local:04079] btl_tcp_use_nagle=0 (default) | |
| [balthasar.local:04079] btl_tcp_port_min_v4=1024 (default) | |
| [balthasar.local:04079] btl_tcp_port_range_v4=64511 (default) | |
| [balthasar.local:04079] btl_tcp_port_min_v6=1024 (default) | |
| [balthasar.local:04079] btl_tcp_port_range_v6=64511 (default) | |
| [balthasar.local:04079] btl_tcp_progress_thread=0 (default) | |
| [balthasar.local:04079] btl_tcp_warn_all_unfound_interfaces=false (default) | |
| [balthasar.local:04079] btl_tcp_exclusivity=100 (default) | |
| [balthasar.local:04079] btl_tcp_flags=send,put,inplace,need-ack,need-csum,hetero-rdma (default) | |
| [balthasar.local:04079] btl_tcp_atomic_flags= (default) | |
| [balthasar.local:04079] btl_tcp_rndv_eager_limit=65536 (default) | |
| [balthasar.local:04079] btl_tcp_eager_limit=65536 (default) | |
| [balthasar.local:04079] btl_tcp_put_limit=18446744073709551615 (default) | |
| [balthasar.local:04079] btl_tcp_put_alignment=0 (default) | |
| [balthasar.local:04079] btl_tcp_accelerator_max_send_size=0 (default) | |
| [balthasar.local:04079] btl_tcp_max_send_size=131072 (default) | |
| [balthasar.local:04079] btl_tcp_rdma_pipeline_send_length=131072 (default) | |
| [balthasar.local:04079] btl_tcp_rdma_pipeline_frag_size=2147482624 (default) | |
| [balthasar.local:04079] btl_tcp_min_rdma_pipeline_size=196608 (default) | |
| [balthasar.local:04079] btl_tcp_latency=0 (default) | |
| [balthasar.local:04079] btl_tcp_bandwidth=0 (default) | |
| [balthasar.local:04079] btl_tcp_disable_family=0 (default) | |
| [balthasar.local:04079] pml= (default) | |
| [balthasar.local:04079] pml_base_verbose=error (default) | |
| [balthasar.local:04079] pml_base_bsend_allocator=basic (default) | |
| [balthasar.local:04079] pml_base_wrapper= (default) | |
| [balthasar.local:04079] pml_wrapper= (default) | |
| [balthasar.local:04079] pml_base_check_pml=true (default) | |
| [balthasar.local:04079] pml_ob1_verbose=0 (default) | |
| [balthasar.local:04079] pml_ob1_free_list_num=4 (default) | |
| [balthasar.local:04079] pml_ob1_free_list_max=-1 (default) | |
| [balthasar.local:04079] pml_ob1_free_list_inc=64 (default) | |
| [balthasar.local:04079] pml_ob1_priority=20 (default) | |
| [balthasar.local:04079] pml_ob1_send_pipeline_depth=3 (default) | |
| [balthasar.local:04079] pml_ob1_recv_pipeline_depth=4 (default) | |
| [balthasar.local:04079] pml_ob1_max_rdma_per_request=4 (default) | |
| [balthasar.local:04079] pml_ob1_max_send_per_range=4 (default) | |
| [balthasar.local:04079] pml_ob1_unexpected_limit=128 (default) | |
| [balthasar.local:04079] pml_ob1_use_all_rdma=false (default) | |
| [balthasar.local:04079] pml_ob1_allocator=bucket (default) | |
| [balthasar.local:04079] pml_ob1_accelerator_events_max=400 (default) | |
| [balthasar.local:04079] coll= (default) | |
| [balthasar.local:04079] coll_base_verbose=max (environment) | |
| [balthasar.local:04079] coll_adapt_priority=0 (default) | |
| [balthasar.local:04079] coll_adapt_verbose=100 (default) | |
| [balthasar.local:04079] coll_adapt_context_free_list_min=64 (default) | |
| [balthasar.local:04079] coll_adapt_context_free_list_max=1024 (default) | |
| [balthasar.local:04079] coll_adapt_context_free_list_inc=32 (default) | |
| [balthasar.local:04079] coll_adapt_bcast_algorithm=1 (default) | |
| [balthasar.local:04079] coll_adapt_bcast_segment_size=0 (default) | |
| [balthasar.local:04079] coll_adapt_bcast_max_send_requests=2 (default) | |
| [balthasar.local:04079] coll_adapt_bcast_max_recv_requests=3 (default) | |
| [balthasar.local:04079] coll_adapt_bcast_synchronous_send=true (default) | |
| [balthasar.local:04079] coll_adapt_reduce_algorithm=1 (default) | |
| [balthasar.local:04079] coll_adapt_reduce_segment_size=163740 (default) | |
| [balthasar.local:04079] coll_adapt_reduce_max_send_requests=2 (default) | |
| [balthasar.local:04079] coll_adapt_reduce_max_recv_requests=3 (default) | |
| [balthasar.local:04079] coll_adapt_inbuf_free_list_min=10 (default) | |
| [balthasar.local:04079] coll_adapt_inbuf_free_list_max=10000 (default) | |
| [balthasar.local:04079] coll_adapt_inbuf_free_list_inc=10 (default) | |
| [balthasar.local:04079] coll_adapt_reduce_synchronous_send=true (default) | |
| [balthasar.local:04079] coll_basic_priority=10 (default) | |
| [balthasar.local:04079] coll_basic_crossover=4 (default) | |
| [balthasar.local:04079] coll_han_priority=35 (default) | |
| [balthasar.local:04079] coll_han_verbose=0 (default) | |
| [balthasar.local:04079] coll_han_bcast_segsize=65536 (default) | |
| [balthasar.local:04079] coll_han_bcast_up_module=self (default) | |
| [balthasar.local:04079] coll_han_bcast_low_module=self (default) | |
| [balthasar.local:04079] coll_han_reduce_segsize=65536 (default) | |
| [balthasar.local:04079] coll_han_reduce_up_module=self (default) | |
| [balthasar.local:04079] coll_han_reduce_low_module=self (default) | |
| [balthasar.local:04079] coll_han_allreduce_segsize=65536 (default) | |
| [balthasar.local:04079] coll_han_allreduce_up_module=self (default) | |
| [balthasar.local:04079] coll_han_allreduce_low_module=self (default) | |
| [balthasar.local:04079] coll_han_allgather_up_module=self (default) | |
| [balthasar.local:04079] coll_han_allgather_low_module=self (default) | |
| [balthasar.local:04079] coll_han_gather_up_module=self (default) | |
| [balthasar.local:04079] coll_han_gather_low_module=self (default) | |
| [balthasar.local:04079] coll_han_scatter_up_module=self (default) | |
| [balthasar.local:04079] coll_han_scatter_low_module=self (default) | |
| [balthasar.local:04079] coll_han_reproducible=false (default) | |
| [balthasar.local:04079] coll_han_use_allgather_algorithm=default (default) | |
| [balthasar.local:04079] coll_han_use_allreduce_algorithm=default (default) | |
| [balthasar.local:04079] coll_han_use_barrier_algorithm=default (default) | |
| [balthasar.local:04079] coll_han_use_bcast_algorithm=default (default) | |
| [balthasar.local:04079] coll_han_use_gather_algorithm=default (default) | |
| [balthasar.local:04079] coll_han_use_reduce_algorithm=default (default) | |
| [balthasar.local:04079] coll_han_use_scatter_algorithm=default (default) | |
| [balthasar.local:04079] coll_han_use_simple_allgather=false (default) | |
| [balthasar.local:04079] coll_han_use_simple_allreduce=false (default) | |
| [balthasar.local:04079] coll_han_use_simple_bcast=false (default) | |
| [balthasar.local:04079] coll_han_use_simple_gather=true (default) | |
| [balthasar.local:04079] coll_han_use_simple_reduce=false (default) | |
| [balthasar.local:04079] coll_han_use_simple_scatter=false (default) | |
| [balthasar.local:04079] coll_han_allgather_dynamic_intra_node_module=3 (default) | |
| [balthasar.local:04079] coll_han_allgather_dynamic_inter_node_module=3 (default) | |
| [balthasar.local:04079] coll_han_allgather_dynamic_global_communicator_module=6 (default) | |
| [balthasar.local:04079] coll_han_allgatherv_dynamic_intra_node_module=3 (default) | |
| [balthasar.local:04079] coll_han_allgatherv_dynamic_inter_node_module=3 (default) | |
| [balthasar.local:04079] coll_han_allgatherv_dynamic_global_communicator_module=6 (default) | |
| [balthasar.local:04079] coll_han_allreduce_dynamic_intra_node_module=3 (default) | |
| [balthasar.local:04079] coll_han_allreduce_dynamic_inter_node_module=3 (default) | |
| [balthasar.local:04079] coll_han_allreduce_dynamic_global_communicator_module=6 (default) | |
| [balthasar.local:04079] coll_han_barrier_dynamic_intra_node_module=3 (default) | |
| [balthasar.local:04079] coll_han_barrier_dynamic_inter_node_module=3 (default) | |
| [balthasar.local:04079] coll_han_barrier_dynamic_global_communicator_module=6 (default) | |
| [balthasar.local:04079] coll_han_bcast_dynamic_intra_node_module=3 (default) | |
| [balthasar.local:04079] coll_han_bcast_dynamic_inter_node_module=3 (default) | |
| [balthasar.local:04079] coll_han_bcast_dynamic_global_communicator_module=6 (default) | |
| [balthasar.local:04079] coll_han_gather_dynamic_intra_node_module=3 (default) | |
| [balthasar.local:04079] coll_han_gather_dynamic_inter_node_module=3 (default) | |
| [balthasar.local:04079] coll_han_gather_dynamic_global_communicator_module=6 (default) | |
| [balthasar.local:04079] coll_han_reduce_dynamic_intra_node_module=3 (default) | |
| [balthasar.local:04079] coll_han_reduce_dynamic_inter_node_module=3 (default) | |
| [balthasar.local:04079] coll_han_reduce_dynamic_global_communicator_module=6 (default) | |
| [balthasar.local:04079] coll_han_scatter_dynamic_intra_node_module=3 (default) | |
| [balthasar.local:04079] coll_han_scatter_dynamic_inter_node_module=3 (default) | |
| [balthasar.local:04079] coll_han_scatter_dynamic_global_communicator_module=6 (default) | |
| [balthasar.local:04079] coll_han_use_dynamic_file_rules=false (default) | |
| [balthasar.local:04079] coll_han_dynamic_rules_filename= (default) | |
| [balthasar.local:04079] coll_han_dump_dynamic_rules=false (default) | |
| [balthasar.local:04079] coll_han_max_dynamic_errors=10 (default) | |
| [balthasar.local:04079] coll_inter_priority=40 (default) | |
| [balthasar.local:04079] coll_inter_verbose=0 (default) | |
| [balthasar.local:04079] coll_libnbc_priority=10 (default) | |
| [balthasar.local:04079] coll_libnbc_ibcast_skip_dt_decision=true (default) | |
| [balthasar.local:04079] coll_libnbc_iallgather_algorithm=ignore (default) | |
| [balthasar.local:04079] coll_libnbc_iallreduce_algorithm=ignore (default) | |
| [balthasar.local:04079] coll_libnbc_ibcast_algorithm=ignore (default) | |
| [balthasar.local:04079] coll_libnbc_ibcast_knomial_radix=4 (default) | |
| [balthasar.local:04079] coll_libnbc_iexscan_algorithm=ignore (default) | |
| [balthasar.local:04079] coll_libnbc_ireduce_algorithm=ignore (default) | |
| [balthasar.local:04079] coll_libnbc_iscan_algorithm=ignore (default) | |
| [balthasar.local:04079] coll_self_priority=75 (default) | |
| [balthasar.local:04079] coll_sync_priority=50 (default) | |
| [balthasar.local:04079] coll_sync_barrier_before=0 (default) | |
| [balthasar.local:04079] coll_sync_barrier_after=0 (default) | |
| [balthasar.local:04079] coll_tuned_priority=30 (default) | |
| [balthasar.local:04079] coll_tuned_init_tree_fanout=4 (default) | |
| [balthasar.local:04079] coll_tuned_init_chain_fanout=4 (default) | |
| [balthasar.local:04079] coll_tuned_alltoall_small_msg=200 (default) | |
| [balthasar.local:04079] coll_tuned_alltoall_intermediate_msg=3000 (default) | |
| [balthasar.local:04079] coll_tuned_use_dynamic_rules=false (default) | |
| [balthasar.local:04079] coll_tuned_dynamic_rules_filename= (default) | |
| [balthasar.local:04079] coll_tuned_allreduce_algorithm_count=7 (default) | |
| [balthasar.local:04079] coll_tuned_allreduce_algorithm=ignore (default) | |
| [balthasar.local:04079] coll_tuned_allreduce_algorithm_segmentsize=0 (default) | |
| [balthasar.local:04079] coll_tuned_allreduce_algorithm_tree_fanout=4 (default) | |
| [balthasar.local:04079] coll_tuned_allreduce_algorithm_chain_fanout=4 (default) | |
| [balthasar.local:04079] coll_tuned_alltoall_algorithm_count=6 (default) | |
| [balthasar.local:04079] coll_tuned_alltoall_algorithm=ignore (default) | |
| [balthasar.local:04079] coll_tuned_alltoall_algorithm_segmentsize=0 (default) | |
| [balthasar.local:04079] coll_tuned_alltoall_algorithm_tree_fanout=4 (default) | |
| [balthasar.local:04079] coll_tuned_alltoall_algorithm_chain_fanout=4 (default) | |
| [balthasar.local:04079] coll_tuned_alltoall_large_msg=3000 (default) | |
| [balthasar.local:04079] coll_tuned_alltoall_min_procs=0 (default) | |
| [balthasar.local:04079] coll_tuned_alltoall_algorithm_max_requests=0 (default) | |
| [balthasar.local:04079] coll_tuned_allgather_algorithm_count=8 (default) | |
| [balthasar.local:04079] coll_tuned_allgather_algorithm=ignore (default) | |
| [balthasar.local:04079] coll_tuned_allgather_algorithm_segmentsize=0 (default) | |
| [balthasar.local:04079] coll_tuned_allgather_algorithm_tree_fanout=4 (default) | |
| [balthasar.local:04079] coll_tuned_allgather_algorithm_chain_fanout=4 (default) | |
| [balthasar.local:04079] coll_tuned_allgatherv_algorithm_count=7 (default) | |
| [balthasar.local:04079] coll_tuned_allgatherv_algorithm=ignore (default) | |
| [balthasar.local:04079] coll_tuned_allgatherv_algorithm_segmentsize=0 (default) | |
| [balthasar.local:04079] coll_tuned_allgatherv_algorithm_tree_fanout=4 (default) | |
| [balthasar.local:04079] coll_tuned_allgatherv_algorithm_chain_fanout=4 (default) | |
| [balthasar.local:04079] coll_tuned_alltoallv_algorithm_count=3 (default) | |
| [balthasar.local:04079] coll_tuned_alltoallv_algorithm=ignore (default) | |
| [balthasar.local:04079] coll_tuned_barrier_algorithm_count=7 (default) | |
| [balthasar.local:04079] coll_tuned_barrier_algorithm=ignore (default) | |
| [balthasar.local:04079] coll_tuned_bcast_algorithm_count=10 (default) | |
| [balthasar.local:04079] coll_tuned_bcast_algorithm=ignore (default) | |
| [balthasar.local:04079] coll_tuned_bcast_algorithm_segmentsize=0 (default) | |
| [balthasar.local:04079] coll_tuned_bcast_algorithm_tree_fanout=4 (default) | |
| [balthasar.local:04079] coll_tuned_bcast_algorithm_chain_fanout=4 (default) | |
| [balthasar.local:04079] coll_tuned_bcast_algorithm_knomial_radix=4 (default) | |
| [balthasar.local:04079] coll_tuned_reduce_algorithm_count=8 (default) | |
| [balthasar.local:04079] coll_tuned_reduce_algorithm=ignore (default) | |
| [balthasar.local:04079] coll_tuned_reduce_algorithm_segmentsize=0 (default) | |
| [balthasar.local:04079] coll_tuned_reduce_algorithm_tree_fanout=4 (default) | |
| [balthasar.local:04079] coll_tuned_reduce_algorithm_chain_fanout=4 (default) | |
| [balthasar.local:04079] coll_tuned_reduce_algorithm_max_requests=0 (default) | |
| [balthasar.local:04079] coll_tuned_reduce_scatter_algorithm_count=5 (default) | |
| [balthasar.local:04079] coll_tuned_reduce_scatter_algorithm=ignore (default) | |
| [balthasar.local:04079] coll_tuned_reduce_scatter_algorithm_segmentsize=0 (default) | |
| [balthasar.local:04079] coll_tuned_reduce_scatter_algorithm_tree_fanout=4 (default) | |
| [balthasar.local:04079] coll_tuned_reduce_scatter_algorithm_chain_fanout=4 (default) | |
| [balthasar.local:04079] coll_tuned_reduce_scatter_block_algorithm_count=5 (default) | |
| [balthasar.local:04079] coll_tuned_reduce_scatter_block_algorithm=ignore (default) | |
| [balthasar.local:04079] coll_tuned_reduce_scatter_block_algorithm_segmentsize=0 (default) | |
| [balthasar.local:04079] coll_tuned_reduce_scatter_block_algorithm_tree_fanout=4 (default) | |
| [balthasar.local:04079] coll_tuned_gather_algorithm_count=4 (default) | |
| [balthasar.local:04079] coll_tuned_gather_algorithm=ignore (default) | |
| [balthasar.local:04079] coll_tuned_gather_algorithm_segmentsize=0 (default) | |
| [balthasar.local:04079] coll_tuned_gather_algorithm_tree_fanout=4 (default) | |
| [balthasar.local:04079] coll_tuned_gather_algorithm_chain_fanout=4 (default) | |
| [balthasar.local:04079] coll_tuned_scatter_algorithm_count=4 (default) | |
| [balthasar.local:04079] coll_tuned_scatter_algorithm=ignore (default) | |
| [balthasar.local:04079] coll_tuned_scatter_algorithm_segmentsize=0 (default) | |
| [balthasar.local:04079] coll_tuned_scatter_algorithm_tree_fanout=4 (default) | |
| [balthasar.local:04079] coll_tuned_scatter_algorithm_chain_fanout=4 (default) | |
| [balthasar.local:04079] coll_tuned_scatter_min_procs=0 (default) | |
| [balthasar.local:04079] coll_tuned_scatter_algorithm_max_requests=0 (default) | |
| [balthasar.local:04079] coll_tuned_scatter_intermediate_msg=0 (default) | |
| [balthasar.local:04079] coll_tuned_scatter_large_msg=0 (default) | |
| [balthasar.local:04079] coll_tuned_exscan_algorithm_count=3 (default) | |
| [balthasar.local:04079] coll_tuned_exscan_algorithm=ignore (default) | |
| [balthasar.local:04079] coll_tuned_scan_algorithm_count=3 (default) | |
| [balthasar.local:04079] coll_tuned_scan_algorithm=ignore (default) | |
| [balthasar.local:04079] coll_ftagree_priority=30 (default) | |
| [balthasar.local:04079] coll_ftagree_agreement=1 (default) | |
| [balthasar.local:04079] coll_ftagree_era_topology=1 (default) | |
| [balthasar.local:04079] coll_ftagree_era_rebuild=0 (default) | |
| [balthasar.local:04079] coll_sm_priority=0 (default) | |
| [balthasar.local:04079] coll_sm_control_size=4096 (default) | |
| [balthasar.local:04079] coll_sm_fragment_size=8192 (default) | |
| [balthasar.local:04079] coll_sm_comm_in_use_flags=2 (default) | |
| [balthasar.local:04079] coll_sm_comm_num_segments=8 (default) | |
| [balthasar.local:04079] coll_sm_tree_degree=4 (default) | |
| [balthasar.local:04079] coll_sm_info_num_procs=4 (default) | |
| [balthasar.local:04079] coll_sm_shared_mem_used_data=548864 (default) | |
| [balthasar.local:04079] osc= (default) | |
| [balthasar.local:04079] osc_base_verbose=error (default) | |
| [balthasar.local:04079] osc_sm_backing_directory=/var/folders/t6/dkzc3gr14b34jmq69r00103m0000gn/T//prterun.balthasar.4077.501/1/0 (default) | |
| [balthasar.local:04079] osc_sm_priority=100 (default) | |
| [balthasar.local:04079] osc_rdma_no_locks=false (default) | |
| [balthasar.local:04079] osc_rdma_acc_single_intrinsic=false (default) | |
| [balthasar.local:04079] osc_rdma_acc_use_amo=true (default) | |
| [balthasar.local:04079] osc_rdma_buffer_size=32768 (default) | |
| [balthasar.local:04079] osc_rdma_max_attach=64 (default) | |
| [balthasar.local:04079] osc_rdma_priority=20 (default) | |
| [balthasar.local:04079] osc_rdma_locking_mode=two_level (default) | |
| [balthasar.local:04079] osc_rdma_btls=ugni,uct,ofi (default) | |
| [balthasar.local:04079] osc_rdma_backing_directory=/var/folders/t6/dkzc3gr14b34jmq69r00103m0000gn/T//prterun.balthasar.4077.501/1/0 (default) | |
| [balthasar.local:04079] osc_rdma_network_max_amo=32 (default) | |
| [balthasar.local:04079] osc_rdma_minimum_memory_alignment=16384 (default) | |
| [balthasar.local:04079] btl_tcp_bandwidth_en0=100 (default) | |
| [balthasar.local:04079] btl_tcp_latency_en0=100 (default) | |
| [balthasar.local:04079] btl_tcp_bandwidth_en0:0=100 (default) | |
| [balthasar.local:04079] btl_tcp_latency_en0:0=100 (default) | |
| [balthasar.local:04079] part= (default) | |
| [balthasar.local:04079] part_base_verbose=error (default) | |
| [balthasar.local:04079] part_persist_free_list_num=4 (default) | |
| [balthasar.local:04079] part_persist_free_list_max=-1 (default) | |
| [balthasar.local:04079] part_persist_free_list_inc=64 (default) | |
| [balthasar.local:04079] coll:base:comm_select: new communicator: MPI_COMM_WORLD (cid 0) | |
| [balthasar.local:04079] coll:base:comm_select: Checking all available modules | |
| [balthasar.local:04079] coll:adapt:comm_query (0/MPI_COMM_WORLD): pick me! pick me! | |
| [balthasar.local:04079] coll:base:comm_select: component available: adapt, priority: 0 | |
| [balthasar.local:04079] coll:base:comm_select: component available: basic, priority: 10 | |
| [balthasar.local:04079] coll:han:comm_query (0/MPI_COMM_WORLD): pick me! pick me! | |
| [balthasar.local:04079] coll:base:comm_select: component available: han, priority: 35 | |
| [balthasar.local:04079] coll:base:comm_select: component not available: inter | |
| [balthasar.local:04079] coll:base:comm_select: component disqualified: inter (priority -1 < 0) | |
| [balthasar.local:04079] coll:base:comm_select: component available: libnbc, priority: 10 | |
| [balthasar.local:04079] coll:base:comm_select: component not available: self | |
| [balthasar.local:04079] coll:base:comm_select: component disqualified: self (priority -1 < 0) | |
| [balthasar.local:04079] coll:base:comm_select: component not available: sync | |
| [balthasar.local:04079] coll:base:comm_select: component disqualified: sync (priority -1 < 0) | |
| [balthasar.local:04079] coll:base:comm_select: component available: tuned, priority: 30 | |
| [balthasar.local:04079] coll:base:comm_select: component not available: ftagree | |
| [balthasar.local:04079] coll:base:comm_select: component disqualified: ftagree (priority -1 < 0) | |
| [balthasar.local:04079] coll:sm:comm_query (0/MPI_COMM_WORLD): intercomm, comm is too small, or not all peers local; disqualifying myself | |
| [balthasar.local:04079] coll:base:comm_select: component not available: sm | |
| [balthasar.local:04079] coll:base:comm_select: component disqualified: sm (priority -1 < 0) | |
| [balthasar.local:04079] (0/MPI_COMM_WORLD): no underlying reduce; disqualifying myself | |
| [balthasar.local:04079] coll:base:comm_select: selecting adapt, priority 0, Disabled | |
| [balthasar.local:04079] coll:base:comm_select: selecting basic, priority 10, Enabled | |
| [balthasar.local:04079] coll:base:comm_select: selecting libnbc, priority 10, Enabled | |
| [balthasar.local:04079] coll:base:comm_select: selecting tuned, priority 30, Enabled | |
| [balthasar.local:04079] coll:han:get_all_coll_modules HAN found module basic with id 1 for topological level 2 (global_communicator) for communicator (0/MPI_COMM_WORLD) | |
| [balthasar.local:04079] coll:han:get_all_coll_modules HAN found module libnbc with id 2 for topological level 2 (global_communicator) for communicator (0/MPI_COMM_WORLD) | |
| [balthasar.local:04079] coll:han:get_all_coll_modules HAN found module tuned with id 3 for topological level 2 (global_communicator) for communicator (0/MPI_COMM_WORLD) | |
| [balthasar.local:04079] coll:han:get_all_coll_modules HAN sub-communicator modules storage for topological level 2 (global_communicator) gets 4 modules for communicator (0/MPI_COMM_WORLD) | |
| [balthasar.local:04079] coll:han:reduce_reproducible: fallback on tuned | |
| [balthasar.local:04079] coll:han:allreduce_reproducible: fallback on tuned | |
| [balthasar.local:04079] coll:base:comm_select: selecting han, priority 35, Enabled | |
| [balthasar.local:04079] coll:base:comm_select: new communicator: MPI_COMM_SELF (cid 1) | |
| [balthasar.local:04079] coll:base:comm_select: Checking all available modules | |
| [balthasar.local:04079] coll:adapt:comm_query (1/MPI_COMM_SELF): intercomm, comm is too small; disqualifying myself | |
| [balthasar.local:04079] coll:base:comm_select: component not available: adapt | |
| [balthasar.local:04079] coll:base:comm_select: component disqualified: adapt (priority -1 < 0) | |
| [balthasar.local:04079] coll:base:comm_select: component available: basic, priority: 10 | |
| [balthasar.local:04079] coll:han:comm_query (1/MPI_COMM_SELF): comm is too small; disqualifying myself | |
| [balthasar.local:04079] coll:base:comm_select: component not available: han | |
| [balthasar.local:04079] coll:base:comm_select: component disqualified: han (priority -1 < 0) | |
| [balthasar.local:04079] coll:base:comm_select: component not available: inter | |
| [balthasar.local:04079] coll:base:comm_select: component disqualified: inter (priority -1 < 0) | |
| [balthasar.local:04079] coll:base:comm_select: component available: libnbc, priority: 10 | |
| [balthasar.local:04079] coll:base:comm_select: component available: self, priority: 75 | |
| [balthasar.local:04079] coll:base:comm_select: component not available: sync | |
| [balthasar.local:04079] coll:base:comm_select: component disqualified: sync (priority -1 < 0) | |
| [balthasar.local:04079] coll:base:comm_select: component not available: tuned | |
| [balthasar.local:04079] coll:base:comm_select: component disqualified: tuned (priority -1 < 0) | |
| [balthasar.local:04079] coll:base:comm_select: component not available: ftagree | |
| [balthasar.local:04079] coll:base:comm_select: component disqualified: ftagree (priority -1 < 0) | |
| [balthasar.local:04079] coll:sm:comm_query (1/MPI_COMM_SELF): intercomm, comm is too small, or not all peers local; disqualifying myself | |
| [balthasar.local:04079] coll:base:comm_select: component not available: sm | |
| [balthasar.local:04079] coll:base:comm_select: component disqualified: sm (priority -1 < 0) | |
| [balthasar.local:04079] coll:base:comm_select: selecting basic, priority 10, Enabled | |
| [balthasar.local:04079] coll:base:comm_select: selecting libnbc, priority 10, Enabled | |
| [balthasar.local:04079] coll:base:comm_select: selecting self, priority 75, Enabled | |
| [melchior.local:03126] coll:base:comm_select: new communicator: MPI_COMM_WORLD (cid 0) | |
| [melchior.local:03126] coll:base:comm_select: Checking all available modules | |
| [melchior.local:03126] coll:adapt:comm_query (0/MPI_COMM_WORLD): pick me! pick me! | |
| [melchior.local:03126] coll:base:comm_select: component available: adapt, priority: 0 | |
| [melchior.local:03126] coll:base:comm_select: component available: basic, priority: 10 | |
| [melchior.local:03126] coll:han:comm_query (0/MPI_COMM_WORLD): pick me! pick me! | |
| [melchior.local:03126] coll:base:comm_select: component available: han, priority: 35 | |
| [melchior.local:03126] coll:base:comm_select: component not available: inter | |
| [melchior.local:03126] coll:base:comm_select: component disqualified: inter (priority -1 < 0) | |
| [melchior.local:03126] coll:base:comm_select: component available: libnbc, priority: 10 | |
| [melchior.local:03126] coll:base:comm_select: component not available: self | |
| [melchior.local:03126] coll:base:comm_select: component disqualified: self (priority -1 < 0) | |
| [melchior.local:03126] coll:base:comm_select: component not available: sync | |
| [melchior.local:03126] coll:base:comm_select: component disqualified: sync (priority -1 < 0) | |
| [melchior.local:03126] coll:base:comm_select: component available: tuned, priority: 30 | |
| [melchior.local:03126] coll:base:comm_select: component not available: ftagree | |
| [melchior.local:03126] coll:base:comm_select: component disqualified: ftagree (priority -1 < 0) | |
| [melchior.local:03126] coll:sm:comm_query (0/MPI_COMM_WORLD): intercomm, comm is too small, or not all peers local; disqualifying myself | |
| [melchior.local:03126] coll:base:comm_select: component not available: sm | |
| [melchior.local:03126] coll:base:comm_select: component disqualified: sm (priority -1 < 0) | |
| [melchior.local:03126] (0/MPI_COMM_WORLD): no underlying reduce; disqualifying myself | |
| [melchior.local:03126] coll:base:comm_select: selecting adapt, priority 0, Disabled | |
| [melchior.local:03126] coll:base:comm_select: selecting basic, priority 10, Enabled | |
| [melchior.local:03126] coll:base:comm_select: selecting libnbc, priority 10, Enabled | |
| [melchior.local:03126] coll:base:comm_select: selecting tuned, priority 30, Enabled | |
| [melchior.local:03126] coll:han:get_all_coll_modules HAN found module basic with id 1 for topological level 2 (global_communicator) for communicator (0/MPI_COMM_WORLD) | |
| [melchior.local:03126] coll:han:get_all_coll_modules HAN found module libnbc with id 2 for topological level 2 (global_communicator) for communicator (0/MPI_COMM_WORLD) | |
| [melchior.local:03126] coll:han:get_all_coll_modules HAN found module tuned with id 3 for topological level 2 (global_communicator) for communicator (0/MPI_COMM_WORLD) | |
| [melchior.local:03126] coll:han:get_all_coll_modules HAN sub-communicator modules storage for topological level 2 (global_communicator) gets 4 modules for communicator (0/MPI_COMM_WORLD) | |
| [melchior.local:03126] coll:base:comm_select: selecting han, priority 35, Enabled | |
| [melchior.local:03126] coll:base:comm_select: new communicator: MPI_COMM_SELF (cid 1) | |
| [melchior.local:03126] coll:base:comm_select: Checking all available modules | |
| [melchior.local:03126] coll:adapt:comm_query (1/MPI_COMM_SELF): intercomm, comm is too small; disqualifying myself | |
| [melchior.local:03126] coll:base:comm_select: component not available: adapt | |
| [melchior.local:03126] coll:base:comm_select: component disqualified: adapt (priority -1 < 0) | |
| [melchior.local:03126] coll:base:comm_select: component available: basic, priority: 10 | |
| [melchior.local:03126] coll:han:comm_query (1/MPI_COMM_SELF): comm is too small; disqualifying myself | |
| [melchior.local:03126] coll:base:comm_select: component not available: han | |
| [melchior.local:03126] coll:base:comm_select: component disqualified: han (priority -1 < 0) | |
| [melchior.local:03126] coll:base:comm_select: component not available: inter | |
| [melchior.local:03126] coll:base:comm_select: component disqualified: inter (priority -1 < 0) | |
| [melchior.local:03126] coll:base:comm_select: component available: libnbc, priority: 10 | |
| [melchior.local:03126] coll:base:comm_select: component available: self, priority: 75 | |
| [melchior.local:03126] coll:base:comm_select: component not available: sync | |
| [melchior.local:03126] coll:base:comm_select: component disqualified: sync (priority -1 < 0) | |
| [melchior.local:03126] coll:base:comm_select: component not available: tuned | |
| [melchior.local:03126] coll:base:comm_select: component disqualified: tuned (priority -1 < 0) | |
| [melchior.local:03126] coll:base:comm_select: component not available: ftagree | |
| [melchior.local:03126] coll:base:comm_select: component disqualified: ftagree (priority -1 < 0) | |
| [melchior.local:03126] coll:sm:comm_query (1/MPI_COMM_SELF): intercomm, comm is too small, or not all peers local; disqualifying myself | |
| [melchior.local:03126] coll:base:comm_select: component not available: sm | |
| [melchior.local:03126] coll:base:comm_select: component disqualified: sm (priority -1 < 0) | |
| [melchior.local:03126] coll:base:comm_select: selecting basic, priority 10, Enabled | |
| [melchior.local:03126] coll:base:comm_select: selecting libnbc, priority 10, Enabled | |
| [melchior.local:03126] coll:base:comm_select: selecting self, priority 75, Enabled | |
| [balthasar.local:04079] coll:han:get_dynamic_rule HAN searched for collective 2 (allreduce) but did not find any rule for this collective | |
| [balthasar.local:04079] coll:han:get_dynamic_rule HAN searched for collective 2 (allreduce) but did not find any rule for this collective | |
| [balthasar.local:04079] coll:han:get_algorithm allreduce size:4 algorithm:0 default | |
| [balthasar.local:04079] mca: bml: Using tcp btl for send to [[40704,1],1] on node melchior | |
| [balthasar.local:04079] mca: bml: Using tcp btl for send to [[40704,1],1] on node melchior | |
| [balthasar.local:04079] btl: tcp: attempting to connect() to [[40704,1],1] address 192.168.68.53 on port 1024 | |
| [balthasar.local:04079] btl:tcp: would block, so allowing background progress | |
| [balthasar.local:04079] btl:tcp: connect() to 192.168.68.53:1024 completed (complete_connect), sending connect ACK | |
| [melchior.local:03126] coll:han:get_dynamic_rule HAN searched for collective 2 (allreduce) but did not find any rule for this collective | |
| [melchior.local:03126] coll:han:get_dynamic_rule HAN searched for collective 2 (allreduce) but did not find any rule for this collective | |
| [melchior.local:03126] mca: bml: Using tcp btl for send to [[40704,1],0] on node balthasar | |
| [melchior.local:03126] mca: bml: Using tcp btl for send to [[40704,1],0] on node balthasar | |
| [melchior.local:03126] btl: tcp: attempting to connect() to [[40704,1],0] address 192.168.68.80 on port 1024 | |
| [melchior.local:03126] btl:tcp: would block, so allowing background progress | |
| [melchior.local:03126] btl:tcp: now connected to 192.168.68.80, process [[40704,1],0] | |
| [melchior.local:03126] btl: tcp: attempting to connect() to [[40704,1],0] address fdcd:ee7b:759e:1744:42b:9477:f1a9:15b5 on port 1024 | |
| [melchior.local:03126] btl:tcp: would block, so allowing background progress | |
| [balthasar.local:04079] btl:tcp: now connected to fdcd:ee7b:759e:1744:a200:7dd3:e2d:7f69, process [[40704,1],1] | |
| [melchior.local:03126] btl:tcp: connect() to fdcd:ee7b:759e:1744:42b:9477:f1a9:15b5:1024 completed (complete_connect), sending connect ACK | |
| [balthasar][[40704,1],0][btl_tcp_frag.c:228:mca_btl_tcp_frag_recv] mca_btl_tcp_frag_recv: readv error (0x17ffffb38, 58568) | |
| Bad address(1) | |
| [balthasar:00000] *** An error occurred in Socket closed | |
| [balthasar:00000] *** reported by process [2667577345,0] | |
| [balthasar:00000] *** on a NULL communicator | |
| [balthasar:00000] *** Unknown error | |
| [balthasar:00000] *** MPI_ERRORS_ARE_FATAL (processes in this communicator will now abort, | |
| [balthasar:00000] *** and MPI will try to terminate your MPI job as well) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment