python scripts/submit_finetune_job.py \
    --cluster ai2/augusta-google-1 \
    --priority high \
    --workspace ai2/tulu-3-dev \
    --num_nodes 4 \
    --image costah/open_instruct_ppo_ray_ninja \
    --default_beaker_config configs/beaker_configs/default_finetune_multinode.yaml \
    --config configs/train_configs/sft/tulu3_8b_preview_mix_v3.9.yaml \
    --exp_name olmo1124_finetune
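The submit script (included further down) defines only the flags used above in its argparse; any other --key value pairs are collected via parse_known_args and merged into the training command, overriding matching keys from the --config YAML (--exp_name above travels this way). A minimal sweep sketch under that assumption, with made-up learning-rate values:

# Hypothetical sweep: --learning_rate is not a submit-script flag, so it is
# forwarded to open_instruct/finetune.py and overrides the value from --config.
for lr in 5e-6 1e-5; do
    python scripts/submit_finetune_job.py \
        --cluster ai2/augusta-google-1 \
        --num_nodes 4 \
        --default_beaker_config configs/beaker_configs/default_finetune_multinode.yaml \
        --config configs/train_configs/sft/tulu3_8b_preview_mix_v3.9.yaml \
        --learning_rate "$lr"
done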
version: v2
description: open-instruct-finetune-multinode
budget: ai2/oe-adapt
tasks:
  - name: open-instruct-finetune-multinode
    replicas: 4
    leaderSelection: true
    hostNetworking: true
    propagateFailure: true
    propagatePreemption: true
    synchronizedStartTimeout: 60m
    image:
      beaker: nathanl/open_instruct_auto
    command: [
      '/bin/sh', '-c'
    ]
    arguments: ['
      unset CUDA_LAUNCH_BLOCKING && PYTHONPATH="/stage:$PYTHONPATH" pip install git+https://github.com/vwxyzjn/transformers.git@olmo1124_classification && accelerate launch
      --mixed_precision bf16
      --num_machines 4
      --num_processes 32
      --machine_rank $BEAKER_REPLICA_RANK
      --main_process_ip $BEAKER_LEADER_REPLICA_HOSTNAME
      --main_process_port 29400
      --use_deepspeed
      --deepspeed_config_file configs/ds_configs/stage3_no_offloading_accelerate.conf
      --deepspeed_multinode_launcher standard
      open_instruct/finetune.py
      --model_name_or_path meta-llama/Meta-Llama-3-8B
      --tokenizer_name meta-llama/Meta-Llama-3-8B
      --use_slow_tokenizer
      --use_flash_attn
      --max_seq_length 4096
      --preprocessing_num_workers 16
      --per_device_train_batch_size 1
      --gradient_accumulation_steps 4
      --learning_rate 5e-6
      --lr_scheduler_type linear
      --warmup_ratio 0.03
      --weight_decay 0.
      --num_train_epochs 2
      --output_dir /output/
      --with_tracking
      --report_to tensorboard
      --logging_steps 1
      --reduce_loss sum
    ']
    envVars:
      - name: CUDA_DEVICE_ORDER
        value: PCI_BUS_ID
      - name: TRANSFORMERS_CACHE
        value: ./cache/
      - name: WANDB_API_KEY
        secret: WANDB_API_KEY
      - name: WANDB_PROJECT
        value: open-instruct
      - name: WANDB_WATCH
        value: false
      - name: WANDB_LOG_MODEL
        value: false
      - name: WANDB_DISABLED
        value: true
      - name: HF_TOKEN
        secret: HF_TOKEN
    # datasets:
    #   - mountPath: /oe-adapt-default
    #     source:
    #       weka: oe-adapt-default
    result:
      path: /output
    resources:
      gpuCount: 8
    context:
      priority: normal
      preemptible: true
import copy
import subprocess
import yaml
from datetime import datetime
import argparse
import re
import shlex

from open_instruct.utils import get_beaker_whoami


def load_yaml(file_path):
    with open(file_path, 'r') as f:
        return yaml.load(f, Loader=yaml.FullLoader)
def main():
    parser = argparse.ArgumentParser(description="Run experiment with Beaker config")
    parser.add_argument("--default_beaker_config", default="configs/beaker_configs/default_finetune.yaml",
                        help="Path to the default Beaker config file")
    parser.add_argument("--config", default=None,
                        help="Path to an additional config file to override default settings")
    parser.add_argument("--cluster", type=str, default="ai2/allennlp-cirrascale", help="Beaker cluster to use")
    parser.add_argument("--priority", type=str, default="high", help="Priority of the job")
    parser.add_argument("--preemptible", type=bool, default=True, help="Whether to use preemptible instances")
    parser.add_argument("--num_gpus", type=int, default=8, help="Number of GPUs to use")
    parser.add_argument("--num_nodes", type=int, default=1, help="Number of nodes to use")
    parser.add_argument("--image", type=str, default="nathanl/open_instruct_auto", help="Beaker image to use.")
    parser.add_argument("--workspace", type=str, default="ai2/tulu-2-improvements", help="Beaker workspace to use.")
    parser.add_argument("--datasets", nargs='+', help="List of datasets to mount in form <beaker_id>:<mount_path>")
    # allow unknown args from CLI, use this to modify loaded config in bash scripts for sweeping
    # Note: can only override args in the --config passed (not the default FlatArguments class in open_instruct/utils.py)
    # Use parse_known_args instead of parse_args
    args, unknown = parser.parse_known_args()

    # Process unknown arguments
    # must be of the form --{arg} {value}
    unknown_args = {}
    i = 0
    while i < len(unknown):
        if unknown[i].startswith('--'):
            key = unknown[i][2:]
            if i + 1 < len(unknown) and not unknown[i + 1].startswith('--'):
                value = unknown[i + 1]
                i += 2
            else:
                value = None
                i += 1
            unknown_args[key] = value
        else:
            i += 1

    # Print known arguments
    train_config = load_yaml(args.config)
    print("Config:", train_config)
    # Print unknown arguments
    print("Unknown arguments:", unknown_args)

    now = datetime.now().strftime("%m%d%Y%H%M%S")

    with open(args.default_beaker_config, 'r') as f:
        default_yaml = f.read()
    d1 = yaml.load(default_yaml, Loader=yaml.FullLoader)

    if args.num_nodes > 1:
        assert args.num_gpus == 8, "`num_gpus` must be set to 8 when training with multiple nodes"
        d1['tasks'][0]['replicas'] = args.num_nodes
    d1['tasks'][0]['image']['beaker'] = args.image
    d1['tasks'][0]['context']['cluster'] = args.cluster
    d1['tasks'][0]['context']['priority'] = args.priority
    d1['tasks'][0]['context']['preemptible'] = args.preemptible  # True required for Jupiter/Pluto
    d1['tasks'][0]['resources']['gpuCount'] = args.num_gpus

    # modify here for different set of experiments
    experiment_group = "dataset_comparison"
    wandb_project = "open_instruct"
    # if args.wandb_api_key:
    #     wandb_api_key = args.wandb_api_key
    # else:
    #     wandb_api_key = os.environ.get("WANDB_API_KEY")

    # if config is passed, load and merge that
    def override_and_reconstruct_command(original_command, train_config, unknown_args):
        def parse_args(args):
            cmd_dict = {}
            i = 0
            while i < len(args):
                if args[i].startswith('--'):
                    key = args[i][2:]
                    if i + 1 < len(args) and not args[i + 1].startswith('--'):
                        cmd_dict[key] = args[i + 1]
                        i += 2
                    else:
                        cmd_dict[key] = True
                        i += 1
                else:
                    i += 1
            return cmd_dict

        # Split the original command into a list
        cmd_parts = shlex.split(original_command)
        # Find the index of open_instruct/finetune.py
        script_index = cmd_parts.index('open_instruct/finetune.py')
        # Find the index of 'accelerate launch'
        pre_index = cmd_parts.index('launch')

        # Separate the command into pre-script and post-script parts
        pre_script = cmd_parts[:pre_index + 1]  # 'accelerate launch'
        pre_script_args = cmd_parts[pre_index + 1:script_index]
        post_script_args = cmd_parts[script_index + 1:]

        # Parse arguments
        pre_dict = parse_args(pre_script_args)
        post_dict = parse_args(post_script_args)

        # Combine dictionaries and apply overrides
        cmd_dict = {**post_dict}
        cmd_dict.update(train_config)
        cmd_dict.update(unknown_args)

        # Reconstruct the command string
        new_cmd_parts = pre_script
        # add pre python args
        for key, value in pre_dict.items():
            new_cmd_parts.append(f'--{key}')
            if value is not True:
                new_cmd_parts.append(str(value))
        # add python job + post args
        new_cmd_parts.append('open_instruct/finetune.py')
        for key, value in cmd_dict.items():
            if key == "dataset_mixer":
                key = "dataset_mixer_list"
                value = parse_dataset_mixer(value)
            new_cmd_parts.append(f'--{key}')
            # if the value is a list, expand it into separate args
            if isinstance(value, list):
                for v in value:
                    new_cmd_parts.append(str(v))
            elif value is not True:
                new_cmd_parts.append(str(value))

        return ' '.join(new_cmd_parts)
    new_arguments = override_and_reconstruct_command(d1['tasks'][0]['arguments'][0], train_config, unknown_args)

    # replace --num_processes with args.num_gpus * args.num_nodes
    # will be --num_processes {N} before
    new_arguments = re.sub(r'--num_processes \d+', f'--num_processes {args.num_gpus * args.num_nodes}', new_arguments)
    # replace --num_machines with args.num_nodes
    # will be --num_machines {N} before
    new_arguments = re.sub(r'--num_machines \d+', f'--num_machines {args.num_nodes}', new_arguments)

    model_name = get_model_name(new_arguments)
    # if model name has /, replace with _
    model_name = model_name.replace("/", "_")

    # check that the given config uses only one dataset selection mechanism
    dataset_name, dataset_mixer, train_file = check_dataset_selection(new_arguments)
    print("Dataset selection is valid.")
    print(f"Dataset name: {dataset_name}")
    print(f"Dataset mixer: {dataset_mixer}")
    print(f"Train file: {train_file}")
    d = copy.deepcopy(d1)
    d['tasks'][0]['arguments'][0] = new_arguments

    # name and description
    exp_name = f"open_instruct_finetune_{model_name}_{now}"
    d['description'] = exp_name
    d['tasks'][0]['name'] = exp_name

    # add cluster-specific env vars
    if args.num_nodes > 1:
        if args.cluster == "ai2/jupiter-cirrascale-2":
            d['tasks'][0]['envVars'] += [
                {"name": "NCCL_SOCKET_IFNAME", "value": "ib"},
                {"name": "NCCL_IB_HCA", "value": "^=mlx5_bond_0"},
                {"name": "NCCL_DEBUG", "value": "INFO"},
            ]
        elif args.cluster == "ai2/pluto-cirrascale":
            d['tasks'][0]['envVars'] += [
                {"name": "NCCL_IB_HCA", "value": "^=mlx5_1,mlx5_2"},
                {"name": "NCCL_DEBUG", "value": "INFO"},
            ]
        elif args.cluster == "ai2/augusta-google-1":
            d['tasks'][0]['envVars'] += [
                {"name": "LD_LIBRARY_PATH", "value": r"/var/lib/tcpxo/lib64:${LD_LIBRARY_PATH}"},
                {"name": "NCCL_CROSS_NIC", "value": "0"},
                {"name": "NCCL_ALGO", "value": "Ring,Tree"},
                {"name": "NCCL_PROTO", "value": "Simple"},
                {"name": "NCCL_MIN_NCHANNELS", "value": "4"},
                {"name": "NCCL_P2P_NET_CHUNKSIZE", "value": "524288"},
                {"name": "NCCL_P2P_PCI_CHUNKSIZE", "value": "524288"},
                {"name": "NCCL_P2P_NVL_CHUNKSIZE", "value": "1048576"},
                {"name": "NCCL_FASTRAK_NUM_FLOWS", "value": "2"},
                {"name": "NCCL_FASTRAK_ENABLE_CONTROL_CHANNEL", "value": "0"},
                {"name": "NCCL_BUFFSIZE", "value": "8388608"},
                {"name": "NCCL_FASTRAK_USE_SNAP", "value": "1"},
                {"name": "CUDA_VISIBLE_DEVICES", "value": "0,1,2,3,4,5,6,7"},
                {"name": "NCCL_NET_GDR_LEVEL", "value": "PIX"},
                {"name": "NCCL_FASTRAK_ENABLE_HOTPATH_LOGGING", "value": "0"},
                {"name": "NCCL_TUNER_PLUGIN", "value": "libnccl-tuner.so"},
                {"name": "NCCL_TUNER_CONFIG_PATH", "value": "/var/lib/tcpxo/lib64/a3plus_tuner_config.textproto"},
                {"name": "NCCL_SHIMNET_GUEST_CONFIG_CHECKER_CONFIG_FILE", "value": "/var/lib/tcpxo/lib64/a3plus_guest_config.textproto"},
                {"name": "NCCL_FASTRAK_PLUGIN_ACCEPT_TIMEOUT_MS", "value": "600000"},
                {"name": "NCCL_NVLS_ENABLE", "value": "0"},
                {"name": "NCCL_DEBUG", "value": "WARN"},
                {"name": "NCCL_FASTRAK_CTRL_DEV", "value": "enp0s12"},
                {"name": "NCCL_FASTRAK_IFNAME", "value": "enp6s0,enp7s0,enp13s0,enp14s0,enp134s0,enp135s0,enp141s0,enp142s0"},
                {"name": "NCCL_SOCKET_IFNAME", "value": "enp0s12"},
                {"name": "NCCL_USE_SNAP", "value": "1"},
                {"name": "NCCL_FASTRAK_USE_LLCM", "value": "1"},
                {"name": "NCCL_FASTRAK_LLCM_DEVICE_DIRECTORY", "value": "/dev/aperture_devices"},
            ]
    # WANDB settings
    for env in d['tasks'][0]['envVars']:
        if env['name'] == "WANDB_DISABLED":
            env['value'] = False
        if env['name'] == "WANDB_PROJECT":
            env['value'] = wandb_project

    beaker_whoami = get_beaker_whoami()
    d['tasks'][0]['envVars'].append({'name': 'WANDB_NAME', 'value': exp_name})
    d['tasks'][0]['envVars'].append({'name': 'WANDB_RUN_GROUP', 'value': experiment_group})
    d['tasks'][0]['envVars'].append({'name': 'BEAKER_TOKEN', 'secret': f"{beaker_whoami}_BEAKER_TOKEN"})
    d['tasks'][0]['envVars'].append({'name': 'HF_TOKEN', 'secret': f"{beaker_whoami}_HF_TOKEN"})
    d['tasks'][0]['envVars'].append({'name': 'WANDB_API_KEY', 'secret': f"{beaker_whoami}_WANDB_API_KEY"})

    # mount datasets
    if args.datasets:
        if not d['tasks'][0].get('datasets'):
            d['tasks'][0]['datasets'] = []
        for dataset in args.datasets:
            beaker_id, mount_path = dataset.split(':')
            d['tasks'][0]['datasets'].append({
                'mountPath': mount_path,
                'source': {'beaker': beaker_id},
            })

    # optionally, print to debug config
    print(d)

    fn = "configs/beaker_configs/auto_created/{}.yaml".format(exp_name)
    with open(fn, "w") as file:
        yaml.dump(d, file, default_flow_style=True)

    cmd = f"beaker experiment create {fn} --workspace {args.workspace}"
    subprocess.Popen(cmd, shell=True)
def check_dataset_selection(command_string):
    parts = shlex.split(command_string)
    dataset_name = None
    dataset_mixer = None
    train_file = None
    for i, part in enumerate(parts):
        if part == '--dataset_name' and i + 1 < len(parts):
            dataset_name = parts[i + 1]
        elif part == '--dataset_mixer_list' and i + 1 < len(parts):
            dataset_mixer = parts[i + 1]
            j = i + 2
            while j < len(parts) - 1:
                dataset_mixer += ' ' + parts[j]
                if '--' in parts[j + 1]:
                    break
                j += 1
        elif part == '--train_file' and i + 1 < len(parts):
            train_file = parts[i + 1]
    if ((dataset_name is not None and dataset_mixer is not None) or
            (dataset_name is not None and train_file is not None) or
            (dataset_mixer is not None and train_file is not None)):
        raise ValueError("Cannot provide two dataset selection mechanisms.")
    return dataset_name, dataset_mixer, train_file


def parse_dataset_mixer(mixer_dict):
    elems = []
    for k, v in mixer_dict.items():
        elems.append(k)
        elems.append(str(v))
    return ' '.join(elems)


def get_model_name(command_string):
    parts = shlex.split(command_string)
    for i, part in enumerate(parts):
        if part == '--model_name_or_path':
            if i + 1 < len(parts):
                return parts[i + 1]
    return None  # Return None if model name is not found


if __name__ == "__main__":
    main()
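After the merge, main() writes the patched spec to configs/beaker_configs/auto_created/<exp_name>.yaml and submits it with beaker experiment create. Assuming the first training config below is the one passed via --config, the generated command looks roughly like this (the timestamp suffix is made up; the model segment is model_name_or_path with "/" replaced by "_"):

# Illustrative only: exp_name = open_instruct_finetune_{model_name}_{MMDDYYYYHHMMSS}
beaker experiment create \
    configs/beaker_configs/auto_created/open_instruct_finetune_allenai_open_instruct_dev_11152024051300.yaml \
    --workspace ai2/tulu-3-dev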
model_name_or_path: allenai/open_instruct_dev
model_revision: peteish7-weka-anneal-from-928646-50B-nowup_big-number-no-whammy-2_soup_step11931_hf
use_flash_attn: true
tokenizer_name: allenai/open_instruct_dev
tokenizer_revision: peteish7-weka-anneal-from-928646-50B-nowup_big-number-no-whammy-2_soup_step11931_hf
use_slow_tokenizer: true
dataset_mixer:
  # General datasets:
  allenai/tulu-v3.9-tmp: 1.0
  # Datasets removed because of licensing:
  # ai2-adapt-dev/flan_v2_converted: 89982 # ODC-BY
  # ai2-adapt-dev/sciriff_converted: 10000 # ODC-BY
  # ai2-adapt-dev/no_robots_converted: 9500 # NC
  # AI-MO/NuminaMath-TIR: 72441 # NC
max_seq_length: 2048
preprocessing_num_workers: 128
per_device_train_batch_size: 1 # note, this is set up for 8 GPUs
gradient_accumulation_steps: 4 # effective batch size 128 with 4 nodes
learning_rate: 5.0e-06
lr_scheduler_type: linear
warmup_ratio: 0.03
weight_decay: 0.0
num_train_epochs: 2
output_dir: /output/
with_tracking: true
report_to:
  - wandb
logging_steps: 1
checkpointing_steps: epoch
dataset_mix_dir: /output/
add_bos: true
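When the submit script merges this config into the Beaker task's command, the dataset_mixer mapping is renamed to dataset_mixer_list and flattened into alternating name/weight tokens, boolean true values become bare flags, and the report_to list is expanded. The finetune.py portion of the rewritten command then looks roughly like the abridged sketch below (only a few of the overridden flags are shown):

# Abridged, illustrative fragment of the rewritten command
... open_instruct/finetune.py \
    --model_name_or_path allenai/open_instruct_dev \
    --use_flash_attn \
    --dataset_mixer_list allenai/tulu-v3.9-tmp 1.0 \
    --max_seq_length 2048 \
    --report_to wandb \
    ...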
model_name_or_path: /model
use_flash_attn: true
tokenizer_name: /model
use_slow_tokenizer: true
dataset_mixer:
  # General datasets:
  /dataset/mixed_ds_train.json: 1.0
max_seq_length: 2048
preprocessing_num_workers: 128
per_device_train_batch_size: 1 # note, this is set up for 8 GPUs
gradient_accumulation_steps: 4 # effective batch size 128 with 4 nodes
learning_rate: 5.0e-06
lr_scheduler_type: linear
warmup_ratio: 0.03
weight_decay: 0.0
num_train_epochs: 2
output_dir: /output/
with_tracking: true
report_to:
  - wandb
logging_steps: 1
checkpointing_steps: epoch
dataset_mix_dir: /output/
add_bos: true
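This variant expects a model checkpoint mounted at /model and a prepared JSON dataset at /dataset, which is what the submit script's --datasets flag is for: each entry has the form <beaker_id>:<mount_path> and is added to the task as a Beaker dataset mount. A hypothetical invocation, with placeholder dataset IDs and config path:

# The Beaker dataset IDs and the config path below are placeholders.
python scripts/submit_finetune_job.py \
    --default_beaker_config configs/beaker_configs/default_finetune_multinode.yaml \
    --config configs/train_configs/sft/<this_config>.yaml \
    --datasets <model_dataset_id>:/model <train_data_dataset_id>:/dataset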