# This is a modified version of TRL's `SFTTrainer` example (https://github.com/huggingface/trl/blob/main/examples/scripts/sft_trainer.py),
# adapted to run with DeepSpeed ZeRO-3 and Mistral-7B-v0.1. The settings below were run on 1 node of 8 x A100 (80GB) GPUs.
#
# Usage:
# - Install the latest transformers & accelerate versions: `pip install -U transformers accelerate`
# - Install deepspeed: `pip install deepspeed==0.9.5`
# - Install TRL from main: `pip install git+https://github.com/huggingface/trl.git`
# - Clone the repo: `git clone https://github.com/huggingface/trl.git`
# - Copy this Gist into trl/examples/scripts
# - Run from the root of the trl repo with: `accelerate launch --config_file=examples/accelerate_configs/deepspeed_zero3.yaml --gradient_accumulation_steps 8 examples/scripts/sft_trainer.py`
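For orientation, the script these commands launch follows the standard TRL supervised fine-tuning pattern. The sketch below is a rough, hedged approximation of that pattern under the older TRL API (where dataset_text_field, max_seq_length, and tokenizer were SFTTrainer arguments rather than part of SFTConfig); the dataset, sequence length, and hyperparameters are placeholders, not the gist's actual values.

# Hedged sketch of a minimal TRL SFT script (placeholders, not the actual gist contents).
from datasets import load_dataset
from transformers import AutoTokenizer, TrainingArguments
from trl import SFTTrainer

model_id = "mistralai/Mistral-7B-v0.1"
dataset = load_dataset("timdettmers/openassistant-guanaco", split="train")   # assumed dataset
tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.pad_token = tokenizer.eos_token

training_args = TrainingArguments(
    output_dir="sft-mistral-7b",
    per_device_train_batch_size=1,
    gradient_accumulation_steps=8,      # matches --gradient_accumulation_steps 8 in the launch command above
    learning_rate=2e-5,
    num_train_epochs=1,
    bf16=True,
    logging_steps=10,
)

trainer = SFTTrainer(
    model=model_id,                     # older SFTTrainer versions accept a model id string
    args=training_args,
    train_dataset=dataset,
    dataset_text_field="text",          # assumed column name
    max_seq_length=2048,                # assumed sequence length
    tokenizer=tokenizer,
)
trainer.train()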
def layernorm_forward(x, gamma, beta, ln_param):
    """
    Forward pass for layer normalization.

    During both training and test time, the incoming data is normalized per data point
    before being scaled by the gamma and beta parameters, identical to batch normalization.

    Note that in contrast to batch normalization, the behavior of layer normalization is
    identical at train and test time, so we do not need to keep track of running averages
    of any sort.
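The preview cuts off mid-docstring. A hedged completion of the body follows; it assumes `import numpy as np` at the top of the file, an "eps" key in ln_param, and the usual (out, cache) return convention of such assignment code, none of which is shown in the preview.

    """
    eps = ln_param.get("eps", 1e-5)             # assumed key name and default
    mu = x.mean(axis=1, keepdims=True)          # per-sample mean over the feature axis
    var = x.var(axis=1, keepdims=True)          # per-sample variance over the feature axis
    x_hat = (x - mu) / np.sqrt(var + eps)       # normalize each data point independently
    out = gamma * x_hat + beta                  # learnable scale and shift
    cache = (x_hat, gamma, np.sqrt(var + eps))  # values a matching backward pass would need (assumed)
    return out, cache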
import os

import torch
import torch.distributed as dist

# torchrun (and torch.distributed.launch with --use_env) sets LOCAL_RANK for each spawned process.
local_rank = int(os.environ["LOCAL_RANK"])

# Join the default process group over NCCL and pin this process to its own GPU.
dist.init_process_group(backend='nccl')
torch.cuda.set_device(local_rank)
device = torch.device("cuda", local_rank)
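A hedged continuation showing the kind of sanity check that often follows this setup: every rank contributes its rank id and an all-reduce sums them. This is illustrative, not part of the original snippet; launch with e.g. `torchrun --nproc_per_node=8 script.py`.

# Illustrative continuation (assumption): sum each process's rank across the group.
t = torch.full((1,), float(dist.get_rank()), device=device)
dist.all_reduce(t, op=dist.ReduceOp.SUM)        # every rank now holds 0 + 1 + ... + (world_size - 1)
if dist.get_rank() == 0:
    print(f"sum of ranks across {dist.get_world_size()} processes: {t.item()}")
dist.destroy_process_group()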
import argparse


def compose_actions(*actions):
    """Compose many argparse actions into one callable action.

    Args:
        *actions: The actions to compose.

    Returns:
        argparse.Action: Composed action.
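The preview stops inside the docstring. Below is one hedged way the function could be finished, plus a small usage example; the ComposedAction wrapper and the toy Store/Announce actions are assumptions, not the gist's actual code.

    """
    # Sketch: wrap the given Action classes in a single Action that runs them in order.
    class ComposedAction(argparse.Action):
        def __call__(self, parser, namespace, values, option_string=None):
            for action_cls in actions:
                action = action_cls(option_strings=self.option_strings, dest=self.dest)
                action(parser, namespace, values, option_string)

    return ComposedAction


# Usage sketch: one flag triggers both actions.
class Store(argparse.Action):
    def __call__(self, parser, namespace, values, option_string=None):
        setattr(namespace, self.dest, values)


class Announce(argparse.Action):
    def __call__(self, parser, namespace, values, option_string=None):
        print(f"--{self.dest} set to {values!r}")


parser = argparse.ArgumentParser()
parser.add_argument("--name", action=compose_actions(Store, Announce))
args = parser.parse_args(["--name", "demo"])    # prints the message, then args.name == "demo"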
# python -m torch.distributed.launch --nproc_per_node=2 all_reduce_bench.py
import argparse
import fcntl
import os
import time

import torch
import torch.distributed as dist

TRIALS = 5
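The preview stops after the constants. What follows is a hedged sketch of the measurement loop such a benchmark typically runs; the payload size and bandwidth formula are illustrative, and the original presumably parses --local_rank via argparse and serializes prints with fcntl, which the sketch skips.

# Hedged sketch of the benchmark core (illustrative values, not the gist's actual code).
def timed_allreduce(tensor, rank):
    torch.cuda.synchronize()
    start = time.perf_counter()
    dist.all_reduce(tensor)
    torch.cuda.synchronize()                        # wait for the collective before stopping the clock
    elapsed = time.perf_counter() - start

    size_gb = tensor.numel() * tensor.element_size() / 2**30
    n = dist.get_world_size()
    busbw = size_gb / elapsed * (2 * (n - 1) / n)   # ring all-reduce "bus bandwidth" correction
    if rank == 0:
        print(f"{size_gb:.2f} GB in {elapsed:.3f}s -> algbw {size_gb / elapsed:.2f} GB/s, busbw {busbw:.2f} GB/s")


if __name__ == "__main__":
    local_rank = int(os.environ["LOCAL_RANK"])      # assumes torchrun / --use_env style launching
    torch.cuda.set_device(local_rank)
    dist.init_process_group("nccl")
    payload = torch.rand(128 * 2**20, device="cuda")   # ~0.5 GB of fp32 (assumed size)
    for _ in range(TRIALS):
        timed_allreduce(payload, dist.get_rank())
    dist.destroy_process_group()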
import json
import os
import platform
import subprocess
import tarfile
from zipfile import ZipFile

import requests

debug = False
os_type = platform.system().lower()
machine_type = platform.machine().lower()
if debug:
    print(f"Your OS and machine type are {os_type} and {machine_type}")
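The preview ends after the platform check. Given the imports (requests, tarfile, ZipFile), a download-and-extract step presumably follows; the sketch below is an illustrative guess with a placeholder URL and archive names, not the gist's real logic.

# Illustrative continuation (assumed): pick a per-platform archive, download it, extract it.
BASE_URL = "https://example.com/releases"                       # placeholder URL
archives = {
    ("linux", "x86_64"): "tool-linux-x86_64.tar.gz",
    ("darwin", "arm64"): "tool-macos-arm64.tar.gz",
    ("windows", "amd64"): "tool-windows-amd64.zip",
}
archive = archives.get((os_type, machine_type))
if archive is None:
    raise SystemExit(f"unsupported platform: {os_type}/{machine_type}")

with requests.get(f"{BASE_URL}/{archive}", stream=True) as r:
    r.raise_for_status()
    with open(archive, "wb") as f:
        for chunk in r.iter_content(chunk_size=1 << 20):        # stream in 1 MiB chunks
            f.write(chunk)

if archive.endswith(".zip"):
    with ZipFile(archive) as z:
        z.extractall("tool")
else:
    with tarfile.open(archive) as t:
        t.extractall("tool")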
#!/usr/bin/python
from __future__ import print_function
#
# A simple CGI script useful for debugging GitHub web hooks
# https://developer.github.com/webhooks/
#
import hashlib, hmac, json, os, sys, traceback
from subprocess import Popen, PIPE, STDOUT
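The imports (hmac, hashlib, Popen) point at the usual pattern for such a script: verify GitHub's HMAC signature on the payload, then hand it to a subprocess. A hedged sketch of the verification step follows; the header choice (X-Hub-Signature-256), the placeholder secret, and the response body are assumptions, not the gist's actual code.

# Hedged sketch (not the gist's actual code): check the webhook HMAC before acting on the payload.
def verify_signature(payload, signature_header, secret):
    """Compare the X-Hub-Signature-256 header against an HMAC-SHA256 of the raw payload."""
    if not signature_header:
        return False
    expected = "sha256=" + hmac.new(secret, payload, hashlib.sha256).hexdigest()
    return hmac.compare_digest(expected, signature_header)


SECRET = b"replace-with-your-webhook-secret"                    # placeholder secret
payload = sys.stdin.buffer.read() if hasattr(sys.stdin, "buffer") else sys.stdin.read()
signature = os.environ.get("HTTP_X_HUB_SIGNATURE_256", "")

print("Content-Type: text/plain")
print("")
if verify_signature(payload, signature, SECRET):
    event = os.environ.get("HTTP_X_GITHUB_EVENT", "unknown")
    print("received %r event with %d top-level keys" % (event, len(json.loads(payload))))
else:
    print("signature mismatch")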