# Set up a Megatron-LM checkout together with a dedicated Python virtual
# environment. Intended to be pasted into (or sourced from) an interactive
# shell, so failures are reported without calling `exit`.
export REPO=/p/project/laionize/cherti1/Megatron-LM # MODIFY!

# Quote every expansion of REPO so a path containing spaces or glob
# characters is passed through as a single word.
git clone https://github.com/bigcode-project/Megatron-LM.git "$REPO"

# Only continue when the checkout directory is actually reachable; otherwise
# `python -m venv` would create "$REPO/env" inside a missing/broken checkout
# and a later re-clone into the now non-empty directory would fail.
if cd "$REPO"; then
  # Load the toolchain modules before creating the venv.
  ml Stages/2024 GCC/12.3.0 OpenMPI CUDA/12 PyTorch/2.1.0-CUDA-12-ALPHA
  python -m venv "$REPO/env"
  # `source` (not a subshell) so the activation persists in the current shell.
  source "$REPO/env/bin/activate"
  pip install -U pip
else
  printf 'error: cannot cd to %s; skipping environment setup\n' "$REPO" >&2
fi
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| """ | |
| Compute resolution statistics for WebDataset. | |
| This script processes a WebDataset in parallel and counts how many samples | |
| meet various resolution thresholds. It outputs statistics in the format: | |
| >= 256x256 1_055_309_295 | |
| >= 384x384 698_616_282 | |
| ... |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| import os | |
| import argparse | |
| import webdataset as wds | |
| from PIL import Image | |
| import json | |
| import io | |
| import glob | |
| import multiprocessing as mp |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| """ | |
| This is a helper script to launch sbatch jobs and to handle two issues | |
| we encountered: | |
| - freezing/hanging | |
| - limited maximum job time (24 hours in the best case, can be 6 hours when total compute budget is over) | |
| The script automatically relaunches the sbatch script when the job either freezes | |
| or is stopped/canceled. |
We can't make this file beautiful and searchable because it's too large.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| image_retrieval_recall@1,text_acc,text_retrieval_recall@5,text_retrieval_recall@1,image_retrieval_recall@5,task,text_retrieval_recall@10,language,acc,acc5,image_retrieval_recall@10,model,dataset,mean_per_class_recall,image_acc,pretrained,acc1,model_fullname | |
| 0.448199987411499,,0.8410000205039978,0.5920000076293945,0.7206000089645386,zeroshot_retrieval,0.8980000019073486,en,,,0.8123999834060669,RN50,wds/flickr30k,,,cc12m,,RN50 cc12m | |
| 0.45840001106262207,,0.8500000238418579,0.5889999866485596,0.7305999994277954,zeroshot_retrieval,0.8980000019073486,en,,,0.8163999915122986,RN50-quickgelu,wds/flickr30k,,,cc12m,,RN50-quickgelu cc12m | |
| 0.5221999883651733,,0.878000020980835,0.6740000247955322,0.7821999788284302,zeroshot_retrieval,0.9309999942779541,en,,,0.864799976348877,ViT-B-16,wds/flickr30k,,,commonpool_l_basic_s1b_b8k,,ViT-B-16 commonpool_l_basic_s1b_b8k | |
| 0.5103999972343445,,0.8889999985694885,0.6830000281333923,0.7735999822616577,zeroshot_retrieval,0.9369999766349792,en,,,0.8521999716758728,ViT-B-16,wds/flickr30k,,, |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import torch | |
| import mobileclip | |
| from PIL import Image | |
| import numpy as np | |
| import pandas as pd | |
| from torch.utils.flop_counter import FlopCounterMode | |
| import open_clip | |
| import fvcore | |
| import fvcore.nn |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/bin/bash | |
| #SBATCH --nodes=8 | |
| #SBATCH --time=00:20:00 | |
| #SBATCH --ntasks-per-node=1 | |
| #SBATCH --cpus-per-task=48 | |
| #SBATCH --gres=gpu:4 | |
| #SBATCH --partition=booster | |
| #SBATCH --account=transfernetx | |
| #SBATCH --exclude=jwb[0059,0067,0069,0193,0284,0287,0294,0359,0418,0637,0829,0832,0838,0898,0907,0921,0971,1004,1023,1029,1213,1126] | |
| #SBATCH --threads-per-core=1 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/bin/bash | |
| #SBATCH --nodes=8 | |
| #SBATCH --time=00:20:00 | |
| #SBATCH --ntasks-per-node=1 | |
| #SBATCH --cpus-per-task=48 | |
| #SBATCH --gres=gpu:4 | |
| #SBATCH --partition=booster | |
| #SBATCH --account=transfernetx | |
| #SBATCH --exclude=jwb[0059,0067,0069,0193,0284,0287,0294,0359,0418,0637,0829,0832,0838,0898,0907,0921,0971,1004,1023,1029,1213,1126] | |
| #SBATCH --threads-per-core=1 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| checkpoints: | |
| checkpoint_interval: 10 | |
| checkpoints_path: checkpoints | |
| checkpoints_path_is_shared_file_system: false | |
| resume_checkpoint_path: null | |
| save_initial_state: false | |
| data_stages: | |
| - name: Stable Training Stage | |
| start_training_step: 1 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/bin/bash -x | |
| #SBATCH --account={account} | |
| #SBATCH --nodes={nodes} | |
| #SBATCH --gres=gpu:4 | |
| #SBATCH --ntasks-per-node=4 | |
| #SBATCH --cpus-per-task=24 | |
| #SBATCH --time=06:00:00 | |
| #SBATCH --partition={partition} | |
| #SBATCH --output={output_file} | |
| echo "Job Id:$SLURM_JOB_ID" |
NewerOlder