python3 test_diff_stages.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # This isn't supposed to run as a bash script; I named it with ".sh" for syntax highlighting. | |
| # https://developer.nvidia.com/nsight-systems | |
| # https://docs.nvidia.com/nsight-systems/profiling/index.html | |
| # My preferred nsys (command line executable used to create profiles) commands | |
| # | |
| # In your script, write | |
| # torch.cuda.nvtx.range_push("region name") | |
| # ... |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # 1. 告诉 tmux 内部使用正确的终端类型 | |
| set -g default-terminal "tmux-256color" | |
| # 2. 开启 True Color 支持 | |
| # 因为 Termux 外部报 xterm-256color,我们需要匹配这个外壳并开启 RGB | |
| set -as terminal-features ",xterm-256color:RGB" | |
| # 3. 确保 Emacs 这种程序能正确识别并使用 | |
| set -as terminal-overrides ",xterm-256color:Tc" | |
| # Undercurl |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| -- Install packer | |
| local install_path = vim.fn.stdpath 'data' .. '/site/pack/packer/start/packer.nvim' | |
| local is_bootstrap = false | |
| if vim.fn.empty(vim.fn.glob(install_path)) > 0 then | |
| is_bootstrap = true | |
| vim.fn.system { 'git', 'clone', '--depth', '1', 'https://github.com/wbthomason/packer.nvim', install_path } | |
| vim.cmd [[packadd packer.nvim]] | |
| end | |
| require('packer').startup(function(use) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| """ | |
| call to _all_gather_base with c10d._coalescing_manager | |
| Test command: | |
| mpirun -np $1 -N ${ndev_per_node} --hostfile ${HOST_FILE} \ | |
| --mca plm_rsh_no_tree_spawn 1 \ | |
| -mca btl tcp,self --mca btl_tcp_if_exclude lo,docker0 \ | |
| --mca pml ^cm \ | |
| -bind-to none \ | |
| --tag-output \ | |
| -x LD_LIBRARY_PATH=$LD_LIBRARY_PATH \ |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| diff --git a/csrc/lamb/fused_lamb_cuda_kernel.cu b/csrc/lamb/fused_lamb_cuda_kernel.cu | |
| index e934b69c..207faa39 100644 | |
| --- a/csrc/lamb/fused_lamb_cuda_kernel.cu | |
| +++ b/csrc/lamb/fused_lamb_cuda_kernel.cu | |
| @@ -8,7 +8,7 @@ | |
| #include "ATen/cuda/CUDAContext.h" | |
| #include "ATen/cuda/detail/IndexUtils.cuh" | |
| //#include "ATen/Type.h" | |
| -#include <THC/THCGeneral.h> | |
| +// #include <THC/THCGeneral.h> |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| diff --git a/csrc/lamb/fused_lamb_cuda_kernel.cu b/csrc/lamb/fused_lamb_cuda_kernel.cu | |
| index 0448a45..ff87993 100644 | |
| --- a/csrc/lamb/fused_lamb_cuda_kernel.cu | |
| +++ b/csrc/lamb/fused_lamb_cuda_kernel.cu | |
| @@ -464,7 +464,7 @@ void fused_lamb_cuda(at::Tensor& p, | |
| lamb_coeff.data<scalar_t>()); | |
| })); | |
| } | |
| - THCudaCheck(cudaGetLastError()); | |
| + AT_CUDA_CHECK(cudaGetLastError()); |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| 21: M9 P[5, 6] avail 3.1e+08, max_avail 5.0e+07, queue_sz 5.8e+02, n_inflight 5.1e+03, inflight [9] | |
| -gather param for module 3: {'id': 0, 'status': 'AVAILABLE', 'numel': 78151680, 'persist': False, 'active_sub_modules': {3}} | |
| [2021-07-07 21:16:52,635] [INFO] [stage3.py:42:print_rank_0] wait_for_fetch current submodule id 9 | |
| [2021-07-07 21:16:52,635] [INFO] [stage3.py:42:print_rank_0] module id 9 handle is None | |
| 22: M23 P[] avail 3.1e+08, max_avail 5.0e+07, queue_sz 5.8e+02, n_inflight 7.8e+07, inflight [0, 23, 2, 1, 3] | |
| [2021-07-07 21:16:52,636] [INFO] [stage3.py:42:print_rank_0] wait_for_fetch current submodule id 23 | |
| [2021-07-07 21:16:52,636] [INFO] [stage3.py:42:print_rank_0] module id 23 handle is None | |
| -gather param for module 24: {'id': 151, 'status': 'NOT_AVAILABLE', 'numel': 6553600, 'persist': False, 'active_sub_modules': {24}} | |
| -gather param for module 24: {'id': 152, 'status': 'AVAILABLE', 'numel': 2560, 'persist': True, 'active_sub_modules': {24}} | |
| [2021-07-07 21:16:52,636] [INFO] [utils.py:629:info_rank_ |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "train_batch_size": 512, | |
| "train_micro_batch_size_per_gpu": 8, | |
| "steps_per_print": 100, | |
| "prescale_gradients": false, | |
| "bert_token_file": "bert-large-uncased", | |
| "bert_model_config": { | |
| "vocab_size_or_config_json_file": 32003, | |
| "hidden_size": 2560, | |
| "num_hidden_layers": 64, |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import re | |
| import argparse | |
| def get_args(): | |
| arg_parser = argparse.ArgumentParser() | |
| arg_parser.add_argument('--file') | |
| args = arg_parser.parse_args() | |
| return args |
NewerOlder