Skip to content

Instantly share code, notes, and snippets.

@chaudharyachint08
Last active April 29, 2023 05:54
Show Gist options
  • Save chaudharyachint08/d9e5d0e6c0b00af618d774ea09b822c4 to your computer and use it in GitHub Desktop.
# Refresh the package index BEFORE upgrading: 'apt-get upgrade' only acts on
# the package lists already on disk, so 'update' must run first or the
# upgrade operates on stale metadata.
sudo apt-get update -y
sudo apt-get upgrade -y
# Base build toolchain and utilities (Debian/Ubuntu hosts).
sudo apt-get install gcc g++ make cmake htop iotop tree dkms wget git zip unzip -y
# Installing Alpa (https://alpa-projects.github.io/install.html)
# CUDA 11.4 and cuDNN 8.2.0 required
# NOTE(review): the CUDA runfile fetched further below is 11.3, not 11.4 --
# confirm which toolkit version Alpa actually needs.
# Base build toolchain and utilities (RHEL/Fedora hosts -- this log mixes
# apt-get and dnf sections; only the section matching the host distro applies).
sudo dnf install gcc gcc-c++ make -y
sudo dnf install wget git zip unzip -y
# Kernel headers/devel are required so the NVIDIA installer can build its
# kernel module against the running kernel:
# https://linuxconfig.org/error-unable-to-find-the-kernel-source-tree-for-the-currently-running-kernel-centos-rhel
sudo dnf install kernel-headers kernel-devel -y
sudo dnf install elfutils-libelf-devel pkg-config zlib -y
# if kernel-headers are not matching existing kernel, sync the whole
# distribution so headers and running kernel line up after a reboot
sudo dnf distro-sync -y
# Installing latest Nvidia driver
wget https://us.download.nvidia.com/XFree86/Linux-x86_64/515.57/NVIDIA-Linux-x86_64-515.57.run
# NOTE(review): 'reboot' terminates the session, so the installer on the
# next line never runs in the same invocation -- it must be executed
# manually after the machine comes back up (presumably the reboot is to
# boot into the kernel matching the freshly synced kernel-devel headers --
# confirm).
sudo reboot
sudo sh NVIDIA-Linux-x86_64-515.57.run
# Installing required Cuda version (11.3 runfile with bundled 465.19.01
# driver -- skip the driver component if 515.57 is already installed)
wget https://developer.download.nvidia.com/compute/cuda/11.3.0/local_installers/cuda_11.3.0_465.19.01_linux.run
sudo sh cuda_11.3.0_465.19.01_linux.run
# Following https://alpa-projects.github.io/install.html
# Append the CUDA 11.3 toolchain to the lookup paths. The ${VAR:+...} guard
# avoids producing a leading ':' when the variable is empty or unset -- an
# empty entry in LD_LIBRARY_PATH silently adds the current directory to the
# dynamic-loader search path.
export PATH="${PATH:+$PATH:}/usr/local/cuda-11.3/bin"
export LD_LIBRARY_PATH="${LD_LIBRARY_PATH:+$LD_LIBRARY_PATH:}/usr/local/cuda-11.3/lib64"
# cuDNN download (https://docs.nvidia.com/deeplearning/cudnn/install-guide/index.html)
# https://developer.nvidia.com/rdp/cudnn-archive
# cuDNN 8.2.0 runtime + dev packages built against CUDA 11.3 (RHEL 8 RPMs).
wget https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/libcudnn8-8.2.0.53-1.cuda11.3.x86_64.rpm
wget https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/libcudnn8-devel-8.2.0.53-1.cuda11.3.x86_64.rpm
# Direct rpm install; 'rpm -i' does no dependency resolution and fails if
# prerequisites are missing.
sudo rpm -i libcudnn8-8.2.0.53-1.cuda11.3.x86_64.rpm
sudo rpm -i libcudnn8-devel-8.2.0.53-1.cuda11.3.x86_64.rpm
sudo dnf clean all
# NOTE(review): this installs the same two RPMs again via dnf (which does
# resolve dependencies); if the 'rpm -i' lines above succeeded it is a
# no-op -- presumably kept as a fallback. Confirm one path and drop the other.
sudo dnf install libcudnn8-8.2.0.53-1.cuda11.3.x86_64.rpm libcudnn8-devel-8.2.0.53-1.cuda11.3.x86_64.rpm
# Installing Anaconda
wget https://repo.anaconda.com/archive/Anaconda3-2022.05-Linux-x86_64.sh
# Only the owner needs to execute the installer; 'chmod 777' would make it
# world-writable, which is unnecessary and unsafe.
chmod u+x Anaconda3-2022.05-Linux-x86_64.sh
./Anaconda3-2022.05-Linux-x86_64.sh
# NOTE(review): 'reboot' ends the session -- everything below this line
# must be run manually after the machine is back up.
sudo reboot
# Re-export the CUDA paths after the reboot. The ${VAR:+...} guard avoids a
# leading ':' (and the implicit current-directory entry it creates in
# LD_LIBRARY_PATH) when the variable is empty or unset.
export PATH="${PATH:+$PATH:}/usr/local/cuda-11.3/bin"
export LD_LIBRARY_PATH="${LD_LIBRARY_PATH:+$LD_LIBRARY_PATH:}/usr/local/cuda-11.3/lib64"
# Bring the base conda installation up to date.
conda update --all --yes
# Recreate the 'alpa' environment from scratch.
# NOTE(review): the remove errors if the env does not exist yet -- harmless
# when pasted interactively, but would need '|| true' in a strict script.
conda remove -n alpa --all --yes
conda create -n alpa python=3.9 --yes
conda activate alpa
# CBC solver from conda-forge (alternative to the apt package below).
# sudo apt install coinor-cbc
conda install -c conda-forge coincbc --yes
# CuPy built for CUDA 11.3; provides the NCCL bindings used next.
pip3 install cupy-cuda113
# Smoke-test that CuPy can see NCCL.
python3 -c "from cupy.cuda import nccl"
# If the import above fails, install the NCCL library for CUDA 11.3 manually:
python -m cupyx.tools.install_library --library nccl --cuda 11.3
# Alpa installation
pip3 install alpa
# NOTE(review): the pinned jaxlib wheel below currently errors -- the wheels
# were removed from alpa.ai/wheels.html. The commented GitHub release URL
# underneath is an alternative source for the same wheel.
pip3 install jaxlib==0.3.5+cuda113.cudnn820 -f https://alpa-projects.github.io/wheels.html
# pip3 install https://github.com/alpa-projects/alpa/releases/download/v0.1.7/jaxlib-0.3.5%2Bcuda113.cudnn820-cp39-none-manylinux2010_x86_64.whl
# Checking installation: start a local Ray head node, then run Alpa's
# bundled install self-test.
ray start --head
python3 -m alpa.test_install
# Serving OPT Models (https://alpa.ai/tutorials/opt_serving.html#requirements)
pip3 install transformers flask cython omegaconf
# Install torch corresponding to your CUDA version, e.g., for CUDA 11.3:
pip3 install --no-cache-dir torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu113
# git clone [email protected]:alpa-projects/alpa.git  # SSH variant; --recursive is not needed here
git clone https://github.com/alpa-projects/alpa.git
# Install the examples package (opt_serving) in editable mode.
cd alpa/examples
pip3 install -e .
# Downloading Alpa compatible weights for early testing (gdown fetches the
# Google Drive links below).
pip3 install gdown
# 30B https://drive.google.com/u/0/uc?id=1_MBcgwTqHFboV0JkGWR03AOHusrxcHlu
# Benchmark generation code (test if things are running ok)
# NOTE(review): paths below are specific to this machine's layout
# ($HOME/OPT/..., /home/achint_chaudhary/...) -- adjust for your host.
cd $HOME/OPT/alpa/examples/opt_serving/benchmark
# Torch based 125M diagnosis
cd opt_serving/benchmark/
python3 benchmark_text_gen.py --model facebook/opt-125m --debug
# Ray based 125M diagnosis
python3 benchmark_text_gen.py --model jax/opt-125m --path /home/achint_chaudhary/OPT/meta_alpa_weights --debug
# Start ray on the node
ray start --head
# python3 benchmark_text_gen.py --model alpa/opt-2.7b --path /home/achint_chaudhary/OPT/meta_alpa_weights --debug
python3 benchmark_text_gen.py --model alpa/opt-30b --path /home/achint_chaudhary/OPT/custom_alpa_weights/30B/ --debug
python3 benchmark_text_gen.py --model alpa/opt-66b --path /home/achint_chaudhary/OPT/custom_alpa_weights/66B/ --debug
# Downloading and pre-processing OPT-66B model
# 30B weights json file (https://huggingface.co/facebook/opt-30b/blob/main/pytorch_model.bin.index.json)
# 66B weights json file (https://huggingface.co/facebook/opt-66b/blob/main/pytorch_model.bin.index.json)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment