run bloom — bootstrap a conda environment and serve BLOOM (microsoft/bloom-deepspeed-inference-fp16) via huggingface/transformers-bloom-inference
#!/bin/bash
if [ $# -eq 0 ]; then
  # Stage 1 (no arguments): install system packages and Miniconda, create
  # the "inference" env, then re-run this script inside it.
  dnf install -y --disableplugin=subscription-manager make git
  dnf clean all --disableplugin=subscription-manager
  curl -L -o ./miniconda.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh
  chmod +x ./miniconda.sh
  ./miniconda.sh -b -p /opt/conda
  rm ./miniconda.sh
  export PYTHON_VERSION=3.9
  export PATH=/opt/conda/envs/inference/bin:/opt/conda/bin:$PATH
  conda create -n inference python=$PYTHON_VERSION pip -y
  conda run -n inference /bin/bash -c "./bloom.sh conda"
else
  # Stage 2 (any argument, run inside the conda env): install pinned Python
  # dependencies, build the inference server, and launch it.
  conda update -n base -c defaults conda -y
  conda install -c anaconda cmake -y
  pip install torch==1.10.2+cu111 --extra-index-url https://download.pytorch.org/whl/cu111 transformers==4.25.1 deepspeed==0.8.0 accelerate==0.15.0 gunicorn==20.1.0 flask flask_api fastapi==0.89.1 uvicorn==0.19.0 jinja2==3.1.2 pydantic==1.10.2 huggingface_hub==0.10.1 grpcio-tools==1.50.0 --no-cache-dir
  conda clean -ya
  # Cache Hugging Face downloads on the mounted volume.
  export TRANSFORMERS_CACHE=/cos/HF_cache
  export HUGGINGFACE_HUB_CACHE=$TRANSFORMERS_CACHE
  # Group-writable working and cache directories for non-root execution.
  mkdir /src
  cd /src
  chmod -R g+w /src
  mkdir /.cache
  chmod -R g+w /.cache
  export PORT=5000
  export UI_PORT=5001
  git clone https://github.com/huggingface/transformers-bloom-inference.git
  cd transformers-bloom-inference
  make gen-proto
  make ui
  # Serve BLOOM with DeepSpeed inference across 4 GPUs; gunicorn binds to
  # localhost:5000 with a single worker and no request timeout (-t 0).
  TOKENIZERS_PARALLELISM=false MODEL_NAME=microsoft/bloom-deepspeed-inference-fp16 MODEL_CLASS=AutoModelForCausalLM DEPLOYMENT_FRAMEWORK=ds_inference DTYPE=fp16 MAX_INPUT_LENGTH=2048 MAX_BATCH_SIZE=4 CUDA_VISIBLE_DEVICES=0,1,2,3 gunicorn -t 0 -w 1 -b 127.0.0.1:5000 inference_server.server:app --access-logfile - --access-logformat '%(h)s %(t)s "%(r)s" %(s)s %(b)s'
fi
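
The script is self-re-invoking: run with no arguments it takes the first branch, and the final conda run line re-executes it as ./bloom.sh conda inside the new environment, which takes the else branch. A minimal invocation sketch, assuming the file is saved as bloom.sh in the current directory (the name must match the self-call on the conda run line):

# save this gist as ./bloom.sh, then:
chmod +x ./bloom.sh
./bloom.sh   # stage 1; stage 2 then runs automatically via "conda run -n inference ./bloom.sh conda"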
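Once gunicorn is listening on 127.0.0.1:5000, the server can be exercised over HTTP. A hedged sketch of a generation request, assuming the inference_server app exposes a POST /generate/ route that accepts a JSON body with prompts under "text" plus generation parameters such as max_new_tokens, as described in the transformers-bloom-inference README; the route path and payload keys are assumptions, not confirmed by this gist:

# assumed endpoint and payload keys — adjust to the repo's actual API
curl -s 'http://127.0.0.1:5000/generate/' \
  -H 'Content-Type: application/json' \
  -d '{"text": ["DeepSpeed is a machine learning framework"], "max_new_tokens": 40}'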