run bloom — bootstrap a conda environment and serve BLOOM (microsoft/bloom-deepspeed-inference-fp16) via huggingface/transformers-bloom-inference
#!/bin/bash
if [ $# -eq 0 ]; then
  # Stage 1 (no arguments): install system packages and Miniconda, create
  # the "inference" env, then re-run this script inside it.
  dnf install -y --disableplugin=subscription-manager make git
  dnf clean all --disableplugin=subscription-manager
  curl -L -o ./miniconda.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh
  chmod +x ./miniconda.sh
  ./miniconda.sh -b -p /opt/conda
  rm ./miniconda.sh
  export PYTHON_VERSION=3.9
  export PATH=/opt/conda/envs/inference/bin:/opt/conda/bin:$PATH
  conda create -n inference python=$PYTHON_VERSION pip -y
  conda run -n inference /bin/bash -c "./bloom.sh conda"
else
  # Stage 2 (any argument, run inside the conda env): install pinned Python
  # dependencies, build the inference server, and launch it.
  conda update -n base -c defaults conda -y
  conda install -c anaconda cmake -y
  pip install torch==1.10.2+cu111 --extra-index-url https://download.pytorch.org/whl/cu111 transformers==4.25.1 deepspeed==0.8.0 accelerate==0.15.0 gunicorn==20.1.0 flask flask_api fastapi==0.89.1 uvicorn==0.19.0 jinja2==3.1.2 pydantic==1.10.2 huggingface_hub==0.10.1 grpcio-tools==1.50.0 --no-cache-dir
  conda clean -ya
  # Cache Hugging Face downloads on the mounted volume.
  export TRANSFORMERS_CACHE=/cos/HF_cache
  export HUGGINGFACE_HUB_CACHE=$TRANSFORMERS_CACHE
  # Group-writable working and cache directories for non-root execution.
  mkdir /src
  cd /src
  chmod -R g+w /src
  mkdir /.cache
  chmod -R g+w /.cache
  export PORT=5000
  export UI_PORT=5001
  git clone https://github.com/huggingface/transformers-bloom-inference.git
  cd transformers-bloom-inference
  make gen-proto
  make ui
  # Serve BLOOM with DeepSpeed inference across 4 GPUs; gunicorn binds to
  # localhost:5000 with a single worker and no request timeout (-t 0).
  TOKENIZERS_PARALLELISM=false MODEL_NAME=microsoft/bloom-deepspeed-inference-fp16 MODEL_CLASS=AutoModelForCausalLM DEPLOYMENT_FRAMEWORK=ds_inference DTYPE=fp16 MAX_INPUT_LENGTH=2048 MAX_BATCH_SIZE=4 CUDA_VISIBLE_DEVICES=0,1,2,3 gunicorn -t 0 -w 1 -b 127.0.0.1:5000 inference_server.server:app --access-logfile - --access-logformat '%(h)s %(t)s "%(r)s" %(s)s %(b)s'
fi
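
The script is self-re-invoking: run with no arguments it takes the first branch, and the final conda run line re-executes it as ./bloom.sh conda inside the new environment, which takes the else branch. A minimal invocation sketch, assuming the file is saved as bloom.sh in the current directory (the name must match the self-call on the conda run line):

# save this gist as ./bloom.sh, then:
chmod +x ./bloom.sh
./bloom.sh   # stage 1; stage 2 then runs automatically via "conda run -n inference ./bloom.sh conda"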
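Once gunicorn is listening on 127.0.0.1:5000, the server can be exercised over HTTP. A hedged sketch of a generation request, assuming the inference_server app exposes a POST /generate/ route that accepts a JSON body with prompts under "text" plus generation parameters such as max_new_tokens, as described in the transformers-bloom-inference README; the route path and payload keys are assumptions, not confirmed by this gist:

# assumed endpoint and payload keys — adjust to the repo's actual API
curl -s 'http://127.0.0.1:5000/generate/' \
  -H 'Content-Type: application/json' \
  -d '{"text": ["DeepSpeed is a machine learning framework"], "max_new_tokens": 40}'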