Intel Extension for PyTorch LLM Example
# Code modified from https://huggingface.co/CarperAI/stable-vicuna-13b-delta
# in order to use Intel Extension for PyTorch (IPEX) to run on Intel Arc GPUs
# and for fastchat-t5-3b-v1.0, since it can fit in 8GB of VRAM using BF16
# or 16GB of VRAM using FP32. IPEX/Arc does not seem to support bare FP16.
# Follow the instructions here to install IPEX:
# https://intel.github.io/intel-extension-for-pytorch/xpu/1.13.120+xpu/tutorials/installation.html
# And remember to source setvars.sh before running this script:
# `source {ONEAPI_ROOT}/setvars.sh` e.g. `source /opt/intel/oneapi/setvars.sh`

import torch
from transformers import T5Tokenizer, AutoModelForSeq2SeqLM
import intel_extension_for_pytorch as ipex  # importing registers the "xpu" device with PyTorch

# Load the tokenizer and model from a local checkout of fastchat-t5-3b-v1.0
tokenizer = T5Tokenizer.from_pretrained("../fastchat-t5-3b-v1.0/")
model = AutoModelForSeq2SeqLM.from_pretrained("../fastchat-t5-3b-v1.0/", low_cpu_mem_usage=True)

model = model.type(torch.bfloat16)  # Comment out this line to use FP32
model = model.to("xpu")             # Move the model to the Intel GPU
model = torch.xpu.optimize(model, inplace=True)  # Apply IPEX optimizations for XPU

prompt = """\
### Human: Write a Python script for text classification using Transformers and PyTorch
### Assistant:\
"""

# Tokenize the prompt and move the input tensors to the same device as the model
inputs = tokenizer(prompt, return_tensors='pt').to('xpu')

# Sample up to 256 new tokens; temperature=1.0 and top_p=1.0 leave the
# model's output distribution unmodified
tokens = model.generate(
    **inputs,
    max_new_tokens=256,
    do_sample=True,
    temperature=1.0,
    top_p=1.0,
)
print(tokenizer.decode(tokens[0], skip_special_tokens=True))
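If generation fails with a device error or silently runs on CPU, it helps to confirm that IPEX actually registered the XPU backend before loading the model. A minimal sanity-check sketch, assuming the IPEX XPU build is installed and setvars.sh has been sourced (is_available, device_count, and get_device_name live in the torch.xpu namespace that IPEX registers):

import torch
import intel_extension_for_pytorch as ipex  # noqa: F401 - importing registers torch.xpu

print(torch.xpu.is_available())      # True if an Intel GPU is visible to IPEX
print(torch.xpu.device_count())      # number of XPU devices found
print(torch.xpu.get_device_name(0))  # e.g. the Arc card's device name

The 8GB BF16 / 16GB FP32 figures in the header can be spot-checked the same way: IPEX XPU builds mirror much of the CUDA memory-stats API, so (assuming your build includes it) torch.xpu.max_memory_allocated() called after generate() reports the peak device memory in bytes.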