Intel Extension for PyTorch LLM Example
# Code modified from https://huggingface.co/CarperAI/stable-vicuna-13b-delta
# in order to use Intel Extension for PyTorch (IPEX) to run on Intel Arc GPUs
# and for fastchat-t5-3b-v1.0, since it can fit in 8GB of VRAM using BF16
# or 16GB of VRAM using FP32. IPEX/Arc does not seem to support bare FP16.
# Follow the instructions here to install IPEX:
# https://intel.github.io/intel-extension-for-pytorch/xpu/1.13.120+xpu/tutorials/installation.html
# And remember to source setvars.sh before running this script:
# `source {ONEAPI_ROOT}/setvars.sh` e.g. `source /opt/intel/oneapi/setvars.sh`

import torch
from transformers import T5Tokenizer, AutoModelForSeq2SeqLM
import intel_extension_for_pytorch as ipex  # importing registers the "xpu" device with PyTorch

# Load the tokenizer and model from a local checkout of fastchat-t5-3b-v1.0
tokenizer = T5Tokenizer.from_pretrained("../fastchat-t5-3b-v1.0/")
model = AutoModelForSeq2SeqLM.from_pretrained("../fastchat-t5-3b-v1.0/", low_cpu_mem_usage=True)

model = model.type(torch.bfloat16)  # Comment out this line to use FP32
model = model.to("xpu")             # Move the model to the Intel GPU
model = torch.xpu.optimize(model, inplace=True)  # Apply IPEX optimizations for XPU

prompt = """\
### Human: Write a Python script for text classification using Transformers and PyTorch
### Assistant:\
"""

# Tokenize the prompt and move the input tensors to the same device as the model
inputs = tokenizer(prompt, return_tensors='pt').to('xpu')

# Sample up to 256 new tokens; temperature=1.0 and top_p=1.0 leave the
# model's output distribution unmodified
tokens = model.generate(
    **inputs,
    max_new_tokens=256,
    do_sample=True,
    temperature=1.0,
    top_p=1.0,
)
print(tokenizer.decode(tokens[0], skip_special_tokens=True))
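If generation fails with a device error or silently runs on CPU, it helps to confirm that IPEX actually registered the XPU backend before loading the model. A minimal sanity-check sketch, assuming the IPEX XPU build is installed and setvars.sh has been sourced (is_available, device_count, and get_device_name live in the torch.xpu namespace that IPEX registers):

import torch
import intel_extension_for_pytorch as ipex  # noqa: F401 - importing registers torch.xpu

print(torch.xpu.is_available())      # True if an Intel GPU is visible to IPEX
print(torch.xpu.device_count())      # number of XPU devices found
print(torch.xpu.get_device_name(0))  # e.g. the Arc card's device name

The 8GB BF16 / 16GB FP32 figures in the header can be spot-checked the same way: IPEX XPU builds mirror much of the CUDA memory-stats API, so (assuming your build includes it) torch.xpu.max_memory_allocated() called after generate() reports the peak device memory in bytes.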