Use CodeLlama on macOS with MPS quickly
# Usage
# =====
#
# ## Prerequisite
#
# Prepare Python 3, for example, install Homebrew and `brew install python`.
#
# ## Install dependencies
#
# $ python3 -m venv .venv
# $ .venv/bin/pip3 install --pre --index-url https://download.pytorch.org/whl/nightly/cpu torch
# $ .venv/bin/pip3 install git+https://github.com/huggingface/transformers.git
#
# Due to a known issue, the latest nightly PyTorch build is required to use MPS.
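#
# To verify that the installed build can use MPS (a quick sanity check, not
# part of the original steps):
#
# $ .venv/bin/python3 -c "import torch; print(torch.backends.mps.is_available())"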
#
# ## Patch Transformers
#
# Patch `lib/python3.11/site-packages/transformers/models/llama/modeling_llama.py` to change
# `long()` to `int()` on the line `position_ids = attention_mask.long().cumsum(-1) - 1`.
# This is required to use MPS.
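#
# After the patch, that line should read:
#
# position_ids = attention_mask.int().cumsum(-1) - 1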
#
# ## Run test script on Python REPL
#
# $ .venv/bin/python3 -i test.py
# >>> gen('Write a function that prints "Hello World"')
#
# The first time you run this script, it downloads the large model weights and
# metadata into `~/.cache`. Be on a fast internet connection and have enough
# storage ready.
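#
# The cache location can be overridden with the `HF_HOME` environment variable
# (standard Hugging Face behavior, not specific to this script).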
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

device = "mps"
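
# Optional sanity check (not in the original script): fail early if this
# PyTorch build was compiled without MPS support.
assert torch.backends.mps.is_available(), "MPS is not available; see the PyTorch install note above"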
model_name = "codellama/CodeLlama-7b-Instruct-hf"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    trust_remote_code=True,
)
#print(model)
model.to(device)
@torch.no_grad()
def gen(instruction, system=None):
    # Wrap the instruction in the Llama 2 chat template; an optional system
    # prompt goes inside <<SYS>> tags.
    if system is not None:
        instruction = f"<<SYS>>\n{system}\n<</SYS>>\n\n{instruction}"
    prompt = f"[INST] {instruction} [/INST]"
    print(prompt)
    inputs = tokenizer(
        prompt,
        return_tensors="pt",
    )
    inputs.to(device)
    #print(inputs)
    tokens = model.generate(
        **inputs,
        max_new_tokens=128,
        temperature=0.2,
        do_sample=True,
    )
    #print(tokens)
    print(tokenizer.decode(tokens[0], skip_special_tokens=True))
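
# Sampling note: `temperature=0.2` with `do_sample=True` keeps completions
# close to greedy decoding; raise the temperature for more varied output.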
# gen(
#     'Write a function that computes the set of sums of all contiguous sublists of a given list in Python.',
#     system="Provide answers in JavaScript",
# )