HDCharles · June 4, 2024 20:21 · andrewor14 · Jun 4, 2024
diff --git a/eval_script.py b/eval_script.py
 import torch

 from transformers import AutoModelForCausalLM, AutoTokenizer

 from lm_eval.models.huggingface import HFLM
 from lm_eval.evaluator import evaluate
 from lm_eval.tasks import get_task_dict


 # Quick accuracy check: load a Hugging Face causal LM, apply a torchao
 # quantization API to it, and score it on the lm-eval tasks in `task_list`.
 # NOTE(review): the checkpoint path below is machine-specific; point it at a
 # local Hugging Face checkpoint directory before running.
 path_to_hf_checkpoint = "/home/cdhernandez/local/gpt-fast/checkpoints/meta-llama/Meta-Llama-3-8B"
 task_list = ["wikitext"]
 device = "cuda"
 precision = torch.bfloat16

 tokenizer = AutoTokenizer.from_pretrained(path_to_hf_checkpoint)
 # Place the model via the `device` setting above (instead of a second
 # hard-coded "cuda") so changing that one variable actually takes effect.
 model = AutoModelForCausalLM.from_pretrained(path_to_hf_checkpoint).to(device=device, dtype=precision)

 # Imported next to its call site so the import and the call can be swapped
 # together when trying a different quantization API.
 from torchao.quantization.quant_api import change_linear_weights_to_int4_woqtensors

 # your API Here
 # Called for its effect on `model`; the return value is not used.
 change_linear_weights_to_int4_woqtensors(model)

 # Evaluation only: no gradients are needed.
 with torch.no_grad():
     result = evaluate(
         HFLM(pretrained=model, tokenizer=tokenizer),
         get_task_dict(task_list),
         # Small limit keeps this a fast smoke test rather than a full run
         # (see the lm_eval `evaluate` documentation for `limit`).
         limit=10,
     )
 # Print one line of metrics per evaluated task.
 for task, res in result["results"].items():
     print(f"{task}: {res}")
	import torch

	from transformers import AutoModelForCausalLM, AutoTokenizer

	from lm_eval.models.huggingface import HFLM
	from lm_eval.evaluator import evaluate
	from lm_eval.tasks import get_task_dict


	# Quick accuracy check: load a Hugging Face causal LM, apply a torchao
	# quantization API to it, and score it on the lm-eval tasks in `task_list`.
	# NOTE(review): the checkpoint path below is machine-specific; point it at a
	# local Hugging Face checkpoint directory before running.
	path_to_hf_checkpoint = "/home/cdhernandez/local/gpt-fast/checkpoints/meta-llama/Meta-Llama-3-8B"
	task_list = ["wikitext"]
	device = "cuda"
	precision = torch.bfloat16

	tokenizer = AutoTokenizer.from_pretrained(path_to_hf_checkpoint)
	# Place the model via the `device` setting above (instead of a second
	# hard-coded "cuda") so changing that one variable actually takes effect.
	model = AutoModelForCausalLM.from_pretrained(path_to_hf_checkpoint).to(device=device, dtype=precision)

	# Imported next to its call site so the import and the call can be swapped
	# together when trying a different quantization API.
	from torchao.quantization.quant_api import change_linear_weights_to_int4_woqtensors

	# your API Here
	# Called for its effect on `model`; the return value is not used.
	change_linear_weights_to_int4_woqtensors(model)

	# Evaluation only: no gradients are needed.
	with torch.no_grad():
		result = evaluate(
			HFLM(pretrained=model, tokenizer=tokenizer),
			get_task_dict(task_list),
			# Small limit keeps this a fast smoke test rather than a full run
			# (see the lm_eval `evaluate` documentation for `limit`).
			limit=10,
		)
	# Print one line of metrics per evaluated task.
	for task, res in result["results"].items():
		print(f"{task}: {res}")