# gist by @joey00072, created July 1, 2024 22:03

import torch
from transformers import AutoTokenizer, LlamaConfig, LlamaForCausalLM

model_name = "TinyLlama/TinyLlama_v1.1"
# "eager" forces the plain PyTorch attention path (no SDPA/FlashAttention kernels).
config = LlamaConfig.from_pretrained(model_name, attn_implementation="eager")
# Inject custom values into the config; unknown keys are simply stored as
# attributes on the LlamaConfig object.
customs = {"segment_size": 128, "delta_update": True, "use_cache": False}
config.__dict__.update(customs)
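# Note: segment_size and delta_update look like knobs for a custom
# segmented-attention patch applied elsewhere; that patch is not part of this
# gist. A quick sanity check that the injected values landed on the config:
# assert config.segment_size == 128 and config.delta_update is True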
# Build a randomly initialized model from the modified config, then copy in
# the pretrained weights below.
model = LlamaForCausalLM(config)
print(model)
original_model = LlamaForCausalLM.from_pretrained(model_name)
# strict=False tolerates key mismatches, e.g. extra parameters introduced by a
# custom attention implementation.
model.load_state_dict(original_model.state_dict(), strict=False)
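# Optional sanity check (not in the original gist): load_state_dict returns
# the keys it skipped, which is worth printing when loading with strict=False.
# missing, unexpected = model.load_state_dict(original_model.state_dict(), strict=False)
# print("missing:", missing, "unexpected:", unexpected)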
tokenizer = AutoTokenizer.from_pretrained(model_name)
prompt = "September 2007In high school I decided I was going to study philosophy in college."
# The rest of the essay (commented out to keep the prompt short):
# "I had several motives, some more honorable than others. One of the less
# honorable was to shock people. College was regarded as job training where I
# grew up, so studying philosophy seemed an impressively impractical thing to
# do. Sort of like slashing holes in your clothes or putting a safety pin
# through your ear, which were other forms of impressive impracticality then
# just coming into fashion. But I had some more honest motives as well. I
# thought studying philosophy would be a shortcut straight"
# Run inference in fp16 on the GPU.
model = model.half()
model = model.cuda()
# torch.device as a context manager (PyTorch >= 2.0) makes newly created
# tensors, including the tokenizer's return_tensors="pt" output, land on cuda.
with torch.device("cuda"):
    inputs = tokenizer([prompt], return_tensors="pt")
    print(inputs)
    input_ids = inputs["input_ids"]
    out = model(input_ids)
    output = model.generate(**inputs, max_new_tokens=50)
    print(tokenizer.decode(output.tolist()[0]))
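# On torch < 2.0 the context manager above is unavailable; a portable
# alternative (a sketch with equivalent behavior, not from the original gist)
# is to move the tokenized batch explicitly:
#     inputs = tokenizer([prompt], return_tensors="pt").to("cuda")
#     output = model.generate(**inputs, max_new_tokens=50)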