# gist by @joey00072, created July 1, 2024 22:03

import torch
from transformers import AutoTokenizer, LlamaConfig, LlamaForCausalLM

model_name = "TinyLlama/TinyLlama_v1.1"
# "eager" forces the plain PyTorch attention path (no SDPA/FlashAttention kernels).
config = LlamaConfig.from_pretrained(model_name, attn_implementation="eager")
# Inject custom values into the config; unknown keys are simply stored as
# attributes on the LlamaConfig object.
customs = {"segment_size": 128, "delta_update": True, "use_cache": False}
config.__dict__.update(customs)
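# Note: segment_size and delta_update look like knobs for a custom
# segmented-attention patch applied elsewhere; that patch is not part of this
# gist. A quick sanity check that the injected values landed on the config:
# assert config.segment_size == 128 and config.delta_update is True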
# Build a randomly initialized model from the modified config, then copy in
# the pretrained weights below.
model = LlamaForCausalLM(config)
print(model)
original_model = LlamaForCausalLM.from_pretrained(model_name)
# strict=False tolerates key mismatches, e.g. extra parameters introduced by a
# custom attention implementation.
model.load_state_dict(original_model.state_dict(), strict=False)
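# Optional sanity check (not in the original gist): load_state_dict returns
# the keys it skipped, which is worth printing when loading with strict=False.
# missing, unexpected = model.load_state_dict(original_model.state_dict(), strict=False)
# print("missing:", missing, "unexpected:", unexpected)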
tokenizer = AutoTokenizer.from_pretrained(model_name)
prompt = "September 2007In high school I decided I was going to study philosophy in college."
# The rest of the essay (commented out to keep the prompt short):
# "I had several motives, some more honorable than others. One of the less
# honorable was to shock people. College was regarded as job training where I
# grew up, so studying philosophy seemed an impressively impractical thing to
# do. Sort of like slashing holes in your clothes or putting a safety pin
# through your ear, which were other forms of impressive impracticality then
# just coming into fashion. But I had some more honest motives as well. I
# thought studying philosophy would be a shortcut straight"
# Run inference in fp16 on the GPU.
model = model.half()
model = model.cuda()
# torch.device as a context manager (PyTorch >= 2.0) makes newly created
# tensors, including the tokenizer's return_tensors="pt" output, land on cuda.
with torch.device("cuda"):
    inputs = tokenizer([prompt], return_tensors="pt")
    print(inputs)
    input_ids = inputs["input_ids"]
    out = model(input_ids)
    output = model.generate(**inputs, max_new_tokens=50)
    print(tokenizer.decode(output.tolist()[0]))
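# On torch < 2.0 the context manager above is unavailable; a portable
# alternative (a sketch with equivalent behavior, not from the original gist)
# is to move the tokenized batch explicitly:
#     inputs = tokenizer([prompt], return_tensors="pt").to("cuda")
#     output = model.generate(**inputs, max_new_tokens=50)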