@maxim-saplin · Created August 20, 2024
Chat with Falcon Mamba 7B
# pip install -U git+https://github.com/huggingface/transformers.git
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
import torch
import time


def chat_with_ai(model, tokenizer):
    """
    Simulate chatting with the AI model via the command line.
    """
    # Load the model into a text-generation pipeline
    pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

    print_welcome()

    # Chat loop
    conversation = []  # Initialize conversation history
    while True:
        input_text = input("\033[1;36muser:\033[0m ")
        if input_text == "quit":
            break

        user_message = {"role": "user", "content": input_text}
        # Add user message to conversation history
        conversation.append(user_message)

        start_time = time.time()
        response = pipe(
            conversation,
            max_new_tokens=2048,
            do_sample=True,
            temperature=0.01,
            # repetition_penalty=1.3,
        )
        end_time = time.time()

        print("\033[H\033[J")  # Clear the screen
        print_welcome()

        # The pipeline returns the full conversation, including the new assistant reply
        conversation = response[0]["generated_text"]
        num_tokens = len(tokenizer.tokenize(conversation[-1]["content"]))
        for message in conversation:
            print(f"\033[1;36m{message['role']}\033[0m: {message['content']}")

        tokens_per_second = num_tokens / (end_time - start_time)
        print(f"\033[1;31m{tokens_per_second:.2f} tokens per second\033[0m")


def print_welcome():
    print("\033[1;43mAI Chat Interface. Type 'quit' to exit.\033[0m")


if __name__ == "__main__":
    tokenizer = AutoTokenizer.from_pretrained("tiiuae/falcon-mamba-7b")
    model = AutoModelForCausalLM.from_pretrained(
        "tiiuae/falcon-mamba-7b", torch_dtype=torch.bfloat16
    ).to(0)
    model = torch.compile(model)
    chat_with_ai(model, tokenizer)
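For a quick sanity check without the interactive loop, the same checkpoint can be exercised with a single model.generate call. This is a minimal sketch and not part of the gist: the prompt string and generation settings are arbitrary placeholders, and it assumes the same tiiuae/falcon-mamba-7b checkpoint and a CUDA device 0 as used above.

# Minimal single-turn sketch (assumptions: same checkpoint as the gist,
# arbitrary prompt and generation settings, GPU at device 0).
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

tokenizer = AutoTokenizer.from_pretrained("tiiuae/falcon-mamba-7b")
model = AutoModelForCausalLM.from_pretrained(
    "tiiuae/falcon-mamba-7b", torch_dtype=torch.bfloat16
).to(0)

# Tokenize a raw prompt and move the tensors to the same device as the model
inputs = tokenizer("The capital of France is", return_tensors="pt").to(0)
outputs = model.generate(**inputs, max_new_tokens=32, do_sample=False)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

Greedy decoding (do_sample=False) is used here for reproducibility; the chat script above instead samples with a near-zero temperature, which behaves almost deterministically.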