@maxim-saplin · Created August 20, 2024
Chat with Falcon Mamba 7B
# pip install -U git+https://github.com/huggingface/transformers.git
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
import torch
import time


def chat_with_ai(model, tokenizer):
    """
    Simulate chatting with the AI model via the command line.
    """
    # Load the model into a text-generation pipeline
    pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

    print_welcome()

    # Chat loop
    conversation = []  # Initialize conversation history
    while True:
        input_text = input("\033[1;36muser:\033[0m ")
        if input_text == "quit":
            break

        user_message = {"role": "user", "content": input_text}
        # Add user message to conversation history
        conversation.append(user_message)

        start_time = time.time()
        response = pipe(
            conversation,
            max_new_tokens=2048,
            do_sample=True,
            temperature=0.01,
            # repetition_penalty=1.3,
        )
        end_time = time.time()

        print("\033[H\033[J")  # Clear the screen
        print_welcome()

        # The pipeline returns the full conversation, including the new assistant reply
        conversation = response[0]["generated_text"]
        num_tokens = len(tokenizer.tokenize(conversation[-1]["content"]))
        for message in conversation:
            print(f"\033[1;36m{message['role']}\033[0m: {message['content']}")

        tokens_per_second = num_tokens / (end_time - start_time)
        print(f"\033[1;31m{tokens_per_second:.2f} tokens per second\033[0m")


def print_welcome():
    print("\033[1;43mAI Chat Interface. Type 'quit' to exit.\033[0m")


if __name__ == "__main__":
    tokenizer = AutoTokenizer.from_pretrained("tiiuae/falcon-mamba-7b")
    model = AutoModelForCausalLM.from_pretrained(
        "tiiuae/falcon-mamba-7b", torch_dtype=torch.bfloat16
    ).to(0)
    model = torch.compile(model)
    chat_with_ai(model, tokenizer)
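For a quick sanity check without the interactive loop, the same checkpoint can be exercised with a single model.generate call. This is a minimal sketch and not part of the gist: the prompt string and generation settings are arbitrary placeholders, and it assumes the same tiiuae/falcon-mamba-7b checkpoint and a CUDA device 0 as used above.

# Minimal single-turn sketch (assumptions: same checkpoint as the gist,
# arbitrary prompt and generation settings, GPU at device 0).
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

tokenizer = AutoTokenizer.from_pretrained("tiiuae/falcon-mamba-7b")
model = AutoModelForCausalLM.from_pretrained(
    "tiiuae/falcon-mamba-7b", torch_dtype=torch.bfloat16
).to(0)

# Tokenize a raw prompt and move the tensors to the same device as the model
inputs = tokenizer("The capital of France is", return_tensors="pt").to(0)
outputs = model.generate(**inputs, max_new_tokens=32, do_sample=False)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

Greedy decoding (do_sample=False) is used here for reproducibility; the chat script above instead samples with a near-zero temperature, which behaves almost deterministically.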