Skip to content

Instantly share code, notes, and snippets.

@Frank-Buss
Last active December 14, 2023 01:13
Show Gist options
  • Save Frank-Buss/173fab1fc935b6d184b4b8454c254b06 to your computer and use it in GitHub Desktop.
Save Frank-Buss/173fab1fc935b6d184b4b8454c254b06 to your computer and use it in GitHub Desktop.
Sample script showing how to run the uncensored WizardLM LLM
#!/usr/bin/env python3
#
# Test script for the uncensored WizardLM model:
# https://huggingface.co/TheBloke/WizardLM-7B-uncensored-GGML
#
# Tested on Mac Mini M1, with 16 GB RAM. Needs some libraries:
#
# pip install torch transformers accelerate bitsandbytes
# CT_METAL=1 pip install ctransformers --no-binary ctransformers
#
# On CUDA systems, use this to install ctransformers:
# pip install ctransformers[cuda]
#
# It generates about 10 tokens per second on a Mac.
# On Windows with a RTX3080, about 20 tokens per second.
from ctransformers import AutoModelForCausalLM
# initialize model
# Loads the quantized WizardLM model via ctransformers; the first run
# downloads the model file from the Hugging Face Hub.
# NOTE(review): the header above links the GGML repo, but this pulls the
# GGUF build — presumably the newer format of the same model; confirm.
llm = AutoModelForCausalLM.from_pretrained(
"TheBloke/WizardLM-7B-uncensored-GGUF",
model_file="WizardLM-7B-uncensored.Q4_K_M.gguf",  # 4-bit "K_M" quantization
model_type="llama",
gpu_layers=50,  # layers offloaded to GPU (Metal on Mac, CUDA on Windows)
max_new_tokens = 1000,  # cap on tokens generated per prompt
context_length = 6000)
def generate_and_print(llm, tokens):
    """Stream the model's completion for `tokens` to stdout.

    Each generated token is detokenized and printed immediately
    (no trailing newline, flushed) so output appears as it is
    produced.  A Ctrl-C during generation only stops the current
    output; the caller's prompt loop keeps running.
    """
    try:
        for piece in llm.generate(tokens):
            print(llm.detokenize(piece), end='', flush=True)
    except KeyboardInterrupt:
        print("\nOutput interrupted by user. Enter a new prompt.")
# Interactive prompt loop: read a line, tokenize it, and stream the
# model's completion.  Typing "exit" (any case) or pressing Ctrl-C at
# the prompt terminates the program.
while True:
    try:
        prompt = input("\nEnter your prompt (ctrl-c to exit) : ")
        if prompt.lower() == 'exit':
            break
        generate_and_print(llm, llm.tokenize(prompt))
    except KeyboardInterrupt:
        print("\nProgram interrupted by user. Exiting...")
        break
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment