local llama in your terminal
#!/usr/bin/env python3
import openai
import sys

# inspired by https://two-wrongs.com/q
# first download a llamafile (https://github.com/Mozilla-Ocho/llamafile)
# then run it in another terminal:
#   sh -c ./llava-v1.5-7b-q4.llamafile
#
# make this script executable (chmod +x local_llama.py) and put it somewhere on your PATH.
#
# now you can ask LLM questions without leaving the command line:
#   local_llama.py "write a python program to find the nth fibonacci number"
#   local_llama.py "summarize this document for me: $(cat README.md)"

client = openai.OpenAI(
    base_url="http://localhost:8080/v1",  # "http://<Your api-server IP>:port"
    api_key="sk-no-key-required",  # llamafile's server needs no key, but the client requires one
)

def get_prompts():
    # each command-line argument becomes its own user message;
    # with no arguments, read a single prompt from stdin
    if args := sys.argv[1:]:
        return [{"role": "user", "content": x} for x in args]
    else:
        return [{"role": "user", "content": open(0).read()}]  # stdin

stream = client.chat.completions.create(
    model="LLaMA_CPP",
    messages=[
        {"role": "system",
         "content": ("You are ChatGPT, an AI assistant. Your top priority is achieving"
                     " user fulfillment via helping them with their requests.")},
        *get_prompts(),
    ],
    stream=True,
)

# stream results chunk-by-chunk as they arrive, flushing so partial lines show immediately
for chunk in stream:
    print(chunk.choices[0].delta.content or "", end="", flush=True)
print()  # end the response with a newline
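
If you'd rather get the whole reply at once instead of token-by-token streaming, a minimal non-streaming sketch against the same client looks like this (drop stream=True and read the message content directly):

response = client.chat.completions.create(
    model="LLaMA_CPP",
    messages=get_prompts(),
    stream=False,
)
print(response.choices[0].message.content)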