Created
April 22, 2024 02:54
-
-
Save tudoanh/400765880f0eb8b9b45d3ed2ff96f086 to your computer and use it in GitHub Desktop.
Run Llama 3 8B with Llamafile
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
import subprocess | |
import os | |
import signal | |
def download_file(url, target_path, chunk_size=8192, timeout=30):
    """Stream the resource at ``url`` into the file ``target_path``.

    Args:
        url: Address fetched with an HTTP GET request.
        target_path: Destination file, opened in binary write mode (an
            existing file is overwritten).
        chunk_size: Number of bytes requested per streamed chunk.
        timeout: Value forwarded to ``requests.get`` as ``timeout`` so a
            stalled connection raises instead of blocking indefinitely.

    Raises:
        requests.HTTPError: If the server responds with a 4xx/5xx status.
    """
    # The context manager releases the connection even if writing fails.
    with requests.get(url, stream=True, timeout=timeout) as response:
        # Check the status before opening the destination so an HTTP error
        # page is never saved in place of the requested file.
        response.raise_for_status()
        with open(target_path, "wb") as f:
            for chunk in response.iter_content(chunk_size=chunk_size):
                f.write(chunk)
def setup_and_run():
    """Download the llamafile runtime and model, then run the server.

    Steps, in order: download both files into the current directory, mark
    the llamafile binary as executable (mode 0o755), start it through the
    shell listening on 0.0.0.0:8080, and block until the process exits or
    the user presses CTRL-C. On the way out the process is terminated, and
    killed if it has not exited within five seconds.
    """
    runtime_file = "llamafile-0.7.3"
    weights_file = "dolphin-2.9-llama3-8b.Q4_0.gguf"

    # (progress message, source URL, local destination), fetched in order.
    fetch_plan = (
        (
            "Downloading llamafile executable...",
            "https://github.com/Mozilla-Ocho/llamafile/releases/download/0.7.3/llamafile-0.7.3",
            runtime_file,
        ),
        (
            "Downloading model file...",
            "https://huggingface.co/QuantFactory/dolphin-2.9-llama3-8b-GGUF/resolve/main/dolphin-2.9-llama3-8b.Q4_0.gguf?download=true",
            weights_file,
        ),
    )
    for banner, source_url, destination in fetch_plan:
        print(banner)
        download_file(source_url, destination)

    # The downloaded binary needs the executable bit before it can be run.
    os.chmod(runtime_file, 0o755)
    print("Permissions set: executable")

    launch_line = "./{} -m {} -ngl 15 --port 8080 --host 0.0.0.0".format(
        runtime_file, weights_file
    )
    print("Starting the server with command:", launch_line, sep="\n")

    # NOTE(review): shell=True looks deliberate. The llamafile docs mention
    # that starting it from Python's subprocess may require going through a
    # shell; confirm before replacing this with an argv list.
    server = subprocess.Popen(launch_line, shell=True)
    print("Server running... Press CTRL-C to stop.")

    try:
        # Blocks until the server exits on its own or CTRL-C arrives.
        server.wait()
    except KeyboardInterrupt:
        print("CTRL-C received. Stopping the server...")
    finally:
        # Ask politely first; escalate to kill() after a 5 second grace period.
        server.terminate()
        try:
            server.wait(timeout=5)
        except subprocess.TimeoutExpired:
            print("Forcing process termination...")
            server.kill()
        print("Server stopped.")
# Run the download-and-serve workflow only when executed as a script,
# not when this module is imported.
if __name__ == "__main__":
    setup_and_run()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment