@up1
Last active December 9, 2024 09:55
Local LLM with LlamaEdge
1. Install WasmEdge
$curl -sSf https://raw.githubusercontent.com/WasmEdge/WasmEdge/master/utils/install_v2.sh | bash
2. Download the LLM model to run on the local machine, using Llama 3.2 1B
$curl -LO https://huggingface.co/second-state/Llama-3.2-1B-Instruct-GGUF/resolve/main/Llama-3.2-1B-Instruct-Q5_K_M.gguf
3. Download the chat app, to test question-and-answer against the local LLM
$curl -LO https://github.com/second-state/LlamaEdge/releases/latest/download/llama-chat.wasm
4. Run the chat app with the downloaded Llama 3.2 1B model
$wasmedge --dir .:. --nn-preload default:GGML:AUTO:Llama-3.2-1B-Instruct-Q5_K_M.gguf llama-chat.wasm -p llama-3-chat
[INFO] llama-chat version: 0.14.17
[INFO] Model name: default
[INFO] Model alias: default
[INFO] Prompt template: llama-3-chat
[INFO] Context size: 512
[INFO] Number of tokens to predict: 1024
[INFO] Number of layers to run on the GPU: 100
[INFO] Threads: 2
[INFO] Batch size for prompt processing: 512
[INFO] Temperature for sampling: 1
[INFO] Penalize repeat sequence of tokens: 1.1
[INFO] Presence penalty (0.0 = disabled): 0
[INFO] Frequency penalty (0.0 = disabled): 0
[INFO] BNF-like grammar:
[INFO] Enable prompt log: false
[INFO] Enable plugin log: false
[INFO] Temperature for sampling: 1
[INFO] Wasi-nn-ggml plugin: b4273 (commit c9c6e01d)
================================== Running in interactive mode. ===================================
- Press [Ctrl+C] to interject at any time.
- Press [Return] to end the input.
- For multi-line inputs, end each line with '\' and press [Return] to get another line.
[You]:
hello world, how are you ?
[Bot]:
Hello, I'm here and ready to help.
[You]:
who are you ?
[Bot]:
I'm a computer program designed to assist with questions and tasks.
1. Download the API server
$curl -LO https://github.com/LlamaEdge/LlamaEdge/releases/latest/download/llama-api-server.wasm
2. Download the embedding model
$curl -LO https://huggingface.co/gaianet/Nomic-embed-text-v1.5-Embedding-GGUF/resolve/main/nomic-embed-text-v1.5.f16.gguf
3. Start the API server (the comma-separated option values pair up positionally: chat model first, embedding model second)
$wasmedge --dir .:. \
--nn-preload default:GGML:AUTO:Llama-3.2-1B-Instruct-Q5_K_M.gguf \
--nn-preload embedding:GGML:AUTO:nomic-embed-text-v1.5.f16.gguf \
llama-api-server.wasm \
--model-alias default,embedding \
--model-name llama-3.2-1b,nomic-embed \
--prompt-template llama-3-chat,embedding \
--batch-size 128,8192 \
--ctx-size 8192,8192
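Once the server is up, you can check that both models were registered via the OpenAI-compatible `/v1/models` endpoint. A minimal stdlib-only sketch; the helper names are our own, and the endpoint is assumed to follow the usual OpenAI response shape:

```python
# Query the local LlamaEdge API server's /v1/models endpoint.
import json
import urllib.request


def model_ids(payload: dict) -> list[str]:
    """Extract model ids from an OpenAI-style /v1/models response."""
    return [m["id"] for m in payload.get("data", [])]


def list_models(base_url: str = "http://localhost:8080/v1") -> list[str]:
    """Fetch and return the ids of all models the server has loaded."""
    with urllib.request.urlopen(f"{base_url}/models") as resp:
        return model_ids(json.load(resp))
```

With the server from the step above running, `print(list_models())` should list both `--model-name` values (`llama-3.2-1b` and `nomic-embed`).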
4. Test it out
$curl -X POST http://localhost:8080/v1/chat/completions -H 'accept:application/json' -H 'Content-Type: application/json' -d '{"messages":[{"role":"system", "content":"You are a helpful AI assistant"}, {"role":"user", "content":"Hello world"}], "model":"llama-3.2-1b"}'
Result:
{"id":"chatcmpl-f9831ff6-e7c4-4d64-abc0-aa2800c4e635","object":"chat.completion","created":1733725027,"model":"llama-3.2-1b","choices":[{"index":0,"message":{"content":"It's nice to meet you! Is there something I can help with or would you like to chat?","role":"assistant"},"finish_reason":"stop","logprobs":null}],"usage":{"prompt_tokens":23,"completion_tokens":23,"total_tokens":46}}
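The embedding model loaded alongside the chat model can be exercised through the `/v1/embeddings` endpoint. A stdlib-only sketch, assuming the usual OpenAI-style request/response shape; the `embed` and `cosine_similarity` helpers are our own additions:

```python
# Request embeddings from the local server and compare two texts.
import json
import math
import urllib.request


def embed(texts, base_url="http://localhost:8080/v1", model="nomic-embed"):
    """POST texts to /v1/embeddings and return a list of vectors."""
    req = urllib.request.Request(
        f"{base_url}/embeddings",
        data=json.dumps({"model": model, "input": texts}).encode(),
        headers={"Content-Type": "application/json"},
    )
    with urllib.request.urlopen(req) as resp:
        body = json.load(resp)
    return [d["embedding"] for d in body["data"]]


def cosine_similarity(a, b):
    """Plain cosine similarity between two equal-length vectors."""
    dot = sum(x * y for x, y in zip(a, b))
    norm = math.sqrt(sum(x * x for x in a)) * math.sqrt(sum(y * y for y in b))
    return dot / norm
```

With the server running, `v1, v2 = embed(["a cat", "a kitten"])` followed by `print(cosine_similarity(v1, v2))` should give a score close to 1.0 for semantically similar texts.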
The same endpoint can be called from Python with the openai client:

import openai

# Point the client at the local server; the API key is not validated,
# so any non-empty string works.
client = openai.OpenAI(
    base_url="http://localhost:8080/v1/",
    api_key="dfdf",
)

response = client.chat.completions.create(
    model="llama-3.2-1b",
    messages=[
        {"role": "system", "content": "You are a helpful AI assistant"},
        {"role": "user", "content": "Hello world"},
    ],
    temperature=0.7,
    max_tokens=500,
)
print(response)
# Result
$python hello.py
ChatCompletion(id='chatcmpl-c27cf688-8d52-48a2-88fb-af41cccfa90f',
choices=[Choice(finish_reason='stop', index=0, logprobs=None,
message=ChatCompletionMessage(content="Hello! It's nice to meet you. How can I assist you today?",
refusal=None, role='assistant', audio=None, function_call=None, tool_calls=None))],
created=1733725397, model='llama-3.2-1b', object='chat.completion', service_tier=None, system_fingerprint=None,
usage=CompletionUsage(completion_tokens=18, prompt_tokens=23, total_tokens=41,
completion_tokens_details=None, prompt_tokens_details=None))
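For interactive use you can also stream the response. A stdlib-only sketch, assuming the server supports the OpenAI-style server-sent-events format (`data: {...}` lines ending with `data: [DONE]`); the helper names are our own:

```python
# Stream a chat completion from the local server chunk by chunk.
import json
import urllib.request


def delta_text(sse_line: str) -> str:
    """Extract the content delta from one 'data: {...}' SSE line."""
    payload = sse_line.removeprefix("data: ").strip()
    if not payload or payload == "[DONE]":
        return ""
    chunk = json.loads(payload)
    delta = chunk["choices"][0].get("delta", {})
    return delta.get("content") or ""


def stream_chat(prompt, base_url="http://localhost:8080/v1", model="llama-3.2-1b"):
    """Yield response text fragments as the server produces them."""
    req = urllib.request.Request(
        f"{base_url}/chat/completions",
        data=json.dumps({
            "model": model,
            "messages": [{"role": "user", "content": prompt}],
            "stream": True,
        }).encode(),
        headers={"Content-Type": "application/json"},
    )
    with urllib.request.urlopen(req) as resp:
        for raw in resp:
            line = raw.decode()
            if line.startswith("data: "):
                yield delta_text(line)
```

With the server running, `for part in stream_chat("Hello world"): print(part, end="", flush=True)` prints the reply incrementally instead of waiting for the full completion.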