Skip to content

Instantly share code, notes, and snippets.

@ochafik
Last active April 30, 2024 08:14
Show Gist options
  • Select an option

  • Save ochafik/9e929cb6c5f7f32cd67e4904f3e6d226 to your computer and use it in GitHub Desktop.

Select an option

Save ochafik/9e929cb6c5f7f32cd67e4904f3e6d226 to your computer and use it in GitHub Desktop.
Simple llama.cpp benchmark

Context: testing flash attention from ggml-org/llama.cpp#5021

Prerequisite

# Fetch the flash-attention branch and build the two binaries used below.
git clone https://github.com/ggerganov/llama.cpp
cd llama.cpp
# A fresh clone only has the `origin` remote, so fetch from it (there is no `upstream`).
git fetch origin
# Check out a local branch tracking origin/gg/flash-attn so the following
# `git pull` has a branch to update (pulling on a detached checkout fails).
git checkout gg/flash-attn
git pull
make clean && make -j LLAMA_CURL=1 main llama-bench

llama-bench

# Benchmark one model with flash attention off and on (-fa 0,1). The -p values
# appear as "pp N" rows and the -n values as "tg N" rows in the result tables.
./llama-bench -m models/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf -fa 0,1 -p 512,1024 -n 128,256,512,1024

M3 Pro 36gb 18gpu

| model | size | params | backend | ngl | fa | test | t/s |
| --- | --- | --- | --- | --- | --- | --- | --- |
| llama 8B Q4_K - Medium | 4.58 GiB | 8.03 B | Metal | 99 | 0 | pp 512 | 308.84 ± 1.40 |
| llama 8B Q4_K - Medium | 4.58 GiB | 8.03 B | Metal | 99 | 0 | tg 128 | 24.88 ± 0.14 |
| llama 8B Q4_K - Medium | 4.58 GiB | 8.03 B | Metal | 99 | 0 | tg 256 | 24.88 ± 0.16 |
| llama 8B Q4_K - Medium | 4.58 GiB | 8.03 B | Metal | 99 | 1 | pp 512 | 324.32 ± 1.02 |
| llama 8B Q4_K - Medium | 4.58 GiB | 8.03 B | Metal | 99 | 1 | tg 128 | 25.75 ± 0.12 |
| llama 8B Q4_K - Medium | 4.58 GiB | 8.03 B | Metal | 99 | 1 | tg 256 | 25.50 ± 0.33 |

M1 Ultra 128gb 64gpu

| model | size | params | backend | ngl | fa | test | t/s |
| --- | --- | --- | --- | --- | --- | --- | --- |
| llama 8B Q4_K - Medium | 4.58 GiB | 8.03 B | Metal | 99 | 0 | pp 512 | 824.02 ± 13.65 |
| llama 8B Q4_K - Medium | 4.58 GiB | 8.03 B | Metal | 99 | 0 | pp 1024 | 837.59 ± 5.64 |
| llama 8B Q4_K - Medium | 4.58 GiB | 8.03 B | Metal | 99 | 0 | tg 128 | 66.80 ± 0.10 |
| llama 8B Q4_K - Medium | 4.58 GiB | 8.03 B | Metal | 99 | 0 | tg 256 | 66.99 ± 0.12 |
| llama 8B Q4_K - Medium | 4.58 GiB | 8.03 B | Metal | 99 | 0 | tg 512 | 66.48 ± 0.03 |
| llama 8B Q4_K - Medium | 4.58 GiB | 8.03 B | Metal | 99 | 0 | tg 1024 | 65.23 ± 0.07 |
| llama 8B Q4_K - Medium | 4.58 GiB | 8.03 B | Metal | 99 | 1 | pp 512 | 905.47 ± 0.70 |
| llama 8B Q4_K - Medium | 4.58 GiB | 8.03 B | Metal | 99 | 1 | pp 1024 | 895.86 ± 0.62 |
| llama 8B Q4_K - Medium | 4.58 GiB | 8.03 B | Metal | 99 | 1 | tg 128 | 70.67 ± 0.08 |
| llama 8B Q4_K - Medium | 4.58 GiB | 8.03 B | Metal | 99 | 1 | tg 256 | 70.63 ± 0.03 |
| llama 8B Q4_K - Medium | 4.58 GiB | 8.03 B | Metal | 99 | 1 | tg 512 | 70.31 ± 0.04 |
| llama 8B Q4_K - Medium | 4.58 GiB | 8.03 B | Metal | 99 | 1 | tg 1024 | 69.68 ± 0.06 |

Custom benchmark

# Time ./main for every combination of the -L parameter lists below.
# `-L flag -fa,` ends with an empty value, so each combination is run once
# with -fa and once without it.
# --setup writes a prompt file containing "abc" repeated n_prompt times;
# list(dict(abc=1).keys())[0] is a quote-free way to spell the string "abc".
hyperfine --warmup 1 --runs 10 \
    -L flag -fa, \
    -L model models/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf \
    -L n_prompt 10,100,1000 \
    -L n_predict 10,100,1000 \
    --setup 'python -c "print(list(dict(abc=1).keys())[0] * {n_prompt})" > prompt-abc-{n_prompt}.txt' \
    './main {flag} -f prompt-abc-{n_prompt}.txt -n {n_predict} -m {model} --seed 123 --top_p 0.0 --top_k 1 -c 2500' \
    --export-json results.json

# Turn the exported timings into per-combination percentages and a markdown table.
python analyze_results.py results.json

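Meta-Llama-3-8B-Instruct-Q4_K_M.gguf

Each cell is the relative change in mean run time with `-fa` compared to the same run without it (negative means the `-fa` run was faster). Rows are prompt sizes (`n_prompt`); columns are `n_predict` values.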

M1 Ultra 128gb 64gpu

| prompt \ n | 10 | 100 | 1000 |
| --- | --- | --- | --- |
| 10 | -0.68% | -1.43% | -5.33% |
| 100 | -0.70% | -3.88% | -5.57% |
| 1000 | -1.41% | -4.81% | -9.26% |

M3 Pro 36gb 18gpu

| prompt \ n | 10 | 100 | 1000 |
| --- | --- | --- | --- |
| 10 | -2.26% | -2.39% | -9.45% |
| 100 | -0.85% | -2.41% | -7.69% |
| 1000 | -5.23% | -8.13% | -9.61% |
import json,os,sys
# Expect exactly one argument: the JSON file written by `hyperfine --export-json`.
if len(sys.argv) != 2:
    # Exit with a usage hint instead of an opaque unpacking error.
    sys.exit(f'Usage: {sys.argv[0]} <results.json>')
results_file = sys.argv[1]

# The export is a JSON object whose 'results' list has one entry per benchmarked command.
with open(results_file, encoding='utf-8') as f:
    results = json.load(f)['results']
def params(params, without) -> dict:
    """Return a copy of ``params`` with every key listed in ``without`` removed.

    Args:
        params: Mapping of parameter name to value (for example one result's
            ``'parameters'`` entry).
        without: Container of key names to leave out.

    Returns:
        A new dict holding the remaining items; ``params`` is not modified.
    """
    return {k: v for k, v in params.items() if k not in without}
# Group the results by every parameter except 'flag', so that runs differing
# only in the flag land in the same list.
by_params = {}
for result in results:
    # The JSON text of the remaining parameters serves as a hashable dict key.
    key = json.dumps(params(result['parameters'], without=['flag']))
    by_params.setdefault(key, []).append(result)

# Order every group by its flag, which puts the empty flag before '-fa'.
# One stable sort per group yields the same order as re-sorting after each append.
for group in by_params.values():
    group.sort(key=lambda r: json.dumps(r['parameters']['flag']))
def sortby(l: list, key):
    """Return a new list with the items of ``l`` sorted by ``key``.

    Args:
        l: Iterable of items to sort; it is left unmodified.
        key: One-argument function returning the sort key for an item.

    Returns:
        A new list in ascending key order (stable for equal keys).
    """
    return sorted(l, key=key)
# Axes of the summary table: one column per n_predict value, one row per n_prompt value.
x_dim = 'n_predict'
y_dim = 'n_prompt'
# Order the distinct values numerically via float() rather than by their raw representation.
x_values = sorted({r['parameters'][x_dim] for r in results}, key=float)
y_values = sorted({r['parameters'][y_dim] for r in results}, key=float)
# table[row][column]; a cell stays None when its combination has no pair of runs.
table = [[None] * len(x_values) for _ in y_values]

# A separate loop variable keeps the module-level `results` list intact.
for group in by_params.values():
    if len(group) < 2:
        # Only one run for this combination: nothing to compare it against.
        continue
    # Exactly two runs are expected per group (more raises ValueError here).
    [r1, r2] = group
    m1 = r1['mean']
    m2 = r2['mean']
    f1 = r1['parameters']['flag']
    f2 = r2['parameters']['flag']
    # Explicit check instead of `assert`, which is skipped under `python -O`.
    if f1 != '' or f2 != '-fa':
        raise ValueError(f'Expected flags "" then "-fa", got {f1!r} then {f2!r}')
    # Relative change of the mean from the run without the flag (m1) to the run
    # with -fa (m2), in percent. Negative means the -fa run had the lower mean.
    faster_percent = 100 * (m2 - m1) / m1
    ps = params(r1['parameters'], without=['flag', 'model'])
    # Positive values get an explicit '+'; negative ones already carry their sign.
    res = f'{"+" if faster_percent > 0 else ""}{faster_percent:.2f}%'
    print(f'Speedup for flag={f1} -> flag={f2}: {res} {json.dumps(ps)}')
    x = x_values.index(ps[x_dim])
    y = y_values.index(ps[y_dim])
    table[y][x] = res
# Render the table as markdown: a header row, a separator row, then one row per y value.
# The backslash is written as "\\" because a bare "\ " is an invalid escape
# sequence that newer Python versions warn about; the printed text is unchanged.
header = "| prompt \\ n | " + " | ".join(str(x) for x in x_values) + " |\n"
header += "| " + "--- |" * (len(x_values) + 1) + "\n"
# Each row starts with its y value followed by that row's cells.
rows = "".join(
    "| " + str(y) + " | " + " | ".join(str(cell) for cell in row) + " |\n"
    for y, row in zip(y_values, table)
)
markdown_table = header + rows
print(markdown_table)
{
"results": [
{
"command": "./main -fa -f prompt-abc-10.txt -n 10 -m models/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf --seed 123 --top_p 0.0 --top_k 1 -c 2500",
"mean": 1.2602535450199999,
"stddev": 0.006942608534263967,
"median": 1.26241626132,
"user": 0.5491224,
"system": 0.16038526,
"min": 1.24765992832,
"max": 1.2673547203200002,
"times": [
1.26276838632,
1.24765992832,
1.24974717832,
1.26321467832,
1.26124176232,
1.26561826132,
1.26706692832,
1.25579947032,
1.2673547203200002,
1.26206413632
],
"exit_codes": [
0,
0,
0,
0,
0,
0,
0,
0,
0,
0
],
"parameters": {
"flag": "-fa",
"model": "models/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf",
"n_predict": "10",
"n_prompt": "10"
}
},
{
"command": "./main -f prompt-abc-10.txt -n 10 -m models/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf --seed 123 --top_p 0.0 --top_k 1 -c 2500",
"mean": 1.2689199282199999,
"stddev": 0.015431327865024063,
"median": 1.26945449082,
"user": 0.55142,
"system": 0.17635615999999996,
"min": 1.23607038632,
"max": 1.2880999693200001,
"times": [
1.27730847032,
1.28721222032,
1.23607038632,
1.2880999693200001,
1.26973442832,
1.2682853033200001,
1.25875309432,
1.25775888632,
1.26917455332,
1.27680197032
],
"exit_codes": [
0,
0,
0,
0,
0,
0,
0,
0,
0,
0
],
"parameters": {
"flag": "",
"model": "models/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf",
"n_predict": "10",
"n_prompt": "10"
}
},
{
"command": "./main -fa -f prompt-abc-100.txt -n 10 -m models/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf --seed 123 --top_p 0.0 --top_k 1 -c 2500",
"mean": 1.34334681582,
"stddev": 0.016189329334083905,
"median": 1.3446718238200002,
"user": 0.5527941000000001,
"system": 0.14426825999999998,
"min": 1.3008281783200002,
"max": 1.3614146363200001,
"times": [
1.3543338873200002,
1.3614146363200001,
1.3463662193200001,
1.3434428873200002,
1.3418932623200002,
1.34309338632,
1.3441332193200002,
1.3452104283200002,
1.3527520533200001,
1.3008281783200002
],
"exit_codes": [
0,
0,
0,
0,
0,
0,
0,
0,
0,
0
],
"parameters": {
"flag": "-fa",
"model": "models/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf",
"n_predict": "10",
"n_prompt": "100"
}
},
{
"command": "./main -f prompt-abc-100.txt -n 10 -m models/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf --seed 123 --top_p 0.0 --top_k 1 -c 2500",
"mean": 1.35276293662,
"stddev": 0.008582704271617324,
"median": 1.3520153238200001,
"user": 0.5482048999999999,
"system": 0.17817796,
"min": 1.33696776132,
"max": 1.36291688632,
"times": [
1.33696776132,
1.3606522623200001,
1.34509234532,
1.35291238632,
1.36291688632,
1.3624349283200001,
1.3511182613200001,
1.35949251132,
1.3491961783200002,
1.34684584532
],
"exit_codes": [
0,
0,
0,
0,
0,
0,
0,
0,
0,
0
],
"parameters": {
"flag": "",
"model": "models/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf",
"n_predict": "10",
"n_prompt": "100"
}
},
{
"command": "./main -fa -f prompt-abc-1000.txt -n 10 -m models/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf --seed 123 --top_p 0.0 --top_k 1 -c 2500",
"mean": 2.3489431826200002,
"stddev": 0.024891563192925504,
"median": 2.34258724132,
"user": 0.5686524999999999,
"system": 0.15937116,
"min": 2.32202959432,
"max": 2.39640342832,
"times": [
2.34512392832,
2.39106122032,
2.32202959432,
2.33453572032,
2.34317422032,
2.33222842832,
2.34200026232,
2.33337947032,
2.34949555332,
2.39640342832
],
"exit_codes": [
0,
0,
0,
0,
0,
0,
0,
0,
0,
0
],
"parameters": {
"flag": "-fa",
"model": "models/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf",
"n_predict": "10",
"n_prompt": "1000"
}
},
{
"command": "./main -f prompt-abc-1000.txt -n 10 -m models/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf --seed 123 --top_p 0.0 --top_k 1 -c 2500",
"mean": 2.38244081582,
"stddev": 0.021161878779801163,
"median": 2.38043744882,
"user": 0.5629837999999999,
"system": 0.18029105999999998,
"min": 2.36037672032,
"max": 2.4335940533200002,
"times": [
2.3672015113200002,
2.4335940533200002,
2.36037672032,
2.38113313632,
2.37974176132,
2.38457022032,
2.3667038863200003,
2.39904876232,
2.36988597032,
2.3821521363200002
],
"exit_codes": [
0,
0,
0,
0,
0,
0,
0,
0,
0,
0
],
"parameters": {
"flag": "",
"model": "models/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf",
"n_predict": "10",
"n_prompt": "1000"
}
},
{
"command": "./main -fa -f prompt-abc-10.txt -n 100 -m models/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf --seed 123 --top_p 0.0 --top_k 1 -c 2500",
"mean": 2.58061370332,
"stddev": 0.018549096601533553,
"median": 2.58388453282,
"user": 0.6584540999999999,
"system": 0.23583335999999994,
"min": 2.54702405332,
"max": 2.59866005332,
"times": [
2.57575621932,
2.58785755332,
2.59349263632,
2.54702405332,
2.57536542832,
2.59866005332,
2.59791901232,
2.59756851132,
2.55258205332,
2.5799115123200003
],
"exit_codes": [
0,
0,
0,
0,
0,
0,
0,
0,
0,
0
],
"parameters": {
"flag": "-fa",
"model": "models/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf",
"n_predict": "100",
"n_prompt": "10"
}
},
{
"command": "./main -f prompt-abc-10.txt -n 100 -m models/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf --seed 123 --top_p 0.0 --top_k 1 -c 2500",
"mean": 2.61793289502,
"stddev": 0.0064337679774784514,
"median": 2.6149530743200002,
"user": 0.6636268999999999,
"system": 0.21262936,
"min": 2.61161826232,
"max": 2.62845088632,
"times": [
2.62777159432,
2.6240909283200002,
2.61793755332,
2.61161826232,
2.61419817832,
2.62845088632,
2.61377905332,
2.61253613632,
2.6157079703200004,
2.61323838732
],
"exit_codes": [
0,
0,
0,
0,
0,
0,
0,
0,
0,
0
],
"parameters": {
"flag": "",
"model": "models/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf",
"n_predict": "100",
"n_prompt": "10"
}
},
{
"command": "./main -fa -f prompt-abc-100.txt -n 100 -m models/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf --seed 123 --top_p 0.0 --top_k 1 -c 2500",
"mean": 2.64469720732,
"stddev": 0.008573287662635163,
"median": 2.6463846988200004,
"user": 0.6608748999999998,
"system": 0.19825995999999999,
"min": 2.63284146932,
"max": 2.65639071932,
"times": [
2.65639071932,
2.65187930332,
2.63284146932,
2.6395722203200003,
2.63706634432,
2.64867901232,
2.63331122032,
2.64549388632,
2.65446238632,
2.64727551132
],
"exit_codes": [
0,
0,
0,
0,
0,
0,
0,
0,
0,
0
],
"parameters": {
"flag": "-fa",
"model": "models/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf",
"n_predict": "100",
"n_prompt": "100"
}
},
{
"command": "./main -f prompt-abc-100.txt -n 100 -m models/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf --seed 123 --top_p 0.0 --top_k 1 -c 2500",
"mean": 2.75139517812,
"stddev": 0.010002563998979794,
"median": 2.75178424032,
"user": 0.6665204,
"system": 0.24709315999999998,
"min": 2.73614455332,
"max": 2.76581813632,
"times": [
2.74960813632,
2.74709447032,
2.74176921932,
2.7539603443200003,
2.74139305332,
2.76581813632,
2.75635538632,
2.7568153033200002,
2.73614455332,
2.76499317832
],
"exit_codes": [
0,
0,
0,
0,
0,
0,
0,
0,
0,
0
],
"parameters": {
"flag": "",
"model": "models/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf",
"n_predict": "100",
"n_prompt": "100"
}
},
{
"command": "./main -fa -f prompt-abc-1000.txt -n 100 -m models/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf --seed 123 --top_p 0.0 --top_k 1 -c 2500",
"mean": 3.69440775352,
"stddev": 0.016713214103600656,
"median": 3.68730144932,
"user": 0.6684450999999999,
"system": 0.21122896,
"min": 3.68058738732,
"max": 3.73105505332,
"times": [
3.6892450123200002,
3.69817638632,
3.73105505332,
3.68535788632,
3.68268422032,
3.70149851232,
3.6813070953200002,
3.68153405332,
3.71263192832,
3.68058738732
],
"exit_codes": [
0,
0,
0,
0,
0,
0,
0,
0,
0,
0
],
"parameters": {
"flag": "-fa",
"model": "models/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf",
"n_predict": "100",
"n_prompt": "1000"
}
},
{
"command": "./main -f prompt-abc-1000.txt -n 100 -m models/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf --seed 123 --top_p 0.0 --top_k 1 -c 2500",
"mean": 3.8811586824199997,
"stddev": 0.013858033960200232,
"median": 3.87853694882,
"user": 0.6770918,
"system": 0.21665405999999998,
"min": 3.86245380332,
"max": 3.90036059532,
"times": [
3.89905680332,
3.87886817832,
3.87505117832,
3.88428155332,
3.86245380332,
3.89648472032,
3.86255463632,
3.87820571932,
3.90036059532,
3.87426963632
],
"exit_codes": [
0,
0,
0,
0,
0,
0,
0,
0,
0,
0
],
"parameters": {
"flag": "",
"model": "models/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf",
"n_predict": "100",
"n_prompt": "1000"
}
},
{
"command": "./main -fa -f prompt-abc-10.txt -n 1000 -m models/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf --seed 123 --top_p 0.0 --top_k 1 -c 2500",
"mean": 16.11183477842,
"stddev": 0.10624366691889775,
"median": 16.07080384532,
"user": 1.7812607999999996,
"system": 0.6309652600000001,
"min": 16.01803430332,
"max": 16.33122238632,
"times": [
16.01803430332,
16.05845030332,
16.07522330332,
16.063463595320002,
16.068945387320003,
16.06410834532,
16.07748922032,
16.33122238632,
16.07266230332,
16.28874863632
],
"exit_codes": [
0,
0,
0,
0,
0,
0,
0,
0,
0,
0
],
"parameters": {
"flag": "-fa",
"model": "models/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf",
"n_predict": "1000",
"n_prompt": "10"
}
},
{
"command": "./main -f prompt-abc-10.txt -n 1000 -m models/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf --seed 123 --top_p 0.0 --top_k 1 -c 2500",
"mean": 17.01912262822,
"stddev": 0.03399484841795368,
"median": 17.024256907820003,
"user": 1.8103107999999999,
"system": 0.61128336,
"min": 16.95302505332,
"max": 17.06232134432,
"times": [
17.06107509532,
17.032600387320002,
17.01591342832,
16.95302505332,
16.99279284532,
17.03972117832,
17.06232134432,
17.03583638632,
17.00050796932,
16.997432594320003
],
"exit_codes": [
0,
0,
0,
0,
0,
0,
0,
0,
0,
0
],
"parameters": {
"flag": "",
"model": "models/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf",
"n_predict": "1000",
"n_prompt": "10"
}
},
{
"command": "./main -fa -f prompt-abc-100.txt -n 1000 -m models/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf --seed 123 --top_p 0.0 --top_k 1 -c 2500",
"mean": 16.276526557420002,
"stddev": 0.24018119996849419,
"median": 16.18405363632,
"user": 1.7853362999999998,
"system": 0.60355886,
"min": 16.130332512320003,
"max": 16.879658720320002,
"times": [
16.20025038632,
16.517612428320003,
16.138683678320003,
16.174819803320002,
16.879658720320002,
16.22634896932,
16.130332512320003,
16.19328746932,
16.138646094320002,
16.165625512320002
],
"exit_codes": [
0,
0,
0,
0,
0,
0,
0,
0,
0,
0
],
"parameters": {
"flag": "-fa",
"model": "models/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf",
"n_predict": "1000",
"n_prompt": "100"
}
},
{
"command": "./main -f prompt-abc-100.txt -n 1000 -m models/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf --seed 123 --top_p 0.0 --top_k 1 -c 2500",
"mean": 17.23655477002,
"stddev": 0.037416821063938024,
"median": 17.24337380332,
"user": 1.8230629999999999,
"system": 0.58497746,
"min": 17.17042176132,
"max": 17.28821280332,
"times": [
17.244202178320002,
17.251026595320003,
17.28821280332,
17.273072553320002,
17.23476909532,
17.17042176132,
17.19822259532,
17.26616242832,
17.24254542832,
17.19691226132
],
"exit_codes": [
0,
0,
0,
0,
0,
0,
0,
0,
0,
0
],
"parameters": {
"flag": "",
"model": "models/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf",
"n_predict": "1000",
"n_prompt": "100"
}
},
{
"command": "./main -fa -f prompt-abc-1000.txt -n 1000 -m models/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf --seed 123 --top_p 0.0 --top_k 1 -c 2500",
"mean": 17.714646603300004,
"stddev": 0.01248023925272666,
"median": 17.7147974368,
"user": 1.7534393799999997,
"system": 0.5471676600000001,
"min": 17.6878740828,
"max": 17.735748915800002,
"times": [
17.715934790800002,
17.7123714158,
17.7245066238,
17.6878740828,
17.7223002488,
17.7136600828,
17.735748915800002,
17.7075699988,
17.7165774578,
17.7099224158
],
"exit_codes": [
0,
0,
0,
0,
0,
0,
0,
0,
0,
0
],
"parameters": {
"flag": "-fa",
"model": "models/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf",
"n_predict": "1000",
"n_prompt": "1000"
}
},
{
"command": "./main -f prompt-abc-1000.txt -n 1000 -m models/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf --seed 123 --top_p 0.0 --top_k 1 -c 2500",
"mean": 19.5221142826,
"stddev": 0.039489198679581755,
"median": 19.509671269800002,
"user": 1.84623268,
"system": 0.6098854599999999,
"min": 19.4760737908,
"max": 19.6059593328,
"times": [
19.4920580408,
19.499046123800003,
19.5202964158,
19.5581712078,
19.5447937498,
19.4760737908,
19.4940658738,
19.4959348748,
19.5347434158,
19.6059593328
],
"exit_codes": [
0,
0,
0,
0,
0,
0,
0,
0,
0,
0
],
"parameters": {
"flag": "",
"model": "models/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf",
"n_predict": "1000",
"n_prompt": "1000"
}
}
]
}
{
"results": [
{
"command": "./main -fa -f prompt-abc-10.txt -n 10 -m models/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf --seed 123 --top_p 0.0 --top_k 1 -c 2500",
"mean": 1.1605286075199999,
"stddev": 0.014607934131473883,
"median": 1.1624897239199998,
"user": 0.34798184,
"system": 0.11494544000000002,
"min": 1.13353934992,
"max": 1.1868276819199999,
"times": [
1.1645148909199998,
1.1868276819199999,
1.1610293489199999,
1.16857251592,
1.17237434992,
1.16395009892,
1.1546922659199998,
1.1479840989199999,
1.13353934992,
1.15180147392
],
"exit_codes": [
0,
0,
0,
0,
0,
0,
0,
0,
0,
0
],
"parameters": {
"flag": "-fa",
"model": "models/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf",
"n_predict": "10",
"n_prompt": "10"
}
},
{
"command": "./main -f prompt-abc-10.txt -n 10 -m models/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf --seed 123 --top_p 0.0 --top_k 1 -c 2500",
"mean": 1.1873813617200002,
"stddev": 0.03194352307444818,
"median": 1.18000780792,
"user": 0.34941384,
"system": 0.11774044000000002,
"min": 1.14562805792,
"max": 1.25601859892,
"times": [
1.1698307239199999,
1.14562805792,
1.22842272492,
1.1880988909199999,
1.17808409992,
1.25601859892,
1.18571143192,
1.16721143192,
1.17287614092,
1.18193151592
],
"exit_codes": [
0,
0,
0,
0,
0,
0,
0,
0,
0,
0
],
"parameters": {
"flag": "",
"model": "models/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf",
"n_predict": "10",
"n_prompt": "10"
}
},
{
"command": "./main -fa -f prompt-abc-100.txt -n 10 -m models/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf --seed 123 --top_p 0.0 --top_k 1 -c 2500",
"mean": 1.4636381158200003,
"stddev": 0.019350633918236215,
"median": 1.4640481619199999,
"user": 0.36092464,
"system": 0.12602214,
"min": 1.43731168192,
"max": 1.49993130792,
"times": [
1.46679664092,
1.45513622392,
1.43896605692,
1.47114059992,
1.4616704329199999,
1.45319018192,
1.48581214092,
1.43731168192,
1.4664258909199999,
1.49993130792
],
"exit_codes": [
0,
0,
0,
0,
0,
0,
0,
0,
0,
0
],
"parameters": {
"flag": "-fa",
"model": "models/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf",
"n_predict": "10",
"n_prompt": "100"
}
},
{
"command": "./main -f prompt-abc-100.txt -n 10 -m models/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf --seed 123 --top_p 0.0 --top_k 1 -c 2500",
"mean": 1.47611461182,
"stddev": 0.01452156613035438,
"median": 1.47585543242,
"user": 0.35184903999999995,
"system": 0.11936694,
"min": 1.45937659992,
"max": 1.50950780792,
"times": [
1.46299068292,
1.47326264092,
1.4784482239199999,
1.48231543292,
1.46288222392,
1.45937659992,
1.50950780792,
1.4695314739199998,
1.48277864092,
1.4800523909199998
],
"exit_codes": [
0,
0,
0,
0,
0,
0,
0,
0,
0,
0
],
"parameters": {
"flag": "",
"model": "models/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf",
"n_predict": "10",
"n_prompt": "100"
}
},
{
"command": "./main -fa -f prompt-abc-1000.txt -n 10 -m models/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf --seed 123 --top_p 0.0 --top_k 1 -c 2500",
"mean": 4.16443603262,
"stddev": 0.022022067966942768,
"median": 4.17095826642,
"user": 0.35946543999999997,
"system": 0.12096954,
"min": 4.126829015919999,
"max": 4.19462926592,
"times": [
4.126829015919999,
4.1705614749199995,
4.19462926592,
4.17911439092,
4.17779543292,
4.1818190159199995,
4.156638056919999,
4.17135505792,
4.15423505692,
4.13138355792
],
"exit_codes": [
0,
0,
0,
0,
0,
0,
0,
0,
0,
0
],
"parameters": {
"flag": "-fa",
"model": "models/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf",
"n_predict": "10",
"n_prompt": "1000"
}
},
{
"command": "./main -f prompt-abc-1000.txt -n 10 -m models/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf --seed 123 --top_p 0.0 --top_k 1 -c 2500",
"mean": 4.39417138702,
"stddev": 0.03242337099354441,
"median": 4.383675182919999,
"user": 0.37088624,
"system": 0.12049014000000002,
"min": 4.36139880792,
"max": 4.46314530792,
"times": [
4.3980037659199995,
4.38017047492,
4.36139880792,
4.38717989092,
4.37280122492,
4.370051849919999,
4.3923973489199994,
4.439351890919999,
4.46314530792,
4.37721330792
],
"exit_codes": [
0,
0,
0,
0,
0,
0,
0,
0,
0,
0
],
"parameters": {
"flag": "",
"model": "models/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf",
"n_predict": "10",
"n_prompt": "1000"
}
},
{
"command": "./main -fa -f prompt-abc-10.txt -n 100 -m models/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf --seed 123 --top_p 0.0 --top_k 1 -c 2500",
"mean": 4.723757016119999,
"stddev": 0.03534456898058924,
"median": 4.716524766419999,
"user": 0.45467224,
"system": 0.16018194,
"min": 4.69500809892,
"max": 4.81702918292,
"times": [
4.81702918292,
4.69912747492,
4.703375890919999,
4.717760849919999,
4.727152848919999,
4.71528868292,
4.70193293292,
4.69500809892,
4.72821755792,
4.732676640919999
],
"exit_codes": [
0,
0,
0,
0,
0,
0,
0,
0,
0,
0
],
"parameters": {
"flag": "-fa",
"model": "models/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf",
"n_predict": "100",
"n_prompt": "10"
}
},
{
"command": "./main -f prompt-abc-10.txt -n 100 -m models/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf --seed 123 --top_p 0.0 --top_k 1 -c 2500",
"mean": 4.83933209932,
"stddev": 0.011137861268246656,
"median": 4.83981941242,
"user": 0.4483678400000001,
"system": 0.15757254,
"min": 4.819531598919999,
"max": 4.852882140919999,
"times": [
4.819531598919999,
4.83708172492,
4.85200005692,
4.84255709992,
4.852882140919999,
4.84452064092,
4.8509736409199995,
4.83310651592,
4.82940680792,
4.83126076592
],
"exit_codes": [
0,
0,
0,
0,
0,
0,
0,
0,
0,
0
],
"parameters": {
"flag": "",
"model": "models/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf",
"n_predict": "100",
"n_prompt": "10"
}
},
{
"command": "./main -fa -f prompt-abc-100.txt -n 100 -m models/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf --seed 123 --top_p 0.0 --top_k 1 -c 2500",
"mean": 5.01306494082,
"stddev": 0.0508750897612245,
"median": 5.003135932419999,
"user": 0.44577083999999995,
"system": 0.15390804000000002,
"min": 4.96540551592,
"max": 5.14010109892,
"times": [
5.14010109892,
5.00608193192,
4.96540551592,
4.99222851592,
4.97672376592,
5.00542364092,
5.02533180792,
4.9725503909199995,
5.04595451592,
5.000848223919999
],
"exit_codes": [
0,
0,
0,
0,
0,
0,
0,
0,
0,
0
],
"parameters": {
"flag": "-fa",
"model": "models/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf",
"n_predict": "100",
"n_prompt": "100"
}
},
{
"command": "./main -f prompt-abc-100.txt -n 100 -m models/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf --seed 123 --top_p 0.0 --top_k 1 -c 2500",
"mean": 5.13673639922,
"stddev": 0.051653350494088826,
"median": 5.13067628642,
"user": 0.45641124,
"system": 0.16119314,
"min": 5.06617809992,
"max": 5.23011184892,
"times": [
5.20349739092,
5.1422879319199994,
5.06617809992,
5.11189726592,
5.07255268292,
5.23011184892,
5.11906464092,
5.11873830692,
5.14855318292,
5.1544826409199995
],
"exit_codes": [
0,
0,
0,
0,
0,
0,
0,
0,
0,
0
],
"parameters": {
"flag": "",
"model": "models/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf",
"n_predict": "100",
"n_prompt": "100"
}
},
{
"command": "./main -fa -f prompt-abc-1000.txt -n 100 -m models/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf --seed 123 --top_p 0.0 --top_k 1 -c 2500",
"mean": 7.84185486152,
"stddev": 0.02545954905681693,
"median": 7.837865432419999,
"user": 0.45512014,
"system": 0.17103194,
"min": 7.79633164092,
"max": 7.87458026592,
"times": [
7.79633164092,
7.82796159892,
7.85685180692,
7.83878622392,
7.8702037659199995,
7.82250001592,
7.87458026592,
7.86959559892,
7.82479305692,
7.83694464092
],
"exit_codes": [
0,
0,
0,
0,
0,
0,
0,
0,
0,
0
],
"parameters": {
"flag": "-fa",
"model": "models/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf",
"n_predict": "100",
"n_prompt": "1000"
}
},
{
"command": "./main -f prompt-abc-1000.txt -n 100 -m models/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf --seed 123 --top_p 0.0 --top_k 1 -c 2500",
"mean": 8.53536677852,
"stddev": 0.5051385052558667,
"median": 8.35617753692,
"user": 0.45832714,
"system": 0.17668284,
"min": 8.28440259892,
"max": 9.93620455792,
"times": [
8.34068547492,
8.69061180692,
9.93620455792,
8.343484932920001,
8.32571780792,
8.31301884992,
8.28440259892,
8.37632684892,
8.36887014092,
8.37434476592
],
"exit_codes": [
0,
0,
0,
0,
0,
0,
0,
0,
0,
0
],
"parameters": {
"flag": "",
"model": "models/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf",
"n_predict": "100",
"n_prompt": "1000"
}
},
{
"command": "./main -fa -f prompt-abc-10.txt -n 1000 -m models/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf --seed 123 --top_p 0.0 --top_k 1 -c 2500",
"mean": 41.273952536719996,
"stddev": 0.6900304380153305,
"median": 41.02784141192,
"user": 1.39621014,
"system": 0.48244644000000003,
"min": 40.496846890920004,
"max": 42.58076905692,
"times": [
42.26274134892,
42.58076905692,
41.71693326592,
41.158714265920004,
40.82061222392,
41.05695909992,
40.99872372392,
40.98552939092,
40.66169609992,
40.496846890920004
],
"exit_codes": [
0,
0,
0,
0,
0,
0,
0,
0,
0,
0
],
"parameters": {
"flag": "-fa",
"model": "models/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf",
"n_predict": "1000",
"n_prompt": "10"
}
},
{
"command": "./main -f prompt-abc-10.txt -n 1000 -m models/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf --seed 123 --top_p 0.0 --top_k 1 -c 2500",
"mean": 45.58094590772,
"stddev": 3.2007901763643187,
"median": 43.82052334892,
"user": 1.53880654,
"system": 0.50608444,
"min": 42.73994668292,
"max": 51.86725130792,
"times": [
42.73994668292,
43.18275797492,
43.09711118292,
43.46636897392,
43.87088126592,
43.77016543192,
47.14919139092,
49.81872072492,
51.86725130792,
46.847064140920004
],
"exit_codes": [
0,
0,
0,
0,
0,
0,
0,
0,
0,
0
],
"parameters": {
"flag": "",
"model": "models/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf",
"n_predict": "1000",
"n_prompt": "10"
}
},
{
"command": "./main -fa -f prompt-abc-100.txt -n 1000 -m models/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf --seed 123 --top_p 0.0 --top_k 1 -c 2500",
"mean": 40.91696357442,
"stddev": 0.395907367587856,
"median": 40.82905845392,
"user": 1.4039897399999997,
"system": 0.48524813999999994,
"min": 40.51949051592,
"max": 41.77249051592,
"times": [
41.35086043292,
41.77249051592,
41.01121009892,
40.655057057920004,
40.55587355792,
40.747345099920004,
40.62543464092,
40.51949051592,
40.91077180792,
41.02110201592
],
"exit_codes": [
0,
0,
0,
0,
0,
0,
0,
0,
0,
0
],
"parameters": {
"flag": "-fa",
"model": "models/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf",
"n_predict": "1000",
"n_prompt": "100"
}
},
{
"command": "./main -f prompt-abc-100.txt -n 1000 -m models/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf --seed 123 --top_p 0.0 --top_k 1 -c 2500",
"mean": 44.32472672832,
"stddev": 0.8614790648640065,
"median": 44.147774224420004,
"user": 1.44927884,
"system": 0.48036893999999997,
"min": 43.05575514092,
"max": 45.71893864092,
"times": [
44.03217259892,
45.14005380692,
44.26337584992,
43.85749647392,
45.18589793292,
45.71893864092,
43.80857655692,
43.05575514092,
44.81253501592,
43.37246526592
],
"exit_codes": [
0,
0,
0,
0,
0,
0,
0,
0,
0,
0
],
"parameters": {
"flag": "",
"model": "models/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf",
"n_predict": "1000",
"n_prompt": "100"
}
},
{
"command": "./main -fa -f prompt-abc-1000.txt -n 1000 -m models/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf --seed 123 --top_p 0.0 --top_k 1 -c 2500",
"mean": 45.47025037011999,
"stddev": 1.1311234160658492,
"median": 44.964616120420004,
"user": 1.5548881399999999,
"system": 0.50261234,
"min": 44.15631705792,
"max": 47.45952297392,
"times": [
44.65367772392,
47.45952297392,
46.60127326592,
46.42840722392,
44.652589098920004,
46.33456993292,
44.77308693292,
45.15614530792,
44.48691418292,
44.15631705792
],
"exit_codes": [
0,
0,
0,
0,
0,
0,
0,
0,
0,
0
],
"parameters": {
"flag": "-fa",
"model": "models/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf",
"n_predict": "1000",
"n_prompt": "1000"
}
},
{
"command": "./main -f prompt-abc-1000.txt -n 1000 -m models/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf --seed 123 --top_p 0.0 --top_k 1 -c 2500",
"mean": 50.302347678320004,
"stddev": 3.19938060140542,
"median": 49.10252649492,
"user": 1.8409239400000001,
"system": 0.5494007399999999,
"min": 48.809881806920004,
"max": 59.154801182920004,
"times": [
49.36158209992,
49.72363247392,
49.231737015920004,
48.809881806920004,
48.848667098920004,
48.82314343292,
48.97331597392,
59.154801182920004,
51.26659447392,
48.83012122392
],
"exit_codes": [
0,
0,
0,
0,
0,
0,
0,
0,
0,
0
],
"parameters": {
"flag": "",
"model": "models/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf",
"n_predict": "1000",
"n_prompt": "1000"
}
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment