gte-pure-c.nim
## GTE-Small Embedding Library - Nim Port
## A single-file, self-contained text embedding solution.
##
## Original C implementation by Antirez (Salvatore Sanfilippo)
## Nim port maintains the same algorithm and produces identical results.
##
## MIT License - Copyright (c) 2026 Salvatore Sanfilippo
## See LICENSE file for full terms.
##
## USAGE: Just compile and run - model downloads automatically on first use!
##
## Build for maximum performance (matches C speed):
## nim c -d:release -d:danger --opt:speed --passC:"-march=native -ffast-math" gte.nim
##
## Build for debugging:
## nim c gte.nim
##
## Requirements: Python 3.8+ with pip (for auto-download only, uv preferred)
import std/[strutils, tables, math, times, os, parseopt, osproc, tempfiles]
# ========================================================================
# Constants
# ========================================================================
const
  GTE_MAGIC = "GTE1"
  GTE_LAYER_NORM_EPS = 1e-12f32

  # Special token IDs
  TOKEN_PAD = 0
  TOKEN_UNK = 100
  TOKEN_CLS = 101
  TOKEN_SEP = 102
  TOKEN_MASK = 103

  # Hash table size for vocabulary (unused in this Nim port, which keeps the
  # vocabulary in a std/tables Table; retained for parity with the C original)
  VOCAB_HASH_SIZE = 40009

  DEFAULT_MODEL_PATH = "gte-small.gtemodel"
  MAX_SENTENCES = 64

  # Embedded Python script for downloading and converting the model
  DOWNLOAD_SCRIPT = """
# /// script
# requires-python = ">=3.8"
# dependencies = ["huggingface_hub", "safetensors", "numpy"]
# ///
import sys, struct, json, os
from pathlib import Path

def main():
    output_path = sys.argv[1] if len(sys.argv) > 1 else "gte-small.gtemodel"
    cache_dir = Path.home() / ".cache" / "gte-nim"
    cache_dir.mkdir(parents=True, exist_ok=True)

    print("Downloading GTE-small model from HuggingFace...")
    from huggingface_hub import hf_hub_download
    model_file = hf_hub_download("thenlper/gte-small", "model.safetensors", cache_dir=str(cache_dir))
    tokenizer_file = hf_hub_download("thenlper/gte-small", "tokenizer.json", cache_dir=str(cache_dir))
    config_file = hf_hub_download("thenlper/gte-small", "config.json", cache_dir=str(cache_dir))

    # Load config
    with open(config_file) as f:
        config = json.load(f)
    vocab_size = config["vocab_size"]
    hidden_size = config["hidden_size"]
    num_layers = config["num_hidden_layers"]
    num_heads = config["num_attention_heads"]
    intermediate_size = config["intermediate_size"]
    max_seq_length = config["max_position_embeddings"]
    print(f"Model: {vocab_size} vocab, {hidden_size} hidden, {num_layers} layers")

    # Extract vocab from tokenizer.json
    with open(tokenizer_file) as f:
        tokenizer = json.load(f)
    vocab_dict = tokenizer["model"]["vocab"]
    vocab = [word for word, _ in sorted(vocab_dict.items(), key=lambda x: x[1])]

    # Load safetensors
    from safetensors import safe_open
    tensors = safe_open(model_file, framework="numpy")

    print("Converting to .gtemodel format...")
    with open(output_path, 'wb') as f:
        f.write(b'GTE1')
        for val in [vocab_size, hidden_size, num_layers, num_heads, intermediate_size, max_seq_length]:
            f.write(struct.pack('<I', val))
        for word in vocab:
            word_bytes = word.encode('utf-8')
            f.write(struct.pack('<H', len(word_bytes)))
            f.write(word_bytes)

        def write_tensor(name):
            f.write(tensors.get_tensor(name).astype('float32').tobytes())

        write_tensor("embeddings.word_embeddings.weight")
        write_tensor("embeddings.position_embeddings.weight")
        write_tensor("embeddings.token_type_embeddings.weight")
        write_tensor("embeddings.LayerNorm.weight")
        write_tensor("embeddings.LayerNorm.bias")
        for l in range(num_layers):
            p = f"encoder.layer.{l}"
            write_tensor(f"{p}.attention.self.query.weight")
            write_tensor(f"{p}.attention.self.query.bias")
            write_tensor(f"{p}.attention.self.key.weight")
            write_tensor(f"{p}.attention.self.key.bias")
            write_tensor(f"{p}.attention.self.value.weight")
            write_tensor(f"{p}.attention.self.value.bias")
            write_tensor(f"{p}.attention.output.dense.weight")
            write_tensor(f"{p}.attention.output.dense.bias")
            write_tensor(f"{p}.attention.output.LayerNorm.weight")
            write_tensor(f"{p}.attention.output.LayerNorm.bias")
            write_tensor(f"{p}.intermediate.dense.weight")
            write_tensor(f"{p}.intermediate.dense.bias")
            write_tensor(f"{p}.output.dense.weight")
            write_tensor(f"{p}.output.dense.bias")
            write_tensor(f"{p}.output.LayerNorm.weight")
            write_tensor(f"{p}.output.LayerNorm.bias")
        write_tensor("pooler.dense.weight")
        write_tensor("pooler.dense.bias")

    size_mb = os.path.getsize(output_path) / 1024 / 1024
    print(f"Model saved to {output_path} ({size_mb:.1f} MB)")

if __name__ == "__main__":
    main()
"""

# ========================================================================
# Model Download
# ========================================================================
proc download_model*(output_path: string): bool =
  ## Download and convert the GTE-small model if not present.
  ## Uses 'uv' if available (PEP 723 inline script), otherwise falls back to pip.
  ## Returns true on success.
  # Create temp file for the Python script
  let (script_file, script_path) = createTempFile("gte_download_", ".py")
  script_file.write(DOWNLOAD_SCRIPT)
  script_file.close()
  defer: removeFile(script_path)

  # Try uv first (supports PEP 723 inline dependencies)
  let uv_check = execCmdEx("which uv")
  if uv_check.exitCode == 0:
    echo "Using uv to download model..."
    let res = execShellCmd("uv run " & quoteShell(script_path) & " " & quoteShell(output_path))
    return res == 0

  # Fall back to pip + python
  echo "Using pip to install dependencies..."
  let pip_install = execShellCmd("pip install -q huggingface_hub safetensors 2>/dev/null || pip3 install -q huggingface_hub safetensors")
  if pip_install != 0:
    stderr.writeLine "Failed to install Python dependencies"
    return false

  # Try python3 first, then python
  let python_check = execCmdEx("which python3")
  let python_cmd = if python_check.exitCode == 0: "python3" else: "python"
  let res = execShellCmd(python_cmd & " " & quoteShell(script_path) & " " & quoteShell(output_path))
  return res == 0

# ========================================================================
# Data Structures
# ========================================================================
type
  VocabEntry = object
    word: string
    id: int

  LayerWeights = object
    # Self-attention
    query_weight: seq[float32]       # [hidden_size, hidden_size]
    query_bias: seq[float32]         # [hidden_size]
    key_weight: seq[float32]         # [hidden_size, hidden_size]
    key_bias: seq[float32]           # [hidden_size]
    value_weight: seq[float32]       # [hidden_size, hidden_size]
    value_bias: seq[float32]         # [hidden_size]
    attn_output_weight: seq[float32] # [hidden_size, hidden_size]
    attn_output_bias: seq[float32]   # [hidden_size]
    attn_ln_weight: seq[float32]     # [hidden_size]
    attn_ln_bias: seq[float32]       # [hidden_size]
    # Feed-forward network
    ffn_inter_weight: seq[float32]   # [intermediate_size, hidden_size]
    ffn_inter_bias: seq[float32]     # [intermediate_size]
    ffn_output_weight: seq[float32]  # [hidden_size, intermediate_size]
    ffn_output_bias: seq[float32]    # [hidden_size]
    ffn_ln_weight: seq[float32]      # [hidden_size]
    ffn_ln_bias: seq[float32]        # [hidden_size]

  GteCtx* = ref object
    # Config
    vocab_size: int
    hidden_size: int
    num_layers: int
    num_heads: int
    intermediate_size: int
    max_seq_len: int
    head_dim: int
    # Vocabulary
    vocab: seq[string]
    vocab_hash: Table[string, int]
    # Embeddings
    token_embeddings: seq[float32]      # [vocab_size, hidden_size]
    position_embeddings: seq[float32]   # [max_seq_len, hidden_size]
    token_type_embeddings: seq[float32] # [2, hidden_size]
    embed_ln_weight: seq[float32]       # [hidden_size]
    embed_ln_bias: seq[float32]         # [hidden_size]
    # Transformer layers
    layers: seq[LayerWeights]
    # Pooler (not used for embeddings but loaded)
    pooler_weight: seq[float32]         # [hidden_size, hidden_size]
    pooler_bias: seq[float32]           # [hidden_size]
    # Working memory for inference
    hidden_states: seq[float32]         # [max_seq_len, hidden_size]
    attn_scores: seq[float32]           # [num_heads, max_seq_len, max_seq_len]
    q_proj: seq[float32]                # [max_seq_len, hidden_size]
    k_proj: seq[float32]                # [max_seq_len, hidden_size]
    v_proj: seq[float32]                # [max_seq_len, hidden_size]
    attn_output: seq[float32]           # [max_seq_len, hidden_size]
    ffn_hidden: seq[float32]            # [max_seq_len, intermediate_size]
    temp_hidden: seq[float32]           # [max_seq_len, hidden_size]

# ========================================================================
# Matrix Operations
# ========================================================================
proc linear(y: var openArray[float32], x: openArray[float32],
            W: openArray[float32], b: openArray[float32],
            seq_len, in_dim, out_dim: int) =
  ## Linear layer applied to each of the seq_len rows: y = x @ W^T + b
  for s in 0..<seq_len:
    for o in 0..<out_dim:
      var sum = if b.len > 0: b[o] else: 0.0f32
      for i in 0..<in_dim:
        sum += x[s * in_dim + i] * W[o * in_dim + i]
      y[s * out_dim + o] = sum

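# Per row, LayerNorm computes y = gamma * (x - mean) / sqrt(variance + eps) + beta,
# with mean and variance taken over the hidden dimension (eps = GTE_LAYER_NORM_EPS).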
proc layer_norm(out_buf: var openArray[float32], x: openArray[float32],
                gamma, beta: openArray[float32], seq_len, hidden_size: int) =
  ## Layer normalization
  for s in 0..<seq_len:
    let row_offset = s * hidden_size
    # Compute mean
    var mean = 0.0f32
    for i in 0..<hidden_size:
      mean += x[row_offset + i]
    mean /= hidden_size.float32
    # Compute variance
    var variance = 0.0f32
    for i in 0..<hidden_size:
      let diff = x[row_offset + i] - mean
      variance += diff * diff
    variance /= hidden_size.float32
    # Normalize and scale
    let std_inv = 1.0f32 / sqrt(variance + GTE_LAYER_NORM_EPS)
    for i in 0..<hidden_size:
      out_buf[row_offset + i] = gamma[i] * (x[row_offset + i] - mean) * std_inv + beta[i]

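# The GELU below uses the standard tanh approximation:
#   GELU(x) ~= 0.5 * x * (1 + tanh(sqrt(2/pi) * (x + 0.044715 * x^3)))
# where 0.7978845608 is sqrt(2/pi).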
proc gelu(x: var openArray[float32], n: int) =
  ## GELU activation (approximate)
  for i in 0..<n:
    let val = x[i]
    x[i] = 0.5f32 * val * (1.0f32 + tanh(0.7978845608f32 * (val + 0.044715f32 * val * val * val)))

proc softmax(x: var openArray[float32], start, n: int) =
  ## Softmax over slice [start..start+n)
  # Find max for numerical stability
  var max_val = x[start]
  for i in 1..<n:
    if x[start + i] > max_val:
      max_val = x[start + i]
  # Compute exp and sum
  var sum = 0.0f32
  for i in 0..<n:
    x[start + i] = exp(x[start + i] - max_val)
    sum += x[start + i]
  # Normalize
  let inv_sum = 1.0f32 / sum
  for i in 0..<n:
    x[start + i] *= inv_sum

proc l2_normalize(x: var openArray[float32], n: int) =
  ## L2 normalize in place
  var norm = 0.0f32
  for i in 0..<n:
    norm += x[i] * x[i]
  norm = sqrt(norm)
  if norm > 0.0f32:
    let inv_norm = 1.0f32 / norm
    for i in 0..<n:
      x[i] *= inv_norm

# ========================================================================
# Tokenizer
# ========================================================================
proc is_punctuation(c: char): bool =
  let code = ord(c)
  (code >= 33 and code <= 47) or (code >= 58 and code <= 64) or
    (code >= 91 and code <= 96) or (code >= 123 and code <= 126)

proc is_whitespace(c: char): bool =
  c == ' ' or c == '\t' or c == '\n' or c == '\r'

proc basic_tokenize(text: string): seq[string] =
  ## Basic tokenization: split on whitespace and punctuation, lowercase
  result = @[]
  var i = 0
  while i < text.len:
    # Skip whitespace
    while i < text.len and is_whitespace(text[i]):
      inc i
    if i >= text.len:
      break
    # Find end of token
    let start = i
    if is_punctuation(text[i]):
      inc i
    else:
      while i < text.len and not is_whitespace(text[i]) and not is_punctuation(text[i]):
        inc i
    # Create lowercase token
    result.add(text[start..<i].toLowerAscii())

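# For example, basic_tokenize("Hello, world!") yields @["hello", ",", "world", "!"]:
# every punctuation character becomes a standalone token.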
proc wordpiece_tokenize(ctx: GteCtx, word: string): seq[int] =
  ## WordPiece tokenization of a single word
  if word.len == 0:
    return @[]
  result = @[]
  var start = 0
  while start < word.len:
    var end_pos = word.len
    var found_id = -1
    # Find longest matching subword
    while start < end_pos:
      var candidate: string
      if start > 0:
        candidate = "##" & word[start..<end_pos]
      else:
        candidate = word[start..<end_pos]
      if candidate in ctx.vocab_hash:
        found_id = ctx.vocab_hash[candidate]
        break
      dec end_pos
    if found_id < 0:
      result.add(TOKEN_UNK)
      inc start
    else:
      result.add(found_id)
      start = end_pos

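# Greedy longest-match-first: the longest prefix present in the vocabulary is
# emitted, then matching continues with "##"-prefixed continuation pieces.
# A character with no matching piece at all becomes [UNK] and the scan
# advances by one byte.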
proc tokenize(ctx: GteCtx, text: string, max_len: int): seq[int] =
  ## Full tokenization: text -> token IDs
  let basic_tokens = basic_tokenize(text)
  result = @[TOKEN_CLS]
  for token in basic_tokens:
    if result.len >= max_len - 1:
      break
    let subtokens = wordpiece_tokenize(ctx, token)
    for subtoken in subtokens:
      if result.len >= max_len - 1:
        break
      result.add(subtoken)
  result.add(TOKEN_SEP)

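# The resulting sequence is [CLS] piece1 ... pieceN [SEP]; the max_len - 1
# guards above leave room so the trailing [SEP] always fits within max_len.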
# ========================================================================
# Transformer Forward Pass
# ========================================================================
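# Standard multi-head scaled dot-product attention. Q, K and V are projected
# once for the whole sequence; per head h, score[i][j] = (q_i . k_j) / sqrt(head_dim),
# each row is softmaxed over j, and the head output is the attention-weighted
# sum of the value vectors.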
proc self_attention(ctx: GteCtx, layer: LayerWeights, seq_len: int, attn_mask: seq[int]) =
  let hidden = ctx.hidden_size
  let heads = ctx.num_heads
  let head_dim = ctx.head_dim
  # Project Q, K, V
  linear(ctx.q_proj, ctx.hidden_states, layer.query_weight, layer.query_bias, seq_len, hidden, hidden)
  linear(ctx.k_proj, ctx.hidden_states, layer.key_weight, layer.key_bias, seq_len, hidden, hidden)
  linear(ctx.v_proj, ctx.hidden_states, layer.value_weight, layer.value_bias, seq_len, hidden, hidden)
  # Compute attention for each head
  let scale = 1.0f32 / sqrt(head_dim.float32)
  for h in 0..<heads:
    # Attention scores for this head: Q @ K^T / sqrt(d_k)
    for i in 0..<seq_len:
      for j in 0..<seq_len:
        var score = 0.0f32
        for d in 0..<head_dim:
          let q_idx = i * hidden + h * head_dim + d
          let k_idx = j * hidden + h * head_dim + d
          score += ctx.q_proj[q_idx] * ctx.k_proj[k_idx]
        score *= scale
        # Apply attention mask
        if attn_mask.len > 0 and attn_mask[j] == 0:
          score = -10000.0f32
        ctx.attn_scores[h * seq_len * seq_len + i * seq_len + j] = score
      # Softmax over keys
      softmax(ctx.attn_scores, h * seq_len * seq_len + i * seq_len, seq_len)
    # Weighted sum of values
    for i in 0..<seq_len:
      for d in 0..<head_dim:
        var sum = 0.0f32
        for j in 0..<seq_len:
          let attn = ctx.attn_scores[h * seq_len * seq_len + i * seq_len + j]
          let v_idx = j * hidden + h * head_dim + d
          sum += attn * ctx.v_proj[v_idx]
        ctx.attn_output[i * hidden + h * head_dim + d] = sum
  # Output projection
  linear(ctx.temp_hidden, ctx.attn_output, layer.attn_output_weight, layer.attn_output_bias, seq_len, hidden, hidden)
  # Residual connection and layer norm
  for i in 0..<seq_len * hidden:
    ctx.temp_hidden[i] += ctx.hidden_states[i]
  layer_norm(ctx.hidden_states, ctx.temp_hidden, layer.attn_ln_weight, layer.attn_ln_bias, seq_len, hidden)

proc feed_forward(ctx: GteCtx, layer: LayerWeights, seq_len: int) =
  let hidden = ctx.hidden_size
  let inter = ctx.intermediate_size
  # Intermediate layer
  linear(ctx.ffn_hidden, ctx.hidden_states, layer.ffn_inter_weight, layer.ffn_inter_bias, seq_len, hidden, inter)
  gelu(ctx.ffn_hidden, seq_len * inter)
  # Output layer
  linear(ctx.temp_hidden, ctx.ffn_hidden, layer.ffn_output_weight, layer.ffn_output_bias, seq_len, inter, hidden)
  # Residual connection and layer norm
  for i in 0..<seq_len * hidden:
    ctx.temp_hidden[i] += ctx.hidden_states[i]
  layer_norm(ctx.hidden_states, ctx.temp_hidden, layer.ffn_ln_weight, layer.ffn_ln_bias, seq_len, hidden)

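# Note: both sublayers add the residual before LayerNorm (post-LN), matching
# the original BERT encoder architecture that GTE-small is based on.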
proc transformer_forward(ctx: GteCtx, token_ids: seq[int], seq_len: int, attn_mask: seq[int]) =
  let hidden = ctx.hidden_size
  # Compute embeddings
  for s in 0..<seq_len:
    let token_id = token_ids[s]
    for d in 0..<hidden:
      ctx.hidden_states[s * hidden + d] =
        ctx.token_embeddings[token_id * hidden + d] +
        ctx.position_embeddings[s * hidden + d] +
        ctx.token_type_embeddings[d] # token_type = 0
  # Embedding layer norm
  layer_norm(ctx.hidden_states, ctx.hidden_states, ctx.embed_ln_weight, ctx.embed_ln_bias, seq_len, hidden)
  # Process each transformer layer
  for layer in ctx.layers:
    self_attention(ctx, layer, seq_len, attn_mask)
    feed_forward(ctx, layer, seq_len)

proc mean_pooling(output: var seq[float32], hidden_states: seq[float32],
                  attn_mask: seq[int], seq_len, hidden_size: int) =
  ## Mean pooling over non-padded tokens
  # Initialize output to zero
  for i in 0..<hidden_size:
    output[i] = 0.0f32
  # Sum up hidden states for non-padded tokens
  var count = 0
  for s in 0..<seq_len:
    if attn_mask[s] != 0:
      for d in 0..<hidden_size:
        output[d] += hidden_states[s * hidden_size + d]
      inc count
  # Average
  if count > 0:
    let inv_count = 1.0f32 / count.float32
    for d in 0..<hidden_size:
      output[d] *= inv_count

# ========================================================================
# Model Loading
# ========================================================================
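# On-disk .gtemodel layout (all integers little-endian), exactly as written by
# the embedded Python script:
#   bytes 0..3    magic "GTE1"
#   6 x uint32    vocab_size, hidden_size, num_layers, num_heads,
#                 intermediate_size, max_seq_len
#   vocabulary    vocab_size entries of (uint16 byte length, UTF-8 bytes)
#   tensors       raw float32 data in the exact order read below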
proc read_uint32(f: File): int =
  var buf: array[4, uint8]
  if f.readBytes(buf, 0, 4) != 4:
    raise newException(IOError, "Failed to read uint32")
  result = int(buf[0]) or (int(buf[1]) shl 8) or (int(buf[2]) shl 16) or (int(buf[3]) shl 24)

proc read_uint16(f: File): int =
  var buf: array[2, uint8]
  if f.readBytes(buf, 0, 2) != 2:
    raise newException(IOError, "Failed to read uint16")
  result = int(buf[0]) or (int(buf[1]) shl 8)

proc read_floats(f: File, count: int): seq[float32] =
  result = newSeq[float32](count)
  let bytes = count * sizeof(float32)
  if f.readBuffer(addr result[0], bytes) != bytes:
    raise newException(IOError, "Failed to read floats")

proc gte_load*(model_path: string): GteCtx =
  ## Load model from .gtemodel file
  if not fileExists(model_path):
    stderr.writeLine "gte_load: cannot open ", model_path
    return nil
  var f = open(model_path, fmRead)
  defer: f.close()

  # Check magic
  var magic: array[4, char]
  if f.readChars(magic) != 4 or magic != ['G', 'T', 'E', '1']:
    stderr.writeLine "gte_load: invalid magic"
    return nil

  result = GteCtx()
  # Read config
  result.vocab_size = read_uint32(f)
  result.hidden_size = read_uint32(f)
  result.num_layers = read_uint32(f)
  result.num_heads = read_uint32(f)
  result.intermediate_size = read_uint32(f)
  result.max_seq_len = read_uint32(f)
  result.head_dim = result.hidden_size div result.num_heads

  # Read vocabulary
  result.vocab = newSeq[string](result.vocab_size)
  result.vocab_hash = initTable[string, int]()
  for i in 0..<result.vocab_size:
    let len = read_uint16(f)
    var word = newString(len)
    if f.readChars(word) != len:
      raise newException(IOError, "Failed to read vocab word")
    result.vocab[i] = word
    result.vocab_hash[word] = i

  # Read embeddings
  result.token_embeddings = read_floats(f, result.vocab_size * result.hidden_size)
  result.position_embeddings = read_floats(f, result.max_seq_len * result.hidden_size)
  result.token_type_embeddings = read_floats(f, 2 * result.hidden_size)
  result.embed_ln_weight = read_floats(f, result.hidden_size)
  result.embed_ln_bias = read_floats(f, result.hidden_size)

  # Read transformer layers
  result.layers = newSeq[LayerWeights](result.num_layers)
  for l in 0..<result.num_layers:
    result.layers[l].query_weight = read_floats(f, result.hidden_size * result.hidden_size)
    result.layers[l].query_bias = read_floats(f, result.hidden_size)
    result.layers[l].key_weight = read_floats(f, result.hidden_size * result.hidden_size)
    result.layers[l].key_bias = read_floats(f, result.hidden_size)
    result.layers[l].value_weight = read_floats(f, result.hidden_size * result.hidden_size)
    result.layers[l].value_bias = read_floats(f, result.hidden_size)
    result.layers[l].attn_output_weight = read_floats(f, result.hidden_size * result.hidden_size)
    result.layers[l].attn_output_bias = read_floats(f, result.hidden_size)
    result.layers[l].attn_ln_weight = read_floats(f, result.hidden_size)
    result.layers[l].attn_ln_bias = read_floats(f, result.hidden_size)
    result.layers[l].ffn_inter_weight = read_floats(f, result.intermediate_size * result.hidden_size)
    result.layers[l].ffn_inter_bias = read_floats(f, result.intermediate_size)
    result.layers[l].ffn_output_weight = read_floats(f, result.hidden_size * result.intermediate_size)
    result.layers[l].ffn_output_bias = read_floats(f, result.hidden_size)
    result.layers[l].ffn_ln_weight = read_floats(f, result.hidden_size)
    result.layers[l].ffn_ln_bias = read_floats(f, result.hidden_size)

  # Read pooler (not used for embeddings)
  result.pooler_weight = read_floats(f, result.hidden_size * result.hidden_size)
  result.pooler_bias = read_floats(f, result.hidden_size)

  # Allocate working memory
  let max_seq = result.max_seq_len
  let hidden = result.hidden_size
  let inter = result.intermediate_size
  let heads = result.num_heads
  result.hidden_states = newSeq[float32](max_seq * hidden)
  result.attn_scores = newSeq[float32](heads * max_seq * max_seq)
  result.q_proj = newSeq[float32](max_seq * hidden)
  result.k_proj = newSeq[float32](max_seq * hidden)
  result.v_proj = newSeq[float32](max_seq * hidden)
  result.attn_output = newSeq[float32](max_seq * hidden)
  result.ffn_hidden = newSeq[float32](max_seq * inter)
  result.temp_hidden = newSeq[float32](max_seq * hidden)

# ========================================================================
# Public API
# ========================================================================
proc gte_embed*(ctx: GteCtx, text: string): seq[float32] =
  ## Generate embedding for a single text
  if ctx == nil:
    return @[]
  # Tokenize
  let token_ids = tokenize(ctx, text, ctx.max_seq_len)
  let num_tokens = token_ids.len
  # Create attention mask
  var attn_mask = newSeq[int](num_tokens)
  for i in 0..<num_tokens:
    attn_mask[i] = 1
  # Run transformer
  transformer_forward(ctx, token_ids, num_tokens, attn_mask)
  # Mean pooling
  result = newSeq[float32](ctx.hidden_size)
  mean_pooling(result, ctx.hidden_states, attn_mask, num_tokens, ctx.hidden_size)
  # L2 normalize
  l2_normalize(result, ctx.hidden_size)

proc gte_dim*(ctx: GteCtx): int =
  ## Get the embedding dimension (384 for GTE-small)
  if ctx != nil: ctx.hidden_size else: 0

proc gte_max_seq_len*(ctx: GteCtx): int =
  ## Get the maximum sequence length (512 for GTE-small)
  if ctx != nil: ctx.max_seq_len else: 0

proc gte_cosine_similarity*(a, b: openArray[float32], dim: int): float32 =
  ## Compute cosine similarity between two embeddings.
  ## Assumes normalized vectors, so dot product = cosine similarity.
  result = 0.0f32
  for i in 0..<dim:
    result += a[i] * b[i]

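# Minimal usage sketch of the public API (assumes the model file is present,
# e.g. after download_model has run):
#
#   let ctx = gte_load("gte-small.gtemodel")
#   let a = gte_embed(ctx, "The weather is lovely today.")
#   let b = gte_embed(ctx, "It's so sunny outside!")
#   echo gte_cosine_similarity(a, b, gte_dim(ctx))  # close to 1.0 = very similar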
# ========================================================================
# Test Program
# ========================================================================
proc print_embedding(emb: openArray[float32], dim, n: int) =
  ## Print the first min(n, dim) components of an embedding
  let shown = min(n, dim)
  stdout.write "["
  for i in 0..<shown:
    stdout.write emb[i].formatFloat(ffDecimal, 6)
    if i < shown - 1:
      stdout.write ", "
  if shown < dim:
    stdout.write ", ..."
  echo "]"

proc print_usage(prog: string) =
  echo "Usage: ", prog, " [OPTIONS] [SENTENCES...]"
  echo ""
  echo "Test the GTE-small embedding model by computing embeddings and a similarity matrix."
  echo "The model is automatically downloaded on first run if not present."
  echo ""
  echo "Options:"
  echo "  --model-path PATH  Path to .gtemodel file (default: ", DEFAULT_MODEL_PATH, ")"
  echo "  --help             Show this help message"
  echo ""
  echo "Arguments:"
  echo "  SENTENCES          One or more sentences to embed (quote each sentence)"
  echo "                     If none provided, uses built-in example sentences"
  echo ""
  echo "Examples:"
  echo "  ", prog
  echo "  ", prog, " \"Hello world\" \"Goodbye world\""
  echo "  ", prog, " --model-path my-model.gtemodel \"Test sentence\""

when isMainModule:
  var model_path = DEFAULT_MODEL_PATH
  var user_sentences: seq[string] = @[]

  # Parse arguments
  var p = initOptParser()
  while true:
    p.next()
    case p.kind
    of cmdEnd: break
    of cmdShortOption, cmdLongOption:
      case p.key
      of "help", "h":
        print_usage(getAppFilename())
        quit(0)
      of "model-path":
        model_path = p.val
      else:
        stderr.writeLine "Error: Unknown option '", p.key, "'"
        quit(1)
    of cmdArgument:
      user_sentences.add(p.key)

  # Default sentences if none provided
  let default_sentences = @[
    "The weather is lovely today.",
    "It's so sunny outside!",
    "He drove to the stadium.",
    "Machine learning is transforming industries.",
    "I love programming in C."
  ]
  let sentences = if user_sentences.len > 0: user_sentences else: default_sentences

  # Auto-download model if not present
  if not fileExists(model_path):
    echo "Model not found at ", model_path
    echo "Attempting to download GTE-small model..."
    if not download_model(model_path):
      stderr.writeLine "Failed to download model. Please download manually."
      quit(1)

  # Load model
  echo "Loading model from ", model_path, "..."
  let start_load = cpuTime()
  let ctx = gte_load(model_path)
  if ctx == nil:
    stderr.writeLine "Failed to load model"
    quit(1)
  let load_time = cpuTime() - start_load
  echo "Model loaded in ", load_time.formatFloat(ffDecimal, 2), " seconds"
  echo "Embedding dimension: ", gte_dim(ctx)
  echo "Max sequence length: ", gte_max_seq_len(ctx)
  echo ""

  # Generate embeddings
  echo "Generating embeddings..."
  echo ""
  var embeddings: seq[seq[float32]] = @[]
  for i, sentence in sentences:
    let start_embed = cpuTime()
    let embedding = gte_embed(ctx, sentence)
    let embed_time = cpuTime() - start_embed
    embeddings.add(embedding)
    echo "S", i + 1, ": \"", sentence, "\""
    echo " Time: ", (embed_time * 1000).formatFloat(ffDecimal, 3), " ms"
    stdout.write " Embedding: "
    print_embedding(embedding, gte_dim(ctx), 5)
    echo ""

  # Compute similarity matrix
  echo "Cosine similarity matrix:"
stdout.write " "
for i in 0..<sentences.len:
stdout.write " S", i + 1, " "
echo ""
  for i in 0..<sentences.len:
    stdout.write "S", i + 1, ": "
    for j in 0..<sentences.len:
      let sim = gte_cosine_similarity(embeddings[i], embeddings[j], gte_dim(ctx))
      stdout.write " ", sim.formatFloat(ffDecimal, 3), " "
    echo ""

  echo ""
  echo "Done!"