@kazuho
Last active May 9, 2025 01:36
proxy that adds function call capabilities to llama.cpp server
// proxy.js
// Express server wrapping llama.cpp server with OpenAI function-calling semantics
// Logs client requests/responses and model requests/responses to console
const express = require('express');
const axios = require('axios');
const bodyParser = require('body-parser');
const { v4: uuidv4 } = require('uuid');
const { spawn } = require('child_process');
const LLM_ORIGIN = 'http://r6:11434';
const LLM_URL = `${LLM_ORIGIN}/v1/chat/completions`;
const PORT = 11434;
// In-memory map to track original messages by id
const messageStore = new Map();
// Function definitions
function get_current_time() {
  return { utc: new Date().toISOString() };
}
get_current_time.llm = {
  description: 'returns current time in UTC (+0000)',
  parameters: { type: 'object', properties: {}, required: [] },
};
// Evaluate input.formula by piping it into `bc -l`, resolving with the combined stdout/stderr output
async function runBc(input) {
  return new Promise((resolve, reject) => {
    const bc = spawn('bc', ['-l']);
    let output = '';
    bc.stdout.on('data', (chunk) => {
      output += chunk.toString();
    });
    bc.stderr.on('data', (chunk) => {
      output += chunk.toString();
    });
    bc.on('error', (err) => {
      reject(err);
    });
    bc.on('close', (code) => {
      resolve(output);
    });
    bc.stdin.write(input.formula);
    bc.stdin.end();
  });
}
runBc.llm = {
  description: 'calculates formulas and returns the result, by calling the bc command',
  parameters: {
    type: 'object',
    properties: {
      formula: { type: 'string', description: 'the formula to calculate' },
    },
    required: ['formula'],
  },
};
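// Illustrative sketch (not part of the original gist): any additional tool follows the same
// pattern, a plain function plus an `.llm` descriptor, and would also need to be added to the
// registration list below. The `get_random_integer` name and its parameters are hypothetical.
//
//   function get_random_integer(input) {
//     const { min, max } = input;
//     return { value: Math.floor(Math.random() * (max - min + 1)) + min };
//   }
//   get_random_integer.llm = {
//     description: 'returns a random integer between min and max, inclusive',
//     parameters: {
//       type: 'object',
//       properties: {
//         min: { type: 'integer', description: 'lower bound' },
//         max: { type: 'integer', description: 'upper bound' },
//       },
//       required: ['min', 'max'],
//     },
//   };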
// Build a name→function map and the OpenAI-style `tools` list sent to the model
const functions = Object.fromEntries(
  [get_current_time, runBc].map(def => [def.name, def])
);
const tools = Object.keys(functions).map(name => ({
  type: 'function',
  function: {
    name: name,
    description: functions[name].llm.description,
    parameters: functions[name].llm.parameters
  }
}));
const app = express();
app.use(bodyParser.json());
// Expand any message IDs back to full messages
function expandClientMessages(body) {
  if (!Array.isArray(body.messages)) return;
  console.log('Received client messages:', JSON.stringify(body.messages, null, 2));
  body.messages = body.messages.flatMap(m => {
    const recovered = [];
    let match;
    while ((match = m.content.match(/^function-call:([^\n]+)(?:\n(.*)|)$/s)) != null) {
      const id = match[1];
      m.content = match[2] != null ? match[2] : '';
      if (messageStore.has(id))
        recovered.push(...messageStore.get(id));
    }
    recovered.push(m);
    return recovered;
  });
}
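// Example of the round trip (illustrative; the id shown is made up): after a tool call the proxy
// prepends "function-call:<id>\n" to the assistant text it sends back, so the client later echoes
// something like
//   { role: 'assistant', content: 'function-call:7f0c...\n12345 * 6789 is 83810205.' }
// expandClientMessages() strips each prefix, looks the id up in messageStore, and re-inserts the
// stored tool-call message and function result ahead of the visible text, so the upstream model
// sees the full tool-call history on the next request.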
// Call the model repeatedly, executing any tool calls it requests, until it returns plain content.
// Returns the accumulated "function-call:<id>" prefixes followed by the final text.
async function callLLMJson(body) {
  let messages = body.messages || [];
  const prefixes = [];
  let finalContent = '';
  while (true) {
    const upstream = { ...body, tools, stream: false, chat_format: 'chatml-function-calling', messages };
    console.log('Sending to model:', JSON.stringify(upstream, null, 2));
    const r1 = await axios.post(LLM_URL, upstream);
    console.log('Model response:', JSON.stringify(r1.data, null, 2));
    const c1 = r1.data.choices[0];
    if (!(c1.finish_reason === 'tool_calls' && c1.message.tool_calls?.length)) {
      finalContent = c1.message.content;
      break;
    }
    const call = c1.message.tool_calls[0];
    const fn = call.function.name;
    const args = JSON.parse(call.function.arguments || '{}');
    console.log(`Calling function ${fn} with args:`, args);
    const resObj = await functions[fn](args);
    console.log(`Function ${fn} result:`, resObj);
    const randId = uuidv4();
    prefixes.push(`function-call:${randId}\n`);
    const fnMsg = { role: 'function', name: fn, content: JSON.stringify(resObj) };
    messageStore.set(randId, [c1.message, fnMsg]);
    messages = [...messages, c1.message, fnMsg];
    continue;
  }
  return [...prefixes, finalContent];
}
app.post('/v1/chat/completions', async (req, res) => {
  try {
    // Expand IDs
    expandClientMessages(req.body);
    const isStream = req.body.stream === true;
    const history = await callLLMJson(req.body);
    console.log('Prepared history for client:', history);
    if (isStream) {
      res.setHeader('Content-Type', 'text/event-stream');
      res.setHeader('Cache-Control', 'no-cache');
      res.setHeader('Connection', 'keep-alive');
      res.flushHeaders();
      const emitChunk = (delta) => {
        const chunk = { choices: [{ delta: delta, index: 0, finish_reason: null }] };
        // each SSE event is terminated by a blank line
        res.write(`data: ${JSON.stringify(chunk)}\n\n`);
      };
      emitChunk({ role: 'assistant' });
      for (const msg of history) {
        emitChunk({ content: msg });
      }
      res.write('data: [DONE]\n\n');
      console.log('Stream completed');
      res.end();
    } else {
      // join the function-call prefixes and final text into one assistant message,
      // mirroring what the streaming path emits
      const content = history.join('');
      console.log('Sending non-stream response:', content);
      res.json({ choices: [{ message: { role: 'assistant', content }, finish_reason: 'stop' }] });
    }
  } catch (err) {
    console.error('Error in /v1/chat/completions:', err);
    if (req.body.stream) {
      res.write(`event: error\ndata: ${JSON.stringify({ error: err.toString() })}\n\n`);
      res.end();
    } else {
      res.status(500).json({ error: err.toString() });
    }
  }
});
// Fallback proxy
app.use(async (req, res) => {
  try {
    const url = `${LLM_ORIGIN}${req.originalUrl}`;
    const method = req.method.toLowerCase();
    const headers = { ...req.headers, host: new URL(LLM_ORIGIN).host };
    const data = ['get', 'delete'].includes(method) ? undefined : req.body;
    const resp = await axios({ url, method, headers, data, responseType: data && req.body.stream ? 'stream' : 'json' });
    if (data && req.body.stream) {
      res.setHeader('Content-Type', 'text/event-stream');
      resp.data.pipe(res);
    } else {
      res.status(resp.status).set(resp.headers).send(resp.data);
    }
  } catch (err) {
    console.error('Fallback error', err);
    res.status(502).json({ error: err.toString() });
  }
});
app.listen(PORT, () => console.log(`Proxy listening on ${PORT}`));
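
A minimal client sketch (not part of the gist), assuming the proxy runs on localhost:11434 and Node 18+ provides a global fetch; the prompt is chosen so the model is likely to invoke the bc-backed calculator tool:

// client.js (hypothetical usage example)
async function main() {
  const res = await fetch('http://localhost:11434/v1/chat/completions', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
      model: 'default', // replace with whatever model name the upstream server expects
      stream: false,
      messages: [{ role: 'user', content: 'What is 12345 * 6789?' }],
    }),
  });
  console.log(JSON.stringify(await res.json(), null, 2));
}
main();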