A proxy that adds OpenAI-style function-calling capabilities to a llama.cpp server.
// proxy.js
// Express server wrapping llama.cpp server with OpenAI function-calling semantics
// Logs client requests/responses and model requests/responses to console
const express = require('express');
const axios = require('axios');
const bodyParser = require('body-parser');
const { v4: uuidv4 } = require('uuid');
const { spawn } = require('child_process');

const LLM_ORIGIN = 'http://r6:11434';
const LLM_URL = `${LLM_ORIGIN}/v1/chat/completions`;
const PORT = 11434;

// In-memory map to track original messages by id
const messageStore = new Map();

// Function definitions
function get_current_time() {
  return { utc: new Date().toISOString() };
}
get_current_time.llm = {
  description: 'returns current time in UTC (+0000)',
  parameters: { type: 'object', properties: {}, required: [] },
};
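// get_current_time() returns e.g. { utc: '2025-05-09T01:36:00.000Z' }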
async function runBc(input) {
  return new Promise((resolve, reject) => {
    const bc = spawn('bc', ['-l']);
    let output = '';
    bc.stdout.on('data', (chunk) => {
      output += chunk.toString();
    });
    bc.stderr.on('data', (chunk) => {
      output += chunk.toString();
    });
    bc.on('error', (err) => {
      reject(err);
    });
    bc.on('close', (code) => {
      resolve(output);
    });
    bc.stdin.write(input.formula);
    bc.stdin.end();
  });
}
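// Note: stdout and stderr are concatenated, so with `bc -l` an input like
// "2^10" resolves to "1024\n", while bc's own error messages (e.g. syntax
// errors) are passed back to the model alongside, or instead of, a result.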
runBc.llm = {
  description: 'calculates formulas and returns the result, by calling the bc command',
  parameters: {
    type: 'object',
    properties: {
      formula: { type: 'string', description: 'the formula to calculate' },
    },
    required: ['formula'],
  },
};
const functions = Object.fromEntries(
  [get_current_time, runBc].map(def => [def.name, def])
);
const tools = Object.keys(functions).map(name => ({
  type: 'function',
  function: {
    name: name,
    description: functions[name].llm.description,
    parameters: functions[name].llm.parameters,
  },
}));
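// `tools` now follows the OpenAI tool schema sent upstream on every request, e.g.
// [{ type: 'function', function: { name: 'runBc', description: '...', parameters: { ... } } }, ...]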
const app = express();
app.use(bodyParser.json());
// Expand any message IDs back to full messages
function expandClientMessages(body) {
  if (!Array.isArray(body.messages)) return;
  console.log('Received client messages:', JSON.stringify(body.messages, null, 2));
  body.messages = body.messages.flatMap(m => {
    const recovered = [];
    let match;
    // Peel off leading "function-call:<id>" lines and splice the stored messages back in
    while (typeof m.content === 'string' &&
           (match = m.content.match(/^function-call:([^\n]+)(?:\n(.*)|)$/s)) != null) {
      const id = match[1];
      m.content = match[2] != null ? match[2] : '';
      if (messageStore.has(id))
        recovered.push(...messageStore.get(id));
    }
    recovered.push(m);
    return recovered;
  });
}
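// Example: a client message whose content is
//   "function-call:<uuid>\nsome user text"
// becomes [ ...messageStore.get('<uuid>'), { role, content: 'some user text' } ],
// restoring the assistant tool-call and function-result messages for the model.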
async function callLLMJson(body) {
  let messages = body.messages || [];
  const prefixes = [];
  let finalContent = '';
  while (true) {
    const upstream = { ...body, tools, stream: false, chat_format: 'chatml-function-calling', messages };
    console.log('Sending to model:', JSON.stringify(upstream, null, 2));
    const r1 = await axios.post(LLM_URL, upstream);
    console.log('Model response:', JSON.stringify(r1.data, null, 2));
    const c1 = r1.data.choices[0];
    if (!(c1.finish_reason === 'tool_calls' && c1.message.tool_calls?.length)) {
      finalContent = c1.message.content;
      break;
    }
    const call = c1.message.tool_calls[0];
    const fn = call.function.name;
    const args = JSON.parse(call.function.arguments || '{}');
    console.log(`Calling function ${fn} with args:`, args);
    const resObj = await functions[fn](args);
    console.log(`Function ${fn} result:`, resObj);
    const randId = uuidv4();
    prefixes.push(`function-call:${randId}\n`);
    const fnMsg = { role: 'function', name: fn, content: JSON.stringify(resObj) };
    messageStore.set(randId, [c1.message, fnMsg]);
    messages = [...messages, c1.message, fnMsg];
  }
  return [...prefixes, finalContent];
}
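// callLLMJson() returns an array of zero or more "function-call:<id>\n" prefixes
// followed by the model's final text; clients that echo the prefixes back let
// expandClientMessages() reconstruct the tool-call history on the next turn.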
app.post('/v1/chat/completions', async (req, res) => {
  try {
    // Expand IDs
    expandClientMessages(req.body);
    const isStream = req.body.stream === true;
    const history = await callLLMJson(req.body);
    console.log('Prepared history for client:', history);
    if (isStream) {
      res.setHeader('Content-Type', 'text/event-stream');
      res.setHeader('Cache-Control', 'no-cache');
      res.setHeader('Connection', 'keep-alive');
      res.flushHeaders();
      const emitChunk = (delta) => {
        const chunk = { choices: [{ delta: delta, index: 0, finish_reason: null }] };
        // each SSE event is terminated by a blank line
        res.write(`data: ${JSON.stringify(chunk)}\n\n`);
      };
      emitChunk({ role: 'assistant' });
      for (const msg of history) {
        emitChunk({ content: msg });
      }
      res.write('data: [DONE]\n\n');
      console.log('Stream completed');
      res.end();
    } else {
      // Concatenate the "function-call:" prefixes and the final text into one
      // assistant message, matching what the streaming branch emits
      const content = history.join('');
      console.log('Sending non-stream response:', content);
      res.json({ choices: [{ index: 0, message: { role: 'assistant', content }, finish_reason: 'stop' }] });
    }
  } catch (err) {
    console.error('Error in /v1/chat/completions:', err);
    if (req.body.stream) {
      res.write(`event: error\ndata: ${JSON.stringify({ error: err.toString() })}\n\n`);
      res.end();
    } else {
      res.status(500).json({ error: err.toString() });
    }
  }
});
// Fallback proxy
app.use(async (req, res) => {
  try {
    const url = `${LLM_ORIGIN}${req.originalUrl}`;
    const method = req.method.toLowerCase();
    const headers = { ...req.headers, host: new URL(LLM_ORIGIN).host };
    const data = ['get', 'delete'].includes(method) ? undefined : req.body;
    const resp = await axios({ url, method, headers, data, responseType: data && req.body.stream ? 'stream' : 'json' });
    if (data && req.body.stream) {
      res.setHeader('Content-Type', 'text/event-stream');
      resp.data.pipe(res);
    } else {
      res.status(resp.status).set(resp.headers).send(resp.data);
    }
  } catch (err) {
    console.error('Fallback error', err);
    res.status(502).json({ error: err.toString() });
  }
});

app.listen(PORT, () => console.log(`Proxy listening on ${PORT}`));
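A minimal client-side sketch of how the proxy can be exercised (assumptions: the proxy is reachable at http://localhost:11434, and the model name is a placeholder; use whatever the backing server expects):

// client.js — illustrative usage only
const axios = require('axios');

async function main() {
  const res = await axios.post('http://localhost:11434/v1/chat/completions', {
    model: 'example-model', // placeholder model name
    stream: false,
    messages: [{ role: 'user', content: 'What is 2^10? Use the calculator.' }],
  });
  // Any "function-call:<id>" prefixes in the content should be echoed back
  // verbatim on the next turn so the proxy can re-expand the stored tool calls.
  console.log(res.data.choices[0].message.content);
}

main().catch(console.error);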