@kazuho
Last active May 9, 2025 01:36
proxy that adds function call capabilities to llama.cpp server
// proxy.js
// Express server wrapping llama.cpp server with OpenAI function-calling semantics
// Logs client requests/responses and model requests/responses to console
const express = require('express');
const axios = require('axios');
const bodyParser = require('body-parser');
const { v4: uuidv4 } = require('uuid');
const { spawn } = require('child_process');
const LLM_ORIGIN = 'http://r6:11434';
const LLM_URL = `${LLM_ORIGIN}/v1/chat/completions`;
const PORT = 11434;
// In-memory map to track original messages by id
const messageStore = new Map();
// Function definitions
function get_current_time() {
  return { utc: new Date().toISOString() };
}
get_current_time.llm = {
  description: 'returns current time in UTC (+0000)',
  parameters: { type: 'object', properties: {}, required: [] },
};
// Evaluate input.formula by piping it into `bc -l`, resolving with the combined stdout/stderr output
async function runBc(input) {
  return new Promise((resolve, reject) => {
    const bc = spawn('bc', ['-l']);
    let output = '';
    bc.stdout.on('data', (chunk) => {
      output += chunk.toString();
    });
    bc.stderr.on('data', (chunk) => {
      output += chunk.toString();
    });
    bc.on('error', (err) => {
      reject(err);
    });
    bc.on('close', (code) => {
      resolve(output);
    });
    bc.stdin.write(input.formula);
    bc.stdin.end();
  });
}
runBc.llm = {
  description: 'calculates formulas and returns the result, by calling the bc command',
  parameters: {
    type: 'object',
    properties: {
      formula: { type: 'string', description: 'the formula to calculate' },
    },
    required: ['formula'],
  },
};
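// Illustrative sketch (not part of the original gist): any additional tool follows the same
// pattern, a plain function plus an `.llm` descriptor, and would also need to be added to the
// registration list below. The `get_random_integer` name and its parameters are hypothetical.
//
//   function get_random_integer(input) {
//     const { min, max } = input;
//     return { value: Math.floor(Math.random() * (max - min + 1)) + min };
//   }
//   get_random_integer.llm = {
//     description: 'returns a random integer between min and max, inclusive',
//     parameters: {
//       type: 'object',
//       properties: {
//         min: { type: 'integer', description: 'lower bound' },
//         max: { type: 'integer', description: 'upper bound' },
//       },
//       required: ['min', 'max'],
//     },
//   };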
// Build a name→function map and the OpenAI-style `tools` list sent to the model
const functions = Object.fromEntries(
  [get_current_time, runBc].map(def => [def.name, def])
);
const tools = Object.keys(functions).map(name => ({
  type: 'function',
  function: {
    name: name,
    description: functions[name].llm.description,
    parameters: functions[name].llm.parameters
  }
}));
const app = express();
app.use(bodyParser.json());
// Expand any message IDs back to full messages
function expandClientMessages(body) {
  if (!Array.isArray(body.messages)) return;
  console.log('Received client messages:', JSON.stringify(body.messages, null, 2));
  body.messages = body.messages.flatMap(m => {
    const recovered = [];
    let match;
    while ((match = m.content.match(/^function-call:([^\n]+)(?:\n(.*)|)$/s)) != null) {
      const id = match[1];
      m.content = match[2] != null ? match[2] : '';
      if (messageStore.has(id))
        recovered.push(...messageStore.get(id));
    }
    recovered.push(m);
    return recovered;
  });
}
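// Example of the round trip (illustrative; the id shown is made up): after a tool call the proxy
// prepends "function-call:<id>\n" to the assistant text it sends back, so the client later echoes
// something like
//   { role: 'assistant', content: 'function-call:7f0c...\n12345 * 6789 is 83810205.' }
// expandClientMessages() strips each prefix, looks the id up in messageStore, and re-inserts the
// stored tool-call message and function result ahead of the visible text, so the upstream model
// sees the full tool-call history on the next request.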
// Call the model repeatedly, executing any tool calls it requests, until it returns plain content.
// Returns the accumulated "function-call:<id>" prefixes followed by the final text.
async function callLLMJson(body) {
  let messages = body.messages || [];
  const prefixes = [];
  let finalContent = '';
  while (true) {
    const upstream = { ...body, tools, stream: false, chat_format: 'chatml-function-calling', messages };
    console.log('Sending to model:', JSON.stringify(upstream, null, 2));
    const r1 = await axios.post(LLM_URL, upstream);
    console.log('Model response:', JSON.stringify(r1.data, null, 2));
    const c1 = r1.data.choices[0];
    if (!(c1.finish_reason === 'tool_calls' && c1.message.tool_calls?.length)) {
      finalContent = c1.message.content;
      break;
    }
    const call = c1.message.tool_calls[0];
    const fn = call.function.name;
    const args = JSON.parse(call.function.arguments || '{}');
    console.log(`Calling function ${fn} with args:`, args);
    const resObj = await functions[fn](args);
    console.log(`Function ${fn} result:`, resObj);
    const randId = uuidv4();
    prefixes.push(`function-call:${randId}\n`);
    const fnMsg = { role: 'function', name: fn, content: JSON.stringify(resObj) };
    messageStore.set(randId, [c1.message, fnMsg]);
    messages = [...messages, c1.message, fnMsg];
    continue;
  }
  return [...prefixes, finalContent];
}
app.post('/v1/chat/completions', async (req, res) => {
  try {
    // Expand IDs
    expandClientMessages(req.body);
    const isStream = req.body.stream === true;
    const history = await callLLMJson(req.body);
    console.log('Prepared history for client:', history);
    if (isStream) {
      res.setHeader('Content-Type', 'text/event-stream');
      res.setHeader('Cache-Control', 'no-cache');
      res.setHeader('Connection', 'keep-alive');
      res.flushHeaders();
      const emitChunk = (delta) => {
        const chunk = { choices: [{ delta: delta, index: 0, finish_reason: null }] };
        // each SSE event is terminated by a blank line
        res.write(`data: ${JSON.stringify(chunk)}\n\n`);
      };
      emitChunk({ role: 'assistant' });
      for (const msg of history) {
        emitChunk({ content: msg });
      }
      res.write('data: [DONE]\n\n');
      console.log('Stream completed');
      res.end();
    } else {
      // join the function-call prefixes and final text into one assistant message,
      // mirroring what the streaming path emits
      const content = history.join('');
      console.log('Sending non-stream response:', content);
      res.json({ choices: [{ message: { role: 'assistant', content }, finish_reason: 'stop' }] });
    }
  } catch (err) {
    console.error('Error in /v1/chat/completions:', err);
    if (req.body.stream) {
      res.write(`event: error\ndata: ${JSON.stringify({ error: err.toString() })}\n\n`);
      res.end();
    } else {
      res.status(500).json({ error: err.toString() });
    }
  }
});
// Fallback proxy
app.use(async (req, res) => {
  try {
    const url = `${LLM_ORIGIN}${req.originalUrl}`;
    const method = req.method.toLowerCase();
    const headers = { ...req.headers, host: new URL(LLM_ORIGIN).host };
    const data = ['get', 'delete'].includes(method) ? undefined : req.body;
    const resp = await axios({ url, method, headers, data, responseType: data && req.body.stream ? 'stream' : 'json' });
    if (data && req.body.stream) {
      res.setHeader('Content-Type', 'text/event-stream');
      resp.data.pipe(res);
    } else {
      res.status(resp.status).set(resp.headers).send(resp.data);
    }
  } catch (err) {
    console.error('Fallback error', err);
    res.status(502).json({ error: err.toString() });
  }
});
app.listen(PORT, () => console.log(`Proxy listening on ${PORT}`));
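
A minimal client sketch (not part of the gist), assuming the proxy runs on localhost:11434 and Node 18+ provides a global fetch; the prompt is chosen so the model is likely to invoke the bc-backed calculator tool:

// client.js (hypothetical usage example)
async function main() {
  const res = await fetch('http://localhost:11434/v1/chat/completions', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
      model: 'default', // replace with whatever model name the upstream server expects
      stream: false,
      messages: [{ role: 'user', content: 'What is 12345 * 6789?' }],
    }),
  });
  console.log(JSON.stringify(await res.json(), null, 2));
}
main();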