Skip to content

Instantly share code, notes, and snippets.

@rioki
Created May 12, 2025 09:14
Show Gist options
  • Save rioki/20701452774f9e1185526958d5414e1f to your computer and use it in GitHub Desktop.
Save rioki/20701452774f9e1185526958d5414e1f to your computer and use it in GitHub Desktop.

GPT Server

GPT Server is a GPT4All wrapper that exposes an OpenAI / ChatML style REST interface. It is designed to facilitate development against OpenAI and local AI with minimal code changes.

Setup

To get started with GPT Server, follow these steps:

  1. Clone the repository:

    git clone <repository_url>
    cd <repository_directory>
  2. Install dependencies:

    npm install
  3. Run the server:

    npm start

GPT4All Bug

There is a bug in GPT4All. If you pass a messages array with only one message it will crash in node_modules\gpt4all\src\util.js(68).

You can hotpatch it by changing that line to:

if (userAssistantMessages.length > 0 && userAssistantMessages[0].role !== "user") {

Usage

Once the server is running, you can interact with the REST API using your preferred HTTP client. The interface is designed to be compatible with OpenAI and ChatML standards, allowing for seamless integration with existing tools and workflows.

API Endpoints

/v1/chat/completions

  • Method: POST
  • Description: Facilitates a chat-like conversation with the model.
  • Request Body:
    {
        "messages": [
            {
                "role": "system",
                "content": "Your system message, optional."
            },
            {
                "role": "user",
                "content": "Your input text here."
            }
        ],
        "model": "A modoel ID",
        "max_tokens": 4096,
        "stream": false
    }
  • Response:
    {
        "id": "response_id",
        "usage": {
            "prompt_tokens": null,
            "total_tokens": null,
            "completion_tokens": 38,
            "n_past_tokens": 480
        },
        "choices": [
            {
                "message": {
                    "role": "assistant",
                    "content": "Awnser"
                }
            }
        ]
    }

Examples

See summerize.js for an example.

// GPT4All REST Service
//
// Copyright 2024 Sean Farrell
//
// This work is free. You can redistribute it and/or modify it under the
// terms of the Do What The Fuck You Want To Public License, Version 2,
// as published by Sam Hocevar. See the COPYING file for more details.
//
// This program is free software. It comes without any warranty, to
// the extent permitted by applicable law. You can redistribute it
// and/or modify it under the terms of the Do What The Fuck You Want
// To Public License, Version 2, as published by Sam Hocevar. See
// http://www.wtfpl.net/ for more details.
// Module dependencies.
let path = require('path');
let restify = require("restify");
let morgan = require("morgan");
let corsMiddleware = require("restify-cors-middleware2");
let gpt4all = require("gpt4all");
let yargs = require('yargs/yargs')
let { hideBin } = require('yargs/helpers')
// Parsed command line flags: --path, --device, --port (see below).
let argv = yargs(hideBin(process.argv)).argv
// Restify server with dev-style request logging and JSON body parsing.
let server = restify.createServer();
server.use(morgan("dev"));
server.use(restify.plugins.bodyParser());
// Allow cross-origin requests from any origin so browser-based clients
// can talk to the server directly.
const cors = corsMiddleware({
preflightMaxAge: 5,
origins: ["*"],
allowHeaders: ["API-Token"],
exposeHeaders: ["API-Token-Expiry"]
});
server.pre(cors.preflight);
server.use(cors.actual);
// Directory containing the GPT4All model files; defaults to the GPT4All
// install location under %LOCALAPPDATA% (Windows). Override with --path.
let modelPath = argv.path ?? path.join(process.env.LOCALAPPDATA, "nomic.ai/GPT4All");
// Inference device passed through to gpt4all.loadModel; defaults to "cpu".
let modelDevice = argv.device ?? "cpu";
// Cache of already loaded models, filled lazily by loadModel.
let models = [];
// Load a GPT4All model, caching by (modelId, contextSize) in the
// module-level `models` array.
//
// The cache stores the load *promise* rather than the resolved model, so
// two concurrent requests for the same model share a single load instead
// of triggering duplicate (expensive) loads. A failed load is evicted
// from the cache so a later request can retry.
//
// @param {string} modelId     model file identifier passed to gpt4all
// @param {number} contextSize context window size (nCtx)
// @returns {Promise<object>}  the loaded gpt4all model
async function loadModel(modelId, contextSize) {
    const cached = models.find(
        (entry) => entry.modelId === modelId && entry.contextSize === contextSize
    );
    if (cached) {
        return cached.promise;
    }
    const promise = gpt4all.loadModel(modelId, {
        modelPath: modelPath,
        device: modelDevice,
        nCtx: contextSize,
    });
    const entry = { modelId, contextSize, promise };
    models.push(entry);
    // Evict on failure so the next request retries instead of getting the
    // same rejected promise forever.
    promise.catch(() => {
        const index = models.indexOf(entry);
        if (index !== -1) {
            models.splice(index, 1);
        }
    });
    return promise;
}
// Run a single, non-streaming completion against the requested model.
// The model is resolved (and cached) via loadModel first.
async function completions(modelId, contextSize, messages) {
    const model = await loadModel(modelId, contextSize);
    const result = gpt4all.createCompletion(model, messages);
    return result;
}
// Run a streaming completion, writing each generated token chunk to the
// HTTP response as plain text.
//
// Note: the function name's spelling is kept as-is because the route
// handler below references it.
//
// @param {string} modelId     model file identifier
// @param {number} contextSize context window size (nCtx)
// @param {Array}  messages    ChatML style message array
// @param {object} response    Node/restify HTTP response to stream into
async function streamCompleations(modelId, contextSize, messages, response) {
    const model = await loadModel(modelId, contextSize);
    const stream = gpt4all.createCompletionStream(model, messages);
    response.writeHead(200, {
        'Content-Type': 'text/plain'
    });
    stream.tokens.on("data", (chunk) => {
        response.write(chunk);
    });
    try {
        await stream.result;
    } finally {
        // Always terminate the response, even if generation fails after
        // the headers have already gone out; otherwise the client hangs.
        response.end();
    }
}
server.post("/v1/chat/completions", (req, res, next) => {
if (req.body.model == null) {
return res.send(417, "Expectation Failed\n\nNo model specified.");
}
if (req.body.messages == null || req.body.messages.length == 0) {
return res.send(417, "Expectation Failed\n\nNo messages.");
}
let model = req.body.model;
let max_tokens = req.body.max_tokens ?? 2048;
let messages = req.body.messages;
let stream = req.body.stream ?? false;
if (stream) {
streamCompleations(model, max_tokens, messages, res)
.then(next)
.catch(err => {
console.error(err);
res.send(500, "Internal Server Error");
next();
})
}
else {
completions(model, max_tokens, messages)
.then(comp => {
res.json(comp);
next();
})
.catch(err => {
console.error(err);
res.send(500, "Internal Server Error");
next();
})
}
});
// Listen port; override with --port. 4200 is the default the README
// examples assume.
let port = argv.port ?? 4200;
server.listen(port, function() {
console.log("%s listening at %s", server.name, server.url);
});
{
"name": "gpt-server",
"version": "1.0.0",
"main": "index.js",
"scripts": {
"start": "nodemon index.js"
},
"author": "Sean Farrell <[email protected]>",
"license": "WTFPL",
"private": true,
"description": "GPT4All Wrapper",
"dependencies": {
"gpt4all": "^4.0.0",
"morgan": "^1.10.0",
"node-fetch": "^3.3.2",
"nodemon": "^3.1.4",
"restify": "^11.1.0",
"restify-cors-middleware2": "^2.2.1",
"yargs": "^17.7.2"
}
}
// GPT4All REST Service
//
// Copyright 2024 Sean Farrell
//
// This work is free. You can redistribute it and/or modify it under the
// terms of the Do What The Fuck You Want To Public License, Version 2,
// as published by Sam Hocevar. See the COPYING file for more details.
//
// This program is free software. It comes without any warranty, to
// the extent permitted by applicable law. You can redistribute it
// and/or modify it under the terms of the Do What The Fuck You Want
// To Public License, Version 2, as published by Sam Hocevar. See
// http://www.wtfpl.net/ for more details.
// Module dependencies.
let fs = require('fs').promises;
let yargs = require('yargs/yargs')
let { hideBin } = require('yargs/helpers');
let argv = yargs(hideBin(process.argv)).argv
// Chat completion endpoint to call; override with --endpoint.
let endpoint = argv.endpoint ?? "http://localhost:4200/v1/chat/completions";
// Send the contents of `file` to the chat completion endpoint and print
// the raw JSON response, followed by a blank separator line.
//
// The function name's spelling is kept as-is because the driver loop
// below calls it.
//
// @param {string} file path of the text file to summarize
// @throws {Error} if the HTTP request returns a non-2xx status
async function summerize(file) {
    const contents = await fs.readFile(file, "utf-8");
    const body = {
        messages: [
            {
                role: "system",
                content: "Your job, if you accept it, is to summarize everything you are shown in 15 words or less.",
            },
            {
                role: "user",
                content: contents,
            }
        ],
        model: "Meta-Llama-3.1-8B-Instruct.Q4_0.gguf",
        max_tokens: 4096,
        stream: false
    };
    const res = await fetch(endpoint, {
        method: 'post',
        body: JSON.stringify(body),
        headers: {'Content-Type': 'application/json'}
    });
    // Fail loudly on HTTP errors instead of silently printing the error
    // body as if it were a result.
    if (!res.ok) {
        throw new Error(`Request to ${endpoint} failed: ${res.status} ${res.statusText}`);
    }
    const text = await res.text();
    console.log(text);
    // Blank line separates the output of consecutive files.
    console.log("");
}
// Summarize every file named on the command line. Requests run
// concurrently; each promise gets its own .catch so a single failure is
// reported per file instead of killing the process with an unhandled
// rejection.
for (const file of argv._) {
    summerize(file).catch((err) => {
        console.error(`Failed to summarize ${file}:`, err);
    });
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment