// GPT4All REST Service
//
// Copyright 2024 Sena Farrell
//
// This work is free. You can redistribute it and/or modify it under the
// terms of the Do What The Fuck You Want To Public License, Version 2,
// as published by Sam Hocevar. See the COPYING file for more details.
//
// This program is free software. It comes without any warranty, to
// the extent permitted by applicable law. You can redistribute it
// and/or modify it under the terms of the Do What The Fuck You Want
// To Public License, Version 2, as published by Sam Hocevar. See
// http://www.wtfpl.net/ for more details.
|
|
let path = require('path');
let restify = require("restify");
let morgan = require("morgan");
let corsMiddleware = require("restify-cors-middleware2");
let gpt4all = require("gpt4all");
|
|
let yargs = require('yargs/yargs');
let { hideBin } = require('yargs/helpers');
let argv = yargs(hideBin(process.argv)).argv;
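// Recognized command-line flags (all optional):
//
//   --path <dir>      directory containing GPT4All model files
//   --device <name>   inference device handed to gpt4all (default "cpu")
//   --port <n>        TCP port to listen on (default 4200)
//
// For example (the script name here is just a placeholder):
//
//   node gpt4all-rest.js --path ./models --device gpu --port 8080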
|
|
let server = restify.createServer();

// Log every request to stdout in morgan's "dev" format.
server.use(morgan("dev"));

// Parse request bodies (JSON in particular) into req.body.
server.use(restify.plugins.bodyParser());
|
|
// Accept cross-origin requests from any origin.
const cors = corsMiddleware({
    preflightMaxAge: 5,
    origins: ["*"],
    allowHeaders: ["API-Token"],
    exposeHeaders: ["API-Token-Expiry"]
});

server.pre(cors.preflight);
server.use(cors.actual);
|
|
// Default to the GPT4All desktop app's model directory on Windows. Note
// that process.env.LOCALAPPDATA is normally unset on Linux and macOS,
// where path.join() would throw, so pass --path explicitly there.
let modelPath = argv.path ?? path.join(process.env.LOCALAPPDATA, "nomic.ai/GPT4All");
let modelDevice = argv.device ?? "cpu";
let models = [];
|
|
// Return a cached model, or load it and cache it. Models are cached per
// (modelId, contextSize) pair, so the same model requested with two
// different context sizes is loaded into memory twice.
async function loadModel(modelId, contextSize) {
    for (let entry of models) {
        if (entry.modelId == modelId && entry.contextSize == contextSize) {
            return entry.model;
        }
    }

    let model = await gpt4all.loadModel(modelId, {
        modelPath: modelPath,
        device: modelDevice,
        nCtx: contextSize,
    });

    models.push({
        model,
        contextSize,
        modelId
    });

    return model;
}
|
|
async function completions(modelId, contextSize, messages) {
    let model = await loadModel(modelId, contextSize);
    return gpt4all.createCompletion(model, messages);
}
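// At the time of writing, gpt4all's createCompletion() resolves to an
// OpenAI-style completion object, roughly of this shape (indicative,
// not exhaustive):
//
//   {
//       choices: [{ message: { role: "assistant", content: "..." } }],
//       usage: { prompt_tokens: ..., completion_tokens: ..., total_tokens: ... }
//   }
//
// which is why the non-streaming branch below can send it to clients as-is.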
|
|
// Stream a completion straight onto the HTTP response. Tokens are sent
// verbatim as a text/plain body, not in OpenAI's server-sent-events
// chunk format.
async function streamCompletions(modelId, contextSize, messages, response) {
    let model = await loadModel(modelId, contextSize);

    let stream = gpt4all.createCompletionStream(model, messages);

    response.writeHead(200, {
        'Content-Type': 'text/plain'
    });
    stream.tokens.on("data", (chunk) => {
        response.write(chunk);
    });
    await stream.result;
    response.end();
}
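// A client consumes the stream by reading the raw response body. A
// minimal sketch, assuming Node 18+ (global fetch) and with "<model file>"
// standing in for a model file present in the model directory:
//
//   let res = await fetch("http://localhost:4200/v1/chat/completions", {
//       method: "POST",
//       headers: { "Content-Type": "application/json" },
//       body: JSON.stringify({
//           model: "<model file>",
//           stream: true,
//           messages: [{ role: "user", content: "Hello" }]
//       })
//   });
//   for await (let chunk of res.body) {
//       process.stdout.write(chunk);
//   }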
|
|
// OpenAI-style chat completions endpoint.
server.post("/v1/chat/completions", (req, res, next) => {
    if (req.body == null || req.body.model == null) {
        return res.send(417, "Expectation Failed\n\nNo model specified.");
    }
    if (req.body.messages == null || req.body.messages.length == 0) {
        return res.send(417, "Expectation Failed\n\nNo messages.");
    }

    let model = req.body.model;
    // Note: max_tokens is used as the model's context size (nCtx), not as
    // a cap on the number of generated tokens.
    let max_tokens = req.body.max_tokens ?? 2048;
    let messages = req.body.messages;
    let stream = req.body.stream ?? false;

    if (stream) {
        streamCompletions(model, max_tokens, messages, res)
            .then(next)
            .catch(err => {
                console.error(err);
                // The 200 header may already have been written; if so, all
                // we can do is end the stream rather than send a 500.
                if (res.headersSent) {
                    res.end();
                } else {
                    res.send(500, "Internal Server Error");
                }
                next();
            });
    }
    else {
        completions(model, max_tokens, messages)
            .then(comp => {
                res.json(comp);
                next();
            })
            .catch(err => {
                console.error(err);
                res.send(500, "Internal Server Error");
                next();
            });
    }
});
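// An example non-streaming request ("<model file>" again stands in for
// any model file present in the model directory):
//
//   curl http://localhost:4200/v1/chat/completions \
//     -H "Content-Type: application/json" \
//     -d '{"model": "<model file>", "messages": [{"role": "user", "content": "Hello"}]}'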
|
|
let port = argv.port ?? 4200;
server.listen(port, function() {
    console.log("%s listening at %s", server.name, server.url);
});