Skip to content

Instantly share code, notes, and snippets.

@rioki
Created May 12, 2025 09:14
Show Gist options
  • Save rioki/20701452774f9e1185526958d5414e1f to your computer and use it in GitHub Desktop.
Save rioki/20701452774f9e1185526958d5414e1f to your computer and use it in GitHub Desktop.

GPT Server

GPT Server is a GPT4All wrapper that exposes an OpenAI / ChatML style REST interface. It is designed to facilitate development against OpenAI and local AI with minimal code changes.

Setup

To get started with GPT Server, follow these steps:

  1. Clone the repository:

    git clone <repository_url>
    cd <repository_directory>
  2. Install dependencies:

    npm install
  3. Run the server:

    npm start

GPT4All Bug

There is a bug in GPT4All. If you pass a messages array with only one message it will crash in node_modules\gpt4all\src\util.js(68).

You can hotpatch it by changing that line to:

if (userAssistantMessages.length > 0 && userAssistantMessages[0].role !== "user") {

Usage

Once the server is running, you can interact with the REST API using your preferred HTTP client. The interface is designed to be compatible with OpenAI and ChatML standards, allowing for seamless integration with existing tools and workflows.

API Endpoints

/v1/chat/completions

  • Method: POST
  • Description: Facilitates a chat-like conversation with the model.
  • Request Body:
    {
        "messages": [
            {
                "role": "system",
                "content": "Your system message, optional."
            },
            {
                "role": "user",
                "content": "Your input text here."
            }
        ],
        "model": "A modoel ID",
        "max_tokens": 4096,
        "stream": false
    }
  • Response:
    {
        "id": "response_id",
        "usage": {
            "prompt_tokens": null,
            "total_tokens": null,
            "completion_tokens": 38,
            "n_past_tokens": 480
        },
        "choices": [
            {
                "message": {
                    "role": "assistant",
                    "content": "Awnser"
                }
            }
        ]
    }

Examples

See summerize.js for an example.

// GPT4All REST Service
//
// Copyright 2024 Sean Farrell
//
// This work is free. You can redistribute it and/or modify it under the
// terms of the Do What The Fuck You Want To Public License, Version 2,
// as published by Sam Hocevar. See the COPYING file for more details.
//
// This program is free software. It comes without any warranty, to
// the extent permitted by applicable law. You can redistribute it
// and/or modify it under the terms of the Do What The Fuck You Want
// To Public License, Version 2, as published by Sam Hocevar. See
// http://www.wtfpl.net/ for more details.
// Module dependencies.
let path = require('path');
let restify = require("restify");
let morgan = require("morgan");
let corsMiddleware = require("restify-cors-middleware2");
let gpt4all = require("gpt4all");
let yargs = require('yargs/yargs')
let { hideBin } = require('yargs/helpers')
// Parsed command line flags: --path, --device, --port (see below).
let argv = yargs(hideBin(process.argv)).argv
// Restify server with dev-style request logging and JSON body parsing.
let server = restify.createServer();
server.use(morgan("dev"));
server.use(restify.plugins.bodyParser());
// Allow cross-origin requests from any origin so browser-based clients
// can talk to the server directly.
const cors = corsMiddleware({
preflightMaxAge: 5,
origins: ["*"],
allowHeaders: ["API-Token"],
exposeHeaders: ["API-Token-Expiry"]
});
server.pre(cors.preflight);
server.use(cors.actual);
// Directory containing the GPT4All model files; defaults to the GPT4All
// install location under %LOCALAPPDATA% (Windows). Override with --path.
let modelPath = argv.path ?? path.join(process.env.LOCALAPPDATA, "nomic.ai/GPT4All");
// Inference device passed through to gpt4all.loadModel; defaults to "cpu".
let modelDevice = argv.device ?? "cpu";
// Cache of already loaded models, filled lazily by loadModel.
let models = [];
// Load a GPT4All model, caching by (modelId, contextSize) in the
// module-level `models` array.
//
// The cache stores the load *promise* rather than the resolved model, so
// two concurrent requests for the same model share a single load instead
// of triggering duplicate (expensive) loads. A failed load is evicted
// from the cache so a later request can retry.
//
// @param {string} modelId     model file identifier passed to gpt4all
// @param {number} contextSize context window size (nCtx)
// @returns {Promise<object>}  the loaded gpt4all model
async function loadModel(modelId, contextSize) {
    const cached = models.find(
        (entry) => entry.modelId === modelId && entry.contextSize === contextSize
    );
    if (cached) {
        return cached.promise;
    }
    const promise = gpt4all.loadModel(modelId, {
        modelPath: modelPath,
        device: modelDevice,
        nCtx: contextSize,
    });
    const entry = { modelId, contextSize, promise };
    models.push(entry);
    // Evict on failure so the next request retries instead of getting the
    // same rejected promise forever.
    promise.catch(() => {
        const index = models.indexOf(entry);
        if (index !== -1) {
            models.splice(index, 1);
        }
    });
    return promise;
}
// Run a single, non-streaming completion against the requested model.
// The model is resolved (and cached) via loadModel first.
async function completions(modelId, contextSize, messages) {
    const model = await loadModel(modelId, contextSize);
    const result = gpt4all.createCompletion(model, messages);
    return result;
}
// Run a streaming completion, writing each generated token chunk to the
// HTTP response as plain text.
//
// Note: the function name's spelling is kept as-is because the route
// handler below references it.
//
// @param {string} modelId     model file identifier
// @param {number} contextSize context window size (nCtx)
// @param {Array}  messages    ChatML style message array
// @param {object} response    Node/restify HTTP response to stream into
async function streamCompleations(modelId, contextSize, messages, response) {
    const model = await loadModel(modelId, contextSize);
    const stream = gpt4all.createCompletionStream(model, messages);
    response.writeHead(200, {
        'Content-Type': 'text/plain'
    });
    stream.tokens.on("data", (chunk) => {
        response.write(chunk);
    });
    try {
        await stream.result;
    } finally {
        // Always terminate the response, even if generation fails after
        // the headers have already gone out; otherwise the client hangs.
        response.end();
    }
}
server.post("/v1/chat/completions", (req, res, next) => {
if (req.body.model == null) {
return res.send(417, "Expectation Failed\n\nNo model specified.");
}
if (req.body.messages == null || req.body.messages.length == 0) {
return res.send(417, "Expectation Failed\n\nNo messages.");
}
let model = req.body.model;
let max_tokens = req.body.max_tokens ?? 2048;
let messages = req.body.messages;
let stream = req.body.stream ?? false;
if (stream) {
streamCompleations(model, max_tokens, messages, res)
.then(next)
.catch(err => {
console.error(err);
res.send(500, "Internal Server Error");
next();
})
}
else {
completions(model, max_tokens, messages)
.then(comp => {
res.json(comp);
next();
})
.catch(err => {
console.error(err);
res.send(500, "Internal Server Error");
next();
})
}
});
// Listen port; override with --port. 4200 is the default the README
// examples assume.
let port = argv.port ?? 4200;
server.listen(port, function() {
console.log("%s listening at %s", server.name, server.url);
});
{
"name": "gpt-server",
"version": "1.0.0",
"main": "index.js",
"scripts": {
"start": "nodemon index.js"
},
"author": "Sean Farrell <[email protected]>",
"license": "WTFPL",
"private": true,
"description": "GPT4All Wrapper",
"dependencies": {
"gpt4all": "^4.0.0",
"morgan": "^1.10.0",
"node-fetch": "^3.3.2",
"nodemon": "^3.1.4",
"restify": "^11.1.0",
"restify-cors-middleware2": "^2.2.1",
"yargs": "^17.7.2"
}
}
// GPT4All REST Service
//
// Copyright 2024 Sean Farrell
//
// This work is free. You can redistribute it and/or modify it under the
// terms of the Do What The Fuck You Want To Public License, Version 2,
// as published by Sam Hocevar. See the COPYING file for more details.
//
// This program is free software. It comes without any warranty, to
// the extent permitted by applicable law. You can redistribute it
// and/or modify it under the terms of the Do What The Fuck You Want
// To Public License, Version 2, as published by Sam Hocevar. See
// http://www.wtfpl.net/ for more details.
// Module dependencies.
let fs = require('fs').promises;
let yargs = require('yargs/yargs')
let { hideBin } = require('yargs/helpers');
let argv = yargs(hideBin(process.argv)).argv
// Chat completion endpoint to call; override with --endpoint.
let endpoint = argv.endpoint ?? "http://localhost:4200/v1/chat/completions";
// Send the contents of `file` to the chat completion endpoint and print
// the raw JSON response, followed by a blank separator line.
//
// The function name's spelling is kept as-is because the driver loop
// below calls it.
//
// @param {string} file path of the text file to summarize
// @throws {Error} if the HTTP request returns a non-2xx status
async function summerize(file) {
    const contents = await fs.readFile(file, "utf-8");
    const body = {
        messages: [
            {
                role: "system",
                content: "Your job, if you accept it, is to summarize everything you are shown in 15 words or less.",
            },
            {
                role: "user",
                content: contents,
            }
        ],
        model: "Meta-Llama-3.1-8B-Instruct.Q4_0.gguf",
        max_tokens: 4096,
        stream: false
    };
    const res = await fetch(endpoint, {
        method: 'post',
        body: JSON.stringify(body),
        headers: {'Content-Type': 'application/json'}
    });
    // Fail loudly on HTTP errors instead of silently printing the error
    // body as if it were a result.
    if (!res.ok) {
        throw new Error(`Request to ${endpoint} failed: ${res.status} ${res.statusText}`);
    }
    const text = await res.text();
    console.log(text);
    // Blank line separates the output of consecutive files.
    console.log("");
}
// Summarize every file named on the command line. Requests run
// concurrently; each promise gets its own .catch so a single failure is
// reported per file instead of killing the process with an unhandled
// rejection.
for (const file of argv._) {
    summerize(file).catch((err) => {
        console.error(`Failed to summarize ${file}:`, err);
    });
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment