Use Ollama to run an AI summariser on your machine for free
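To try it (a suggested setup; the file name main.go is my assumption, not part of the gist): install Ollama from ollama.com, pull the model with "ollama pull gemma:2b", add the client library with "go get github.com/sashabaranov/go-openai", then run "go run main.go -input-file article.txt" or pass text directly with -input-text.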
package main

import (
	"context"
	"flag"
	"fmt"
	"io"
	"log"
	"os"
	"time"

	"github.com/sashabaranov/go-openai"
)
func main() {
	// gemma:2b is a fast, relatively small model from Google that summarises decently.
	const defaultModel = "gemma:2b"
	// Ollama ignores the API key, but the OpenAI client requires a non-empty one.
	const ollamaAPIKey = "ollama"
	// Default address of the OpenAI-compatible server run by the app from ollama.com.
	const defaultOllamaURL = "http://localhost:11434/v1"
	inputFile := flag.String("input-file", "", "Path to the input text file")
	inputText := flag.String("input-text", "", "Input text to summarize")
	model := flag.String("model", defaultModel, "Model to use for the API")
	baseURL := flag.String("base-url", defaultOllamaURL, "Base URL for the Ollama server (which is OpenAI-compatible)")
	maxTokens := flag.Int("max-tokens", 200, "Maximum number of tokens in the summary")
	flag.Parse()
	// Define the system prompt.
	systemPrompt := `You are a text summarization assistant.
Generate a concise summary of the given input text while preserving the key information and main points.
Provide the summary in three bullet points, totalling 100 words or less.`

	var userMessage string
	if *inputFile != "" {
		// Read the input from a file.
		content, err := os.ReadFile(*inputFile)
		if err != nil {
			log.Fatalf("Error reading input file: %v", err)
		}
		userMessage = string(content)
	} else if *inputText != "" {
		// Use the input text supplied on the command line.
		userMessage = *inputText
	} else {
		log.Fatal("Either -input-file or -input-text must be provided")
	}
	config := openai.DefaultConfig(ollamaAPIKey)
	config.BaseURL = *baseURL
	client := openai.NewClientWithConfig(config)
	ctx := context.Background()

	start := time.Now()
	// Send the request to Ollama via the OpenAI chat-completion protocol.
	// The response is streamed so each chunk prints as soon as it is generated;
	// generation is quick, particularly on an M1 or later Mac.
	req := openai.ChatCompletionRequest{
		Model: *model,
		// MaxTokens caps the length of the generated summary.
		MaxTokens: *maxTokens,
		Stream:    true,
		Messages: []openai.ChatCompletionMessage{
			{
				Role:    openai.ChatMessageRoleSystem,
				Content: systemPrompt,
			},
			{
				Role:    openai.ChatMessageRoleUser,
				Content: userMessage,
			},
		},
	}
	stream, err := client.CreateChatCompletionStream(ctx, req)
	if err != nil {
		log.Fatalf("ChatCompletionStream error: %v", err)
	}
	defer stream.Close()
fmt.Printf("Summary: \n") | |
var content string | |
var completionTokens int | |
for { | |
response, err := stream.Recv() | |
if err != nil { | |
if err == io.EOF { | |
break | |
} | |
log.Fatalf("Stream error: %v\n", err) | |
} | |
content += response.Choices[0].Delta.Content | |
fmt.Printf(response.Choices[0].Delta.Content) | |
completionTokens += len(response.Choices[0].Delta.Content) | |
} | |
fmt.Printf("Summary: \n%s\n", content) | |
	elapsed := time.Since(start)
	fmt.Printf("\nTokens generated (approx.): %d\n", completionTokens)
	fmt.Printf("Output tokens per second: %.2f\n", float64(completionTokens)/elapsed.Seconds())
	fmt.Printf("Total execution time: %s\n", elapsed)
	// TIP: the first time you run this, the model has to be loaded into memory,
	// which is slow (about 4 tokens/s on a Mac M2). Run it again within about
	// five minutes, while Ollama still has the model loaded, and it's roughly
	// 10x faster.
}
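If you want exact token counts rather than a live streamed display, the same client also supports a non-streaming call. Here is a minimal sketch (the summarizeOnce name is mine, not part of the gist) that could sit in the same file and reuse its imports:

// summarizeOnce is a sketch of a non-streaming variant: one blocking call
// instead of a stream. It assumes the same go-openai client setup as above.
func summarizeOnce(ctx context.Context, client *openai.Client, req openai.ChatCompletionRequest) {
	req.Stream = false
	resp, err := client.CreateChatCompletion(ctx, req)
	if err != nil {
		log.Fatalf("ChatCompletion error: %v", err)
	}
	fmt.Printf("Summary:\n%s\n", resp.Choices[0].Message.Content)
	// The OpenAI-compatible response reports exact usage figures, so no
	// token-count approximation is needed here.
	fmt.Printf("Tokens generated: %d\n", resp.Usage.CompletionTokens)
}

The trade-off is latency: nothing prints until the whole summary is ready, whereas the streaming version above shows output token by token.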