Skip to content

Instantly share code, notes, and snippets.

@superacidjax
Created April 20, 2025 19:09
Show Gist options
  • Save superacidjax/ba095b74ced292cff02adf7ab02e7ac7 to your computer and use it in GitHub Desktop.
Save superacidjax/ba095b74ced292cff02adf7ab02e7ac7 to your computer and use it in GitHub Desktop.
This is a Ruby program that takes a CSV and prompts a locally running Ollama LLM to generate a useful description for each row, suitable for publishing in a directory. It uses multithreading: you can specify the number of threads when invoking it at the command line.
require 'csv'
require 'net/http'
require 'json'
require 'uri'
require 'thread'
require 'optparse'
# --- Command-line parsing ---
# Flags: -m/--model, -o/--output, -t/--threads.
# Positional arguments: the input CSV and, optionally, the output CSV.
options = {}
parser = OptionParser.new do |opts|
  opts.banner = "Usage: #{$PROGRAM_NAME} [options] <input.csv> [output.csv]"
  opts.on("-m", "--model NAME", "Ollama model name to use (required)") do |m|
    options[:model] = m
  end
  opts.on("-o", "--output FILE", "Output CSV file (optional; default is input filename with _with_descriptions.csv)") do |o|
    options[:output] = o
  end
  opts.on("-t", "--threads N", Integer, "Number of concurrent threads (default: 4)") do |t|
    options[:threads] = t
  end
end

begin
  parser.parse!
rescue OptionParser::ParseError => e
  # Unknown flags or a non-integer thread count: report cleanly instead of
  # dumping a backtrace.
  STDERR.puts "Error: #{e.message}"
  STDERR.puts parser.banner
  exit 1
end

input_file = ARGV.shift
if input_file.nil?
  STDERR.puts "Error: No input CSV file specified."
  STDERR.puts "Usage: #{$PROGRAM_NAME} -m <model_name> <input.csv> [output.csv]"
  exit 1
end

# Output precedence: -o flag, then the positional [output.csv] advertised in
# the usage banner, then a name derived from the input file's basename
# (written to the current directory).
output_file = options[:output] || ARGV.shift || begin
  base = File.basename(input_file, File.extname(input_file))
  base + "_with_descriptions.csv"
end

model_name = options[:model] || ENV['OLLAMA_MODEL']
if model_name.to_s.strip.empty?
  STDERR.puts "Error: No model name specified. Use -m or set OLLAMA_MODEL."
  exit 1
end

thread_count = options[:threads] || 4
if thread_count < 1
  # Zero or negative workers would leave every row unprocessed.
  STDERR.puts "Error: Thread count must be at least 1 (got #{thread_count})."
  exit 1
end

unless File.exist?(input_file)
  STDERR.puts "Error: Input file not found – #{input_file}"
  exit 1
end

# --- Ollama API settings ---
# Base URL for your local Ollama instance; override via OLLAMA_HOST environment variable if needed.
base_url = ENV['OLLAMA_HOST'].to_s.strip
base_url = 'http://127.0.0.1:11434' if base_url.empty?
# Only skip the prefix when a real http(s) scheme is present, so bare hosts
# such as "httpbin.local:11434" still get one.
base_url = "http://#{base_url}" unless base_url =~ %r{\Ahttps?://}i
uri = URI.parse(base_url)
uri.path = "/api/generate"

puts "Starting concurrent processing..."
puts "Input CSV: #{input_file}"
puts "Output CSV: #{output_file}"
puts "Using Model: #{model_name}"
puts "Using #{thread_count} threads"
puts "-------------------------------------"
# Builds the LLM prompt for one row: fixed writing instructions followed by
# one "header: value" line per column.
def build_description_prompt(row)
  data_lines = row.headers.map { |header| "#{header}: #{row[header]}" }
  "I want you to write a directory listing in a paragraph for the data provided, " \
    "do not include where customers can find more information. I do not want a reader to leave my website. The description should have the name of the car wash, the area, and the city, but the complete address is not needed. I also do not want a mention of the business hours, but I do want to mention amenities that exist, but not amenities that do not exist. We also should mention the average review score. We do not need to mention photos, toilets, or a website. Also, this should be in British English as this is for a UK based directory. no introduction, just the paragraph.\n\n" \
    "#{data_lines.join("\n")}"
end

# Requests a directory description for a single row from the Ollama
# generate endpoint.
#
# @param row [#headers, #[]] the record to describe (a CSV::Row in this script)
# @param model_name [String] Ollama model to prompt
# @param uri [URI::HTTP] full URL of the generate endpoint
# @param max_attempts [Integer] total tries before giving up
# @param retry_delay [Numeric] seconds to sleep between tries
# @return [String] the stripped "response" text, or "" when every attempt
#   failed (failures are reported on STDERR, never raised)
def generate_description(row, model_name, uri, max_attempts: 3, retry_delay: 1)
  http = Net::HTTP.new(uri.host, uri.port)
  # Without this an https:// endpoint would be spoken to in plain HTTP.
  http.use_ssl = uri.scheme == "https"
  http.open_timeout = 5
  http.read_timeout = 60

  request = Net::HTTP::Post.new(uri.request_uri, { "Content-Type" => "application/json" })
  request.body = {
    model: model_name,
    prompt: build_description_prompt(row),
    stream: false
  }.to_json

  attempts = 0
  begin
    attempts += 1
    response = http.request(request)
    raise "HTTP #{response.code} - #{response.message}" unless response.is_a?(Net::HTTPSuccess)

    result = JSON.parse(response.body)
    raise "Unexpected API response format" unless result.is_a?(Hash) && result.key?("response")

    result["response"].to_s.strip
  rescue StandardError => e
    if attempts < max_attempts
      STDERR.puts "Warning: Attempt #{attempts} failed for a row (#{e.message}). Retrying..."
      sleep retry_delay
      retry
    end
    STDERR.puts "Error: Giving up on a row after #{attempts} attempts (#{e.message})."
    "" # best-effort: the row is still written, just without a description
  end
end
# --- Concurrent processing ---
rows = CSV.read(input_file, headers: true)

# The queue is filled completely before any worker starts, so a worker can
# treat "pop would block" (ThreadError) as "no work left".
queue = Queue.new
rows.each_with_index { |row, index| queue << [index, row] }

# Each worker writes only to its own row's slot, which keeps the output in
# input order regardless of completion order.
results = Array.new(rows.size)

# Collect the workers so they can be joined below.
threads = Array.new(thread_count) do
  Thread.new do
    loop do
      begin
        index, row = queue.pop(true)
      rescue ThreadError
        break # Queue is empty
      end
      identifier = row["name"] || "Row #{index + 1}"
      puts "Processing #{identifier} (row #{index + 1})..."
      description = generate_description(row, model_name, uri)
      results[index] = row.fields + [description]
      puts "Completed row #{index + 1}."
    end
  end
end
threads.each(&:join)

# Write the original columns plus the generated description column.
CSV.open(output_file, "w") do |csv_out|
  headers = rows.headers + ['description']
  csv_out << headers
  results.each { |fields| csv_out << fields }
end
puts "Processing complete! #{results.size} rows processed. Output written to #{output_file}"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment