Created
April 20, 2025 19:09
-
-
Save superacidjax/ba095b74ced292cff02adf7ab02e7ac7 to your computer and use it in GitHub Desktop.
This is a Ruby program that takes a CSV and prompts a locally running Ollama LLM to generate a useful description suitable for publishing in a directory. This uses multithreading: you can specify the number of threads when invoking it at the command line.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'csv' | |
require 'net/http' | |
require 'json' | |
require 'uri' | |
require 'thread' | |
require 'optparse' | |
# --- Command-line handling ---
# Flags:       -m/--model NAME, -o/--output FILE, -t/--threads N
# Positionals: <input.csv> (required) and [output.csv] (optional)
options = {}
parser = OptionParser.new do |opts|
  opts.banner = "Usage: #{$PROGRAM_NAME} [options] <input.csv> [output.csv]"

  opts.on("-m", "--model NAME", "Ollama model name to use (required)") do |m|
    options[:model] = m
  end

  opts.on("-o", "--output FILE", "Output CSV file (optional; default is input filename with _with_descriptions.csv)") do |o|
    options[:output] = o
  end

  opts.on("-t", "--threads N", Integer, "Number of concurrent threads (default: 4)") do |t|
    options[:threads] = t
  end
end

# Report bad flags (unknown option, missing/non-integer argument) as a short
# error plus the usage text instead of an uncaught exception backtrace.
begin
  parser.parse!
rescue OptionParser::ParseError => e
  STDERR.puts "Error: #{e.message}"
  STDERR.puts parser
  exit 1
end

input_file = ARGV.shift
if input_file.nil?
  STDERR.puts "Error: No input CSV file specified."
  STDERR.puts "Usage: #{$PROGRAM_NAME} -m <model_name> <input.csv> [output.csv]"
  exit 1
end

# Output path precedence: -o flag, then the optional second positional
# argument advertised in the usage banner, then a name derived from the input
# file. File.basename drops the directory, so the derived file is created
# relative to the current working directory.
output_file = options[:output] || ARGV.shift || begin
  base = File.basename(input_file, File.extname(input_file))
  base + "_with_descriptions.csv"
end

model_name = options[:model] || ENV['OLLAMA_MODEL']
if model_name.to_s.strip.empty?
  STDERR.puts "Error: No model name specified. Use -m or set OLLAMA_MODEL."
  exit 1
end

# Zero or negative thread counts would start no workers and leave every
# result empty, so reject them up front.
thread_count = options[:threads] || 4
if thread_count < 1
  STDERR.puts "Error: Thread count must be at least 1 (got #{thread_count})."
  exit 1
end

unless File.exist?(input_file)
  STDERR.puts "Error: Input file not found – #{input_file}"
  exit 1
end

# --- Ollama API settings ---
# Base URL for your local Ollama instance; override via OLLAMA_HOST environment variable if needed.
# A blank OLLAMA_HOST is treated as unset.
base_url = ENV['OLLAMA_HOST'].to_s.strip
base_url = 'http://127.0.0.1:11434' if base_url.empty?
# Only add a scheme when none is present; checking for a real "http(s)://"
# prefix avoids mistaking hosts such as "httpbin.local:11434" for full URLs.
base_url = "http://#{base_url}" unless base_url.match?(%r{\Ahttps?://}i)
uri = URI.parse(base_url)
uri.path = "/api/generate"

puts "Starting concurrent processing..."
puts "Input CSV: #{input_file}"
puts "Output CSV: #{output_file}"
puts "Using Model: #{model_name}"
puts "Using #{thread_count} threads"
puts "-------------------------------------"
# Asks the Ollama generate endpoint for a directory-style paragraph
# describing one CSV row.
#
# Every column of the row is rendered as "header: value" and appended to a
# fixed instruction prompt. The request is sent with streaming disabled and
# the "response" field of the returned JSON is used as the description.
#
# Any StandardError (connection failure, non-2xx status, invalid JSON,
# missing "response" key) triggers a retry after a one-second pause, for at
# most three attempts in total.
#
# @param row [#headers, #[]] a CSV row (or any object exposing the same two methods)
# @param model_name [String] name of the Ollama model to query
# @param uri [URI::HTTP] full URL of the generate endpoint
# @return [String] the stripped model response, or "" when all attempts failed
def generate_description(row, model_name, uri)
  data_lines = row.headers.map { |header| "#{header}: #{row[header]}" }

  prompt_text = "I want you to write a directory listing in a paragraph for the data provided, " \
    "do not include where customers can find more information. I do not want a reader to leave my website. The description should have the name of the car wash, the area, and the city, but the complete address is not needed. I also do not want a mention of the business hours, but I do want to mention amenities that exist, but not amenities that do not exist. We also should mention the average review score. We do not need to mention photos, toilets, or a website. Also, this should be in British English as this is for a UK based directory. no introduction, just the paragraph.\n\n" \
    "#{data_lines.join("\n")}"

  http = Net::HTTP.new(uri.host, uri.port)
  # Net::HTTP speaks plain HTTP unless told otherwise; enable TLS when the
  # endpoint was configured with an https:// URL.
  http.use_ssl = (uri.scheme == "https")
  http.open_timeout = 5
  http.read_timeout = 60

  request = Net::HTTP::Post.new(uri.request_uri, { "Content-Type" => "application/json" })
  request.body = {
    model: model_name,
    prompt: prompt_text,
    stream: false
  }.to_json

  description_text = ""
  attempts = 0
  begin
    attempts += 1
    response = http.request(request)
    unless response.is_a?(Net::HTTPSuccess)
      raise "HTTP #{response.code} - #{response.message}"
    end

    result = JSON.parse(response.body)
    unless result.is_a?(Hash) && result.key?("response")
      raise "Unexpected API response format"
    end

    description_text = result["response"].to_s.strip
  rescue StandardError => e
    if attempts < 3
      STDERR.puts "Warning: Attempt #{attempts} failed for a row (#{e.message}). Retrying..."
      sleep 1
      retry
    else
      # Best effort: report the failure and fall through with an empty description.
      STDERR.puts "Error: Giving up on a row after #{attempts} attempts (#{e.message})."
    end
  end

  description_text
end
# --- Concurrent processing ---
# Load the whole input CSV, then hand each (index, row) pair to a pool of
# worker threads through a queue. The index is carried along so results can
# be written back in the original row order regardless of completion order.
rows = CSV.read(input_file, headers: true)

queue = Queue.new
rows.each_with_index { |row, index| queue << [index, row] }

# One slot per input row; each worker writes only to the slot of the row it
# popped from the queue.
results = Array.new(rows.size)

# Collect the worker threads so they can be joined below. (The collection
# must be created here; appending to an undefined `threads` raises NameError.)
threads = thread_count.times.map do
  Thread.new do
    loop do
      begin
        # Non-blocking pop: raises ThreadError once the queue is drained.
        index, row = queue.pop(true)
      rescue ThreadError
        break # Queue is empty
      end

      identifier = row["name"] || "Row #{index + 1}"
      puts "Processing #{identifier} (row #{index + 1})..."
      description = generate_description(row, model_name, uri)
      results[index] = row.fields + [description]
      puts "Completed row #{index + 1}."
    end
  end
end

# Wait for every worker to finish before writing any output.
threads.each(&:join)

# Write the original columns plus the new 'description' column.
CSV.open(output_file, "w") do |csv_out|
  headers = rows.headers + ['description']
  csv_out << headers
  results.each { |fields| csv_out << fields }
end

puts "Processing complete! #{results.size} rows processed. Output written to #{output_file}"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment