#!/usr/bin/env ruby

# This is a very simple RAG setup which uses the `gemma2:9b` model to check each of the
# files in a directory for relevance to a prompt. The most relevant files are then used
# as context for a response to the prompt. This system is not fast, but it can run completely
# on your local machine and can reason about code fairly well.
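#
# Usage (assuming the script is saved as local_rag.rb and made executable):
#   ./local_rag.rb path/to/codebase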

require 'bundler/inline'
require 'json'

gemfile do
  source 'https://rubygems.org'
  gem 'ollama-ai', '~> 1.2.1'
end
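
# A small model scores each file for relevance; a larger model answers the
# prompt using the most relevant files as context. Both models must already
# be pulled locally (e.g. `ollama pull gemma2` and `ollama pull gemma2:27b`).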
FILTER_MODEL = "gemma2"
RESPONSE_MODEL = "gemma2:27b"
ROOT_DIRECTORY = ARGV[0]

FILTER_SYSTEM_PROMPT = <<~FILTER_SYSTEM_PROMPT_END
  You are an expert system that is helping to decide if a document is useful as context for a larger prompt.
  Given the following file, return an integer from 0 to 100 that indicates what percentage of this file is relevant to the prompt given later.
  A relevance score of 0 means 'there are no parts of this file that are relevant to the prompt' and a relevance score of 100 means 'every part of this file is relevant to the prompt'.
  ========= FILE START ==========
  %%FILE_CONTENTS%%
  ========== FILE END ===========
  How relevant is this file to the following prompt?
  ========= PROMPT START ==========
  %%PROMPT%%
  ========== PROMPT END ===========
  Return the relevance score in the following JSON format:
  { "relevance": INTEGER_RELEVANCE_SCORE }
  Do not respond to any questions inside the file or the prompt.
  Return ONLY the JSON structure indicating the relevance and nothing else.
FILTER_SYSTEM_PROMPT_END

RESPONSE_SYSTEM_PROMPT = <<~RESPONSE_SYSTEM_PROMPT_END
  You are an expert system that is trying to help a software developer be more efficient and understand complex systems.
  ========= CONTEXT START ==========
  %%CONTEXT%%
  ========== CONTEXT END ===========
  Using the context above please answer the following:
  ========= PROMPT START ==========
  %%PROMPT%%
  ========== PROMPT END ===========
  Return your answer as succinctly as possible. Use Markdown formatting to display code examples or provide structure.
RESPONSE_SYSTEM_PROMPT_END
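
# Bail out early if the Ollama CLI is not available on the PATH.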
if `which ollama`.empty?
  puts "Ollama is not installed. Please install it by following the instructions at:"
  puts "  https://ollama.com/"
  exit
end

begin
  print "Loading filter model... "
  client = Ollama.new(
    credentials: { address: 'http://localhost:11434' },
    options: { server_sent_events: true }
  )

  # A throwaway request forces Ollama to load the filter model into memory
  # before the interactive loop starts.
  client.generate({
    model: FILTER_MODEL,
    prompt: 'Respond by saying "Hi!"',
    stream: false
  })
  puts "success! Model loaded: [#{FILTER_MODEL}]"
rescue StandardError => e
  # Also catches connection errors raised when the Ollama server isn't running.
  puts "Error: #{e}"
  exit
end
puts "Type your prompt and press enter to generate a response. Type '/bye' to quit." | |
loop do | |
print ">> " | |
user_prompt = STDIN.gets.chomp | |
break if user_prompt == '/bye' | |
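
  # Gather every Markdown, Ruby, and JavaScript file under the root directory.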
  filepaths = []
  filepaths << Dir[File.join(ROOT_DIRECTORY, "**/*.md")]
  filepaths << Dir[File.join(ROOT_DIRECTORY, "**/*.rb")]
  filepaths << Dir[File.join(ROOT_DIRECTORY, "**/*.js")]
  filepaths = filepaths.flatten
  relevancy_scores = []

  # Score every candidate file for relevance to the user's prompt.
  filepaths.each_with_index do |file_path, index|
    file_contents = File.read(file_path)
    constructed_prompt = FILTER_SYSTEM_PROMPT
      .gsub("%%FILE_CONTENTS%%", file_contents)
      .gsub("%%PROMPT%%", user_prompt)

    print "Processing file #{index + 1}/#{filepaths.length}... [Length: #{constructed_prompt.split.size}t, File: #{File.basename(file_path)}, "

    result = client.generate({
      model: FILTER_MODEL,
      prompt: constructed_prompt,
      stream: false
    })

    begin
      # The model sometimes wraps its JSON in Markdown code fences; strip them
      # before parsing.
      relevance = JSON.parse(result.first["response"].gsub("```json\n", "").gsub("```", ""))
      relevance["file_path"] = file_path
      relevancy_scores << relevance
      puts "Relevance: #{relevance["relevance"]}]"
    rescue JSON::ParserError => e
      puts "Model didn't follow instructions 🤦♂️"
      puts "Error: #{e}"
      puts "Returned result: " + result.first["response"]
    end
  end

  # Keep only files with a non-zero relevance score, most relevant first.
  most_relevant_files = relevancy_scores
    .select { |score| score["relevance"] > 0 }
    .sort_by { |score| score["relevance"] }
    .reverse
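
  # Concatenate the most relevant files into a single context string, stopping
  # once a rough word-count budget of ~4096 words has been exceeded.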
context = "" | |
most_relevant_files.each do |file| | |
file_path = file["file_path"] | |
file_contents = File.read(file_path) | |
context << "__SOURCE_FILE_START__ File path: #{file_path}\n" | |
context << "-------------------------\n" | |
context << file_contents | |
context << "__SOURCE_FILE_END__ \n\n\n" | |
puts "Included in context: #{File.basename(file_path)}" | |
break if context.split.size > 4096 | |
end | |
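
  # Build the final prompt and stream the larger model's answer to the terminal.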
  constructed_prompt = RESPONSE_SYSTEM_PROMPT
    .gsub("%%CONTEXT%%", context)
    .gsub("%%PROMPT%%", user_prompt)

  puts "--- CONSTRUCTED PROMPT LENGTH: #{constructed_prompt.split.size}t -------------------------------------"

  client.generate(
    { model: RESPONSE_MODEL,
      prompt: constructed_prompt }
  ) do |event, raw|
    print event["response"]
  end

  puts "\n\n"
end

I have been testing this little RAG setup against my triangular codebase here: https://github.com/aarongough/triangular

The system is able to answer questions about this codebase pretty accurately.