Ruby RAG: a minimal retrieval-augmented generation pipeline in Ruby
# Landon Gray https://x.com/thedayisntgray/status/1880245705450930317
require 'httparty'
require 'numo/narray'
require 'openai'
require 'faiss'
require 'matrix'
require 'io/console'
require 'dotenv'
Dotenv.load
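# Setup note (assumption, not part of the original gist): this script needs the
# httparty, numo-narray, ruby-openai, faiss, and dotenv gems installed, plus a
# .env file next to the script containing your API key, e.g.:
#   OPENAI_API_KEY=sk-...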
# Replace this with your data source of choice.
# response = HTTParty.get('https://raw.githubusercontent.com/run-llama/llama_index/main/docs/docs/examples/data/paul_graham/paul_graham_essay.txt')
response = HTTParty.get('https://imsdb.com/scripts/A-Few-Good-Men.html')
text = response.body # note: this is the raw HTML of the page, tags included
File.open('essay.txt', 'w') { |file| file.write(text) } # save a local copy
puts "Text length: #{text.length}"

# Split the text into fixed-size character chunks for embedding
chunk_size = 2048
chunks = text.chars.each_slice(chunk_size).map(&:join)
puts "Number of chunks: #{chunks.length}"
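# Optional refinement (not in the original): fixed-size slicing can cut
# sentences in half at chunk boundaries. A common alternative is overlapping
# chunks, sketched below as an unused helper; the overlap parameter name and
# default are assumptions, not anything the original specifies.
def chunk_with_overlap(text, chunk_size: 2048, overlap: 200)
  step = chunk_size - overlap
  (0...text.length).step(step).map { |i| text[i, chunk_size] }
end
# chunks = chunk_with_overlap(text) # uncomment to try overlapping chunks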
# Configure the OpenAI API client
OpenAI.configure do |config|
  config.access_token = ENV.fetch("OPENAI_API_KEY") # Assumes you've set the environment variable
end

# Initialize the OpenAI client
client = OpenAI::Client.new
# Embed a single string with OpenAI's text-embedding-3-small model
def get_text_embedding(client, input)
  response = client.embeddings(
    parameters: {
      model: 'text-embedding-3-small',
      input: input
    }
  )
  response.dig('data', 0, 'embedding')
end
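# Optional refinement (not in the original): embedding one chunk per request is
# slow for large documents. The embeddings endpoint also accepts an array of
# inputs, so chunks can be sent in batches; this unused helper is a sketch of
# that idea, and the batch_size default is an assumption.
def get_text_embeddings_batched(client, inputs, batch_size: 100)
  inputs.each_slice(batch_size).flat_map do |batch|
    response = client.embeddings(
      parameters: { model: 'text-embedding-3-small', input: batch }
    )
    response['data'].map { |item| item['embedding'] }
  end
end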
# Embed every chunk and stack the vectors into a matrix
text_embeddings = chunks.map { |chunk| get_text_embedding(client, chunk) }
text_embeddings = Numo::DFloat[*text_embeddings]
puts "Embedding matrix shape: #{text_embeddings.shape}"

# Build a FAISS index over the chunk embeddings using L2 distance
d = text_embeddings.shape[1] # embedding dimensionality
index = Faiss::IndexFlatL2.new(d)
index.add(text_embeddings)
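# Note (an aside, not from the original gist): IndexFlatL2 ranks neighbors by
# Euclidean distance. OpenAI embeddings are often compared by cosine similarity
# instead, which here would mean normalizing each vector to unit length and
# building the index with Faiss::IndexFlatIP (inner product) rather than
# IndexFlatL2.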
# Embed the question and retrieve the two nearest chunks
question = "Who are the main characters in this movie?"
question_embedding = get_text_embedding(client, question)
distances, indices = index.search([question_embedding], 2)
index_array = indices.to_a[0]
retrieved_chunks = index_array.map { |i| chunks[i] }
# Assemble the RAG prompt: retrieved context first, then the query
prompt = <<~PROMPT
  Context information is below.
  #{retrieved_chunks.join("\n------------------------\n")}
  Given the context information and not prior knowledge, answer the query.
  Query: #{question}
  Answer:
PROMPT
# Send the prompt to a chat model and return the text of the reply
def run_completion(client, user_message, model: 'gpt-3.5-turbo')
  response = client.chat(
    parameters: {
      model: model,
      messages: [{ role: 'user', content: user_message }],
      temperature: 0.0 # deterministic output, so answers stay grounded in the context
    }
  )
  response.dig('choices', 0, 'message', 'content')
end
puts run_completion(client, prompt)
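# Usage note (assumption, not part of the original gist): with the gems
# installed and OPENAI_API_KEY set in .env, run the script directly, e.g.:
#   ruby rag.rb
# (the filename rag.rb is hypothetical). The final puts prints the model's
# answer, grounded in the two retrieved script chunks.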