Skip to content

Instantly share code, notes, and snippets.

@JonathanLoscalzo
Created July 1, 2024 18:51
Show Gist options
  • Save JonathanLoscalzo/77b096fc6630e81f9a783225b6de62a4 to your computer and use it in GitHub Desktop.
Save JonathanLoscalzo/77b096fc6630e81f9a783225b6de62a4 to your computer and use it in GitHub Desktop.
llm zoomcamp hw1
# Local stack for the LLM Zoomcamp homework: a single-node Elasticsearch
# instance and an Ollama server.
version: '3.8'

services:
  elasticsearch:
    image: docker.elastic.co/elasticsearch/elasticsearch:8.14.1
    container_name: elasticsearch
    environment:
      # Ask Elasticsearch to lock its heap in RAM; this only succeeds when the
      # container is allowed unlimited locked memory (see `ulimits` below).
      - bootstrap.memory_lock=true
      - discovery.type=single-node
      - xpack.security.enabled=false
      # Fixed 1024 MiB JVM heap (initial and maximum).
      - "ES_JAVA_OPTS=-Xms1024m -Xmx1024m"
    ulimits:
      memlock:
        soft: -1
        hard: -1
    ports:
      - "9200:9200"
      - "9300:9300"

  ollama:
    image: ollama/ollama
    container_name: ollama
    volumes:
      # Named volume mounted at /root/.ollama so its contents outlive the
      # container.
      - ollama:/root/.ollama
    ports:
      - "11434:11434"

volumes:
  ollama: {}
import requests
# Download the course FAQ dump and flatten it into one list of documents,
# tagging every document with the name of the course it belongs to.
docs_url = "https://github.com/DataTalksClub/llm-zoomcamp/blob/main/01-intro/documents.json?raw=1"

# A timeout keeps the script from hanging forever on a stalled connection.
docs_response = requests.get(docs_url, timeout=30)
# Fail loudly on an HTTP error instead of trying to parse an error page as JSON.
docs_response.raise_for_status()
documents_raw = docs_response.json()

documents = []
for course in documents_raw:
    course_name = course["course"]
    for doc in course["documents"]:
        # Each document is annotated in place with its parent course name.
        doc["course"] = course_name
        documents.append(doc)
from elasticsearch import Elasticsearch
# Client for the Elasticsearch node listening on localhost:9200.
es_client = Elasticsearch("http://localhost:9200")

index_name = "course-questions"

# One shard, no replicas. "text", "section" and "question" are mapped as
# `text` fields, while "course" is mapped as a `keyword` field.
index_settings = {
    "settings": {
        "number_of_shards": 1,
        "number_of_replicas": 0,
    },
    "mappings": {
        "properties": {
            **{field: {"type": "text"} for field in ("text", "section", "question")},
            "course": {"type": "keyword"},
        }
    },
}

es_client.indices.create(index=index_name, body=index_settings)

# Q1: documents are added with the client's `index` method, one call per doc.
for doc in documents:
    es_client.index(index=index_name, document=doc)
from elasticsearch import Elasticsearch
# Index to query, and a client for the Elasticsearch node on localhost:9200.
index_name = "course-questions"
es_client = Elasticsearch("http://localhost:9200")
def elastic_search(query, course=None):
    """Search the index named by ``index_name`` for ``query``.

    Runs a ``best_fields`` multi-match over the ``question`` field (boosted
    with ``^4``) and the ``text`` field, requesting at most five hits. When
    ``course`` is not ``None``, a ``term`` filter on the ``course`` field is
    added to the bool query.

    Args:
        query: The search text.
        course: Optional value the ``course`` field must match exactly.

    Returns:
        A tuple ``(result_docs, response)``: the list of each hit's
        ``_source`` and the response object returned by ``es_client.search``.
    """
    text_match = {
        "multi_match": {
            "query": query,
            "fields": ["question^4", "text"],
            "type": "best_fields",
        }
    }

    bool_clause = {"must": text_match}
    if course is not None:
        bool_clause["filter"] = {"term": {"course": course}}

    response = es_client.search(
        index=index_name,
        body={"size": 5, "query": {"bool": bool_clause}},
    )

    result_docs = [hit["_source"] for hit in response["hits"]["hits"]]
    return result_docs, response
query = "How do I execute a command in a running docker container?"

# Q3: highest relevance score when no course filter is applied.
docs, result = elastic_search(query)
top_score = result.body["hits"]["max_score"]
print(top_score)  # Q3: 84.05

# Q4: question of the third hit when restricted to machine-learning-zoomcamp.
docs, result = elastic_search(query, course="machine-learning-zoomcamp")
third_hit = docs[2]
print(third_hit["question"])
# Q4: How do I copy files from a different folder into docker container’s working directory?
def build_prompt(query, search_results):
    """Build the LLM prompt for ``query`` from the retrieved FAQ entries.

    Each entry in ``search_results`` is rendered as a ``Q: ... / A: ...`` pair
    and the pairs are separated by one blank line. The result is substituted
    into the prompt template together with the question.

    Args:
        query: The user's question, inserted after ``QUESTION:``.
        search_results: Iterable of mappings with ``"question"`` and
            ``"text"`` keys.

    Returns:
        The formatted prompt with leading and trailing whitespace removed.
    """
    prompt_template = """
You're a course teaching assistant. Answer the QUESTION based on the CONTEXT from the FAQ database.
Use only the facts from the CONTEXT when answering the QUESTION.
QUESTION: {question}
CONTEXT:
{context}
""".strip()
    context_template = """
Q: {question}
A: {text}
""".strip()

    # Strip each rendered entry, then join with a blank line. The separator
    # must be added between entries (not stripped along with each one),
    # otherwise consecutive entries run together as "...A: textQ: next".
    context = "\n\n".join(
        context_template.format(
            question=doc["question"],
            text=doc["text"],
        ).strip()
        for doc in search_results
    )

    return prompt_template.format(question=query, context=context).strip()
# Q5: character length of the prompt built from the first three hits.
prompt = build_prompt(query, docs[:3])
prompt_length = len(prompt)
print(prompt_length)  # Q5: 1459

import tiktoken

# Q6: number of tokens in the prompt under the gpt-4o encoding.
encoding = tiktoken.encoding_for_model("gpt-4o")
# Despite the name, this holds the encoded prompt, not the bare query.
query_encoded = encoding.encode(prompt)
print(len(query_encoded))  # Q6: 322
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment