log rag
from langchain_community.llms import Ollama
from langchain_community.document_loaders import DirectoryLoader
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import ChatPromptTemplate
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.callbacks.manager import CallbackManager
from rich.console import Console
from rich.panel import Panel
from rich.markdown import Markdown
from rich.prompt import Prompt
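
# Prerequisites (an assumption, not spelled out in the gist): a local Ollama
# server with both models pulled ("ollama pull phi3" and
# "ollama pull nomic-embed-text"), plus
# "pip install langchain langchain-community langchain-text-splitters faiss-cpu rich".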


def rag_chat_init(dir_path, filename):
    console = Console()

    # Stream tokens to stdout as the model generates them
    callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
    llm = Ollama(model="phi3", callbacks=callback_manager)

    # Load the log file(s) matching `filename` from dir_path
    loader = DirectoryLoader(
        dir_path,
        glob=filename,
        use_multithreading=True,
    )
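
    # Sketch (assumption, not in the original): DirectoryLoader's glob accepts
    # standard patterns, so a recursive sweep over every log file would be:
    # loader = DirectoryLoader(dir_path, glob="**/*.log", use_multithreading=True)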
with console.status("[cyan]Loading log files..."): | |
docs = loader.load() | |
console.print(f"Total log files: {len(docs)}") | |
with console.status("[cyan]Creating vector store..."): | |
# Vector store things | |
embeddings = OllamaEmbeddings(model="nomic-embed-text") | |
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200) | |
split_documents = text_splitter.split_documents(docs) | |
vector_store = FAISS.from_documents(split_documents, embeddings) | |
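
    # Optional sketch (not in the original gist): persist the index so repeated
    # runs skip re-embedding. save_local/load_local are standard LangChain FAISS
    # methods; the "faiss_index" path and the allow_dangerous_deserialization
    # flag (required by recent langchain-community releases) are assumptions here.
    # vector_store.save_local("faiss_index")
    # vector_store = FAISS.load_local(
    #     "faiss_index", embeddings, allow_dangerous_deserialization=True
    # )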

    # Construct the prompt
    prompt = ChatPromptTemplate.from_template(
        """
        You are an intelligent RAG system named logrctx for log analysis. You are
        given extracted parts of logs as context, along with a question to answer.
        If you don't know the answer, just say "I don't know." Don't try to make up an answer.
        Don't provide any information that is not directly relevant to the question,
        such as debugging information, reasoning, recommendations, or extra context, unless asked.
        Just answer what's asked by summarizing the given context.
        Prefer markdown format for visually appealing output, and include timestamps
        from the logs where they keep the response concise.
        Keep the response as short and to the point as possible without leaving out
        any important information.
        Use only the following pieces of context to answer the question at the end.

        Context: {context}

        Question: {input}
        """
    )

    # Custom retrieval and generation: fetch the top-k chunks, show them,
    # then stuff them into the prompt and generate
    def custom_retrieval_chain(query):
        with console.status("[cyan]Retrieving relevant logs..."):
            docs = vector_store.similarity_search(query, k=5)
        console.print("[green]Context mapped successfully.")
        console.print("Retrieved docs 👇 ")
        for doc in docs:
            console.print(Panel.fit(f"[cyan]{doc.metadata['source']}[/cyan]\n{doc.page_content}"))
        console.print("[cyan]Generating response...")
        response = docs_chain.invoke({"context": docs, "input": query})
        return response
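
    # Variant (a sketch, not in the original): similarity_search_with_score
    # returns (doc, distance) pairs, handy for dropping weak matches before
    # stuffing the prompt. For FAISS the score is an L2 distance, so lower is
    # closer; the 1.0 threshold below is purely illustrative.
    # scored = vector_store.similarity_search_with_score(query, k=5)
    # docs = [doc for doc, score in scored if score < 1.0]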

    # Build the "stuff documents" chain that feeds retrieved docs into the prompt.
    # It is defined after custom_retrieval_chain, which still works: the closure
    # looks up docs_chain at call time, not at definition time.
    docs_chain = create_stuff_documents_chain(llm, prompt)
    # retriever = vector_store.as_retriever()
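
    # Alternative sketch (an assumption, following the commented-out retriever
    # above): the stock LangChain retrieval chain gives equivalent wiring
    # without the rich console output. create_retrieval_chain is already imported.
    # retriever = vector_store.as_retriever(search_kwargs={"k": 5})
    # retrieval_chain = create_retrieval_chain(retriever, docs_chain)
    # response = retrieval_chain.invoke({"input": query})["answer"]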

    # Winner winner chicken dinner
    console.print("\n[bold cyan]Invoking chain...")
    # Interactive prompt loop; type "exit" or "quit" to leave
    while True:
        query = Prompt.ask("[bold green]Prompt[/bold green]")
        if query.lower() in ["exit", "quit"]:
            break
        response = custom_retrieval_chain(query)
        print("\n")
        console.print(Panel.fit("[bold green] logrctx ai 🧠 [/bold green]"))
        console.print(Panel.fit(Markdown(response)))


if __name__ == "__main__":
    rag_chat_init(
        dir_path="../logs",
        filename="reduced_sample.log",
    )
Can you post an example of the output?