log rag
from langchain_community.llms import Ollama
from langchain_community.document_loaders import DirectoryLoader
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import ChatPromptTemplate
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.callbacks.manager import CallbackManager
from rich.console import Console
from rich.panel import Panel
from rich.markdown import Markdown
from rich.prompt import Prompt
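
# Prerequisites (an assumption, not spelled out in the gist): a local Ollama
# server with both models pulled ("ollama pull phi3" and
# "ollama pull nomic-embed-text"), plus
# "pip install langchain langchain-community langchain-text-splitters faiss-cpu rich".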


def rag_chat_init(dir_path, filename):
    console = Console()

    # Stream tokens to stdout as the model generates them
    callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
    llm = Ollama(model="phi3", callbacks=callback_manager)

    # Load the log file(s) matching `filename` from dir_path
    loader = DirectoryLoader(
        dir_path,
        glob=filename,
        use_multithreading=True,
    )
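
    # Sketch (assumption, not in the original): DirectoryLoader's glob accepts
    # standard patterns, so a recursive sweep over every log file would be:
    # loader = DirectoryLoader(dir_path, glob="**/*.log", use_multithreading=True)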
with console.status("[cyan]Loading log files..."): | |
docs = loader.load() | |
console.print(f"Total log files: {len(docs)}") | |
with console.status("[cyan]Creating vector store..."): | |
# Vector store things | |
embeddings = OllamaEmbeddings(model="nomic-embed-text") | |
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200) | |
split_documents = text_splitter.split_documents(docs) | |
vector_store = FAISS.from_documents(split_documents, embeddings) | |
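
    # Optional sketch (not in the original gist): persist the index so repeated
    # runs skip re-embedding. save_local/load_local are standard LangChain FAISS
    # methods; the "faiss_index" path and the allow_dangerous_deserialization
    # flag (required by recent langchain-community releases) are assumptions here.
    # vector_store.save_local("faiss_index")
    # vector_store = FAISS.load_local(
    #     "faiss_index", embeddings, allow_dangerous_deserialization=True
    # )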

    # Construct the prompt
    prompt = ChatPromptTemplate.from_template(
        """
        You are an intelligent RAG system named logrctx for log analysis. You are
        given extracted parts of logs as context, along with a question to answer.
        If you don't know the answer, just say "I don't know." Don't try to make up an answer.
        Don't provide any information that is not directly relevant to the question,
        such as debugging information, reasoning, recommendations, or extra context, unless asked.
        Just answer what's asked by summarizing the given context.
        Prefer markdown format for visually appealing output, and include timestamps
        from the logs where they keep the response concise.
        Keep the response as short and to the point as possible without leaving out
        any important information.
        Use only the following pieces of context to answer the question at the end.

        Context: {context}

        Question: {input}
        """
    )

    # Custom retrieval and generation: fetch the top-k chunks, show them,
    # then stuff them into the prompt and generate
    def custom_retrieval_chain(query):
        with console.status("[cyan]Retrieving relevant logs..."):
            docs = vector_store.similarity_search(query, k=5)
        console.print("[green]Context mapped successfully.")
        console.print("Retrieved docs 👇 ")
        for doc in docs:
            console.print(Panel.fit(f"[cyan]{doc.metadata['source']}[/cyan]\n{doc.page_content}"))
        console.print("[cyan]Generating response...")
        response = docs_chain.invoke({"context": docs, "input": query})
        return response
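
    # Variant (a sketch, not in the original): similarity_search_with_score
    # returns (doc, distance) pairs, handy for dropping weak matches before
    # stuffing the prompt. For FAISS the score is an L2 distance, so lower is
    # closer; the 1.0 threshold below is purely illustrative.
    # scored = vector_store.similarity_search_with_score(query, k=5)
    # docs = [doc for doc, score in scored if score < 1.0]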

    # Build the "stuff documents" chain that feeds retrieved docs into the prompt.
    # It is defined after custom_retrieval_chain, which still works: the closure
    # looks up docs_chain at call time, not at definition time.
    docs_chain = create_stuff_documents_chain(llm, prompt)
    # retriever = vector_store.as_retriever()
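
    # Alternative sketch (an assumption, following the commented-out retriever
    # above): the stock LangChain retrieval chain gives equivalent wiring
    # without the rich console output. create_retrieval_chain is already imported.
    # retriever = vector_store.as_retriever(search_kwargs={"k": 5})
    # retrieval_chain = create_retrieval_chain(retriever, docs_chain)
    # response = retrieval_chain.invoke({"input": query})["answer"]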

    # Winner winner chicken dinner
    console.print("\n[bold cyan]Invoking chain...")
    # Interactive prompt loop; type "exit" or "quit" to leave
    while True:
        query = Prompt.ask("[bold green]Prompt[/bold green]")
        if query.lower() in ["exit", "quit"]:
            break
        response = custom_retrieval_chain(query)
        print("\n")
        console.print(Panel.fit("[bold green] logrctx ai 🧠 [/bold green]"))
        console.print(Panel.fit(Markdown(response)))


if __name__ == "__main__":
    rag_chat_init(
        dir_path="../logs",
        filename="reduced_sample.log",
    )
Can you post an example of the output?