Last active
March 3, 2024 08:47
-
-
Save patchy631/7eb7d99d8353c61e11d7d102bb529549 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
os.environ["HF_HOME"] = "/teamspace/studios/this_studio/weights" | |
os.environ["TORCH_HOME"] = "/teamspace/studios/this_studio/weights" | |
import gc | |
import re | |
import uuid | |
import textwrap | |
import nest_asyncio | |
from dotenv import load_dotenv | |
from llama_index.core import Settings | |
from llama_index.llms.ollama import Ollama | |
from llama_index.core import PromptTemplate | |
from llama_index.readers.github import GithubRepositoryReader, GithubClient | |
from llama_index.core import VectorStoreIndex | |
from llama_index.core.storage.storage_context import StorageContext | |
from langchain.embeddings import HuggingFaceEmbeddings | |
from llama_index.embeddings.langchain import LangchainEmbedding | |
from rag_101.retriever import ( | |
load_embedding_model, | |
load_reranker_model | |
) | |
# LLM used for answering: the "mistral" model via the Ollama LLM class,
# with a 60-second request timeout.
llm = Ollama(
    model="mistral",
    request_timeout=60.0,
)

# Embedding model: loaded by load_embedding_model() and wrapped in
# LangchainEmbedding so it can be assigned to llama_index's Settings later.
lc_embedding_model = load_embedding_model()
embed_model = LangchainEmbedding(lc_embedding_model)

# Allow nested use of the event loop.
nest_asyncio.apply()
# utility functions | |
def parse_github_url(url):
    """Extract the ``(owner, repo)`` pair from a GitHub repository URL.

    Accepts URLs such as ``https://github.com/owner/repo``, optionally with
    a trailing slash, extra path segments (``/tree/main``), a query string,
    a fragment, or a ``.git`` suffix. Leading and trailing whitespace is
    ignored.

    Args:
        url: The repository URL as a string.

    Returns:
        A ``(owner, repo)`` tuple of strings, or ``(None, None)`` when the
        input is not a string or does not look like a GitHub repository URL.
    """
    if not isinstance(url, str):
        return (None, None)

    # Stop each segment at "/", whitespace, "?" or "#" so query strings and
    # fragments do not leak into the repository name.
    pattern = r"https?://(?:www\.)?github\.com/([^/\s?#]+)/([^/\s?#]+)"
    match = re.match(pattern, url.strip())
    if not match:
        return (None, None)

    owner, repo = match.groups()
    # Clone URLs end in ".git"; that suffix is not part of the repo name.
    if repo.endswith(".git"):
        repo = repo[: -len(".git")]
    if not repo:
        return (None, None)
    return (owner, repo)
def validate_owner_repo(owner, repo):
    """Return ``True`` only when both ``owner`` and ``repo`` are truthy."""
    if not owner:
        return False
    return bool(repo)
def initialize_github_client(github_token):
    """Create a ``GithubClient`` from the given token and return it."""
    client = GithubClient(github_token)
    return client
# Setup a query engine | |
def setup_query_engine(github_token, github_url, branch="main"):
    """Build a query engine over the files of a GitHub repository.

    The repository is loaded with ``GithubRepositoryReader`` (only ``.py``,
    ``.ipynb``, ``.js``, ``.ts`` and ``.md`` files are included), indexed
    into a ``VectorStoreIndex`` using the module-level ``embed_model``, and
    exposed through a query engine that uses the module-level ``llm`` and a
    custom question-answering prompt.

    Note: this assigns ``Settings.embed_model`` and ``Settings.llm``, which
    are shared llama_index settings.

    Args:
        github_token: Token passed to ``initialize_github_client``.
        github_url: URL of the repository, parsed by ``parse_github_url``.
        branch: Branch to load. Defaults to ``"main"``; pass another name
            (e.g. ``"master"``) for repositories with a different branch.

    Returns:
        The configured query engine, or ``None`` when the URL is invalid,
        the repository yields no documents, or any error occurs while
        loading or indexing (the error is printed, not raised).
    """
    owner, repo = parse_github_url(github_url)
    if not validate_owner_repo(owner, repo):
        print('Invalid github repo, try again!')
        return None

    # Initialize GitHub client with authentication token
    github_client = initialize_github_client(github_token)
    loader = GithubRepositoryReader(
        github_client,
        owner=owner,
        repo=repo,
        filter_file_extensions=(
            [".py", ".ipynb", ".js", ".ts", ".md"],
            GithubRepositoryReader.FilterType.INCLUDE,
        ),
        verbose=False,
        concurrent_requests=5,
    )

    try:
        docs = loader.load_data(branch=branch)
        if not docs:
            # Nothing to index: bail out before building an index and a
            # query engine over no documents.
            print("No data found, check if the repository is not empty!")
            return None

        # ====== Create vector store and upload indexed data ======
        Settings.embed_model = embed_model
        index = VectorStoreIndex.from_documents(docs)
        # TODO: try async index creation for faster embedding generation
        # and persist the index.

        # ====== Setup a query engine ======
        Settings.llm = llm
        query_engine = index.as_query_engine(similarity_top_k=4)

        # ====== Customise prompt template ======
        qa_prompt_tmpl_str = (
            "Context information is below.\n"
            "---------------------\n"
            "{context_str}\n"
            "---------------------\n"
            "Given the context information above I want you to think step by step to answer the query in a crisp manner, in case you don't know the answer say 'I don't know!'.\n"
            "Query: {query_str}\n"
            "Answer: "
        )
        qa_prompt_tmpl = PromptTemplate(qa_prompt_tmpl_str)
        query_engine.update_prompts(
            {"response_synthesizer:text_qa_template": qa_prompt_tmpl}
        )
    except Exception as e:
        # Top-level boundary: report the failure and signal it with None
        # rather than propagating to the caller.
        print(f"An error occurred: {e}")
        return None

    print("Data loaded successfully!!")
    print("Ready to chat!!")
    return query_engine
# Provide a GitHub token and the URL of the repository you want to chat with.
# Both are read from the environment (GITHUB_TOKEN / GITHUB_URL), which
# load_dotenv() may populate from a local .env file, so the token does not
# have to be written into this file. The original placeholders are kept as
# fallbacks when the variables are not set.
load_dotenv()
github_token = os.environ.get("GITHUB_TOKEN", "your_github_token")
github_url = os.environ.get("GITHUB_URL", "add link repo you want to chat with")
query_engine = setup_query_engine(github_token=github_token, github_url=github_url)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment