@idontcalculate
Created December 13, 2023 04:43
import os

from llama_index import (
    Document,
    StorageContext,
    VectorStoreIndex,
    load_index_from_storage,
)
from llama_index.agent import OpenAIAgent
from llama_index.node_parser import SentenceSplitter

# Initialize the SentenceSplitter node parser
node_parser = SentenceSplitter()

# patent_titles, patents_dir, and service_context are assumed to be defined
# earlier in the script; all parsed nodes are collected here.
all_nodes = []

# Load each patent document and build (or reload) a per-patent vector index
for idx, patent_title in enumerate(patent_titles):
    file_path = os.path.join(patents_dir, f"{patent_title}.txt")
    if os.path.exists(file_path) and os.path.getsize(file_path) > 0:
        with open(file_path, "r") as file:
            patent_text = file.read()

        # Wrap the patent text in a Document object with a unique ID
        documents = [Document(id_=str(idx), text=patent_text)]
        nodes = node_parser.get_nodes_from_documents(documents)
        all_nodes.extend(nodes)

        # Persist a new index if none exists for this patent, otherwise load it
        vector_index_path = f"./data/storage/patents/{patent_title}"
        if not os.path.exists(vector_index_path):
            vector_index = VectorStoreIndex(nodes, service_context=service_context)
            vector_index.storage_context.persist(persist_dir=vector_index_path)
        else:
            vector_index = load_index_from_storage(
                StorageContext.from_defaults(persist_dir=vector_index_path),
                service_context=service_context,
            )
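A minimal sketch of how the per-patent index built above might be used with the OpenAIAgent imported at the top: the index is exposed as a query engine tool and handed to the agent. The tool name, description, and the example question are illustrative assumptions, not part of the original gist.

from llama_index.tools import QueryEngineTool, ToolMetadata

# Assumed follow-up: wrap the most recently built index as an agent tool
# (name/description below are placeholders for illustration only)
query_engine = vector_index.as_query_engine(similarity_top_k=3)
patent_tool = QueryEngineTool(
    query_engine=query_engine,
    metadata=ToolMetadata(
        name=f"patent_{idx}",
        description=f"Answers questions about the patent '{patent_title}'.",
    ),
)
agent = OpenAIAgent.from_tools([patent_tool], verbose=True)
response = agent.chat("Summarize the key claims of this patent.")
print(response)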