idontcalculate · December 13, 2023 04:43
diff --git a/node_parser.py b/node_parser.py
 from llama_index.agent import OpenAIAgent
 from llama_index import load_index_from_storage, StorageContext
 from llama_index.node_parser import SentenceSplitter

 # Node parser used to split each document's text into nodes.
 node_parser = SentenceSplitter()

 # For every patent title: read its text file, parse it into nodes, and
 # build (or reload) a per-patent vector index persisted on disk.
 for idx, patent_title in enumerate(patent_titles):
    file_path = os.path.join(patents_dir, f"{patent_title}.txt")

    # Only process patents whose text file exists and is non-empty.
    if os.path.exists(file_path) and os.path.getsize(file_path) > 0:
        # Decode explicitly so the result does not depend on the platform's
        # locale default encoding.
        # NOTE(review): assumes the patent text files are UTF-8 -- confirm
        # against whatever wrote them.
        with open(file_path, 'r', encoding='utf-8') as file:
            patent_text = file.read()

        # Wrap the text in a single Document (the loop index serves as its ID)
        # and pass it as a one-element list, as the parser takes a list.
        documents = [Document(id_=str(idx), text=patent_text)]
        nodes = node_parser.get_nodes_from_documents(documents)
        # Nodes are collected for every processed patent, whether its index
        # is built below or loaded from storage.
        all_nodes.extend(nodes)

        # One storage directory per patent, keyed by its title.
        vector_index_path = f"./data/storage/patents/{patent_title}"
        if not os.path.exists(vector_index_path):
            # No persisted index yet: build it from the nodes and persist it.
            vector_index = VectorStoreIndex(nodes, service_context=service_context)
            vector_index.storage_context.persist(persist_dir=vector_index_path)
        else:
            # A persisted index exists: load it instead of rebuilding.
            vector_index = load_index_from_storage(
                StorageContext.from_defaults(persist_dir=vector_index_path),
                service_context=service_context,
            )
	from llama_index.agent import OpenAIAgent
	from llama_index import load_index_from_storage, StorageContext
	from llama_index.node_parser import SentenceSplitter

	# Node parser used to split each document's text into nodes.
	node_parser = SentenceSplitter()

	# For every patent title: read its text file, parse it into nodes, and
	# build (or reload) a per-patent vector index persisted on disk.
	for idx, patent_title in enumerate(patent_titles):
	    file_path = os.path.join(patents_dir, f"{patent_title}.txt")

	    # Only process patents whose text file exists and is non-empty.
	    if os.path.exists(file_path) and os.path.getsize(file_path) > 0:
	        # Decode explicitly so the result does not depend on the
	        # platform's locale default encoding.
	        # NOTE(review): assumes the patent text files are UTF-8 -- confirm
	        # against whatever wrote them.
	        with open(file_path, 'r', encoding='utf-8') as file:
	            patent_text = file.read()

	        # Wrap the text in a single Document (the loop index serves as its
	        # ID) and pass it as a one-element list, as the parser takes a list.
	        documents = [Document(id_=str(idx), text=patent_text)]
	        nodes = node_parser.get_nodes_from_documents(documents)
	        # Nodes are collected for every processed patent, whether its
	        # index is built below or loaded from storage.
	        all_nodes.extend(nodes)

	        # One storage directory per patent, keyed by its title.
	        vector_index_path = f"./data/storage/patents/{patent_title}"
	        if not os.path.exists(vector_index_path):
	            # No persisted index yet: build it from the nodes and persist it.
	            vector_index = VectorStoreIndex(nodes, service_context=service_context)
	            vector_index.storage_context.persist(persist_dir=vector_index_path)
	        else:
	            # A persisted index exists: load it instead of rebuilding.
	            vector_index = load_index_from_storage(
	                StorageContext.from_defaults(persist_dir=vector_index_path),
	                service_context=service_context,
	            )