ranfysvalle02 · May 19, 2023 03:44
diff --git a/llama-test.py b/llama-test.py
 import logging
 import sys
 import nest_asyncio
 # Patch asyncio through nest_asyncio before any other work happens
 # (presumably so nested event loops are allowed -- confirm it is needed here).
 nest_asyncio.apply()
 # Route log records of level INFO and above to stdout.
 logging.basicConfig(stream=sys.stdout, level=logging.INFO)
 # NOTE(review): this attaches an additional stdout handler to the root logger.
 # If basicConfig() above already installed its own handler, each record is
 # printed twice -- confirm whether that is intended.
 logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))

 from llama_index import (
    LLMPredictor,
    GPTVectorStoreIndex, 
    GPTListIndex, 
    GPTSimpleKeywordTableIndex,
    download_loader,
    load_index_from_storage
 )

 import pymongo
 from langchain.chat_models import ChatOpenAI
 from llama_index.response.notebook_utils import display_response
 import requests
 from pathlib import Path
 import os
 from llama_index.node_parser import SimpleNodeParser
 from llama_index.storage.docstore import MongoDocumentStore
 from llama_index.storage.index_store import MongoIndexStore
 from llama_index.storage.storage_context import StorageContext


 # Connection settings may be overridden with the MONGO_URI / MONGODB_DATABASE
 # environment variables so real credentials never need to be written into this
 # file. The fallbacks are the original values; the URI fallback is a template
 # whose <username>, <password> and <atlas-cluster> parts must be filled in.
 MONGO_URI = os.environ.get(
     "MONGO_URI",
     "mongodb+srv://<username>:<password>@<atlas-cluster>.mongodb.net/?retryWrites=true&w=majority&tls=true",
 )
 MONGODB_DATABASE = os.environ.get("MONGODB_DATABASE", "llamatest")

 def download_pdf(url, out_dir, timeout=30):
     """Download a PDF from ``url`` into ``out_dir`` and return its local path.

     The file is always stored as ``paper.pdf``. If that file already exists it
     is reused and no network request is made.

     Args:
         url: The URL of the PDF file to download.
         out_dir: Directory to save the PDF in (``str`` or ``pathlib.Path``).
             It is created, including missing parents, when it does not exist.
         timeout: Seconds to wait for the server before giving up.

     Returns:
         A ``pathlib.Path`` pointing at the downloaded (or cached) PDF file.

     Raises:
         requests.HTTPError: If the server answers with an error status.
         requests.Timeout: If the server does not respond within ``timeout``.
     """
     out_dir = Path(out_dir)
     # exist_ok avoids the check-then-create race of a separate exists() test.
     out_dir.mkdir(parents=True, exist_ok=True)
     out_path = out_dir / "paper.pdf"

     if not out_path.exists():
         response = requests.get(url, timeout=timeout)
         # Fail before touching the disk: otherwise an error page would be
         # saved as paper.pdf and silently reused by every later run.
         response.raise_for_status()
         out_path.write_bytes(response.content)

     return out_path


 def load_data(out_path):
     """Read a PDF with the ``PDFReader`` loader and return its first document.

     Args:
         out_path: Location of the PDF file to read.

     Returns:
         The first element of the list produced by the loader's ``load_data``.
     """
     reader_cls = download_loader("PDFReader")
     documents = reader_cls().load_data(file=Path(out_path))
     # NOTE(review): only element 0 is kept; if the loader yields several
     # documents (e.g. one per page) the rest are dropped -- confirm intended.
     return documents[0]


 def create_nodes(doc):
     """Split a single document into nodes using ``SimpleNodeParser``.

     Args:
         doc: The document to parse.

     Returns:
         Whatever ``get_nodes_from_documents`` produces for ``[doc]``.
     """
     parser = SimpleNodeParser()
     return parser.get_nodes_from_documents([doc])


 def create_index(nodes, storage_context):
     """Build list, keyword-table and vector indexes; return the vector index.

     All three indexes are constructed from the same nodes and storage context,
     in that order. Only the vector index is given an id and returned.

     Args:
         nodes: The nodes to index.
         storage_context: The storage context passed to each index.

     Returns:
         The ``GPTVectorStoreIndex``, with its index id set to ``"vectorz"``.
     """
     shared = {"storage_context": storage_context}
     # The first two results are not used afterwards; they are still built
     # (presumably so they get registered in the storage context -- confirm).
     _unused_list = GPTListIndex(nodes, **shared)
     _unused_keywords = GPTSimpleKeywordTableIndex(nodes, **shared)
     vectors = GPTVectorStoreIndex(nodes, **shared)
     vectors.set_index_id("vectorz")
     return vectors


 def query_index(vector_index, query):
     """Run ``query`` against ``vector_index`` through its default query engine.

     Args:
         vector_index: The index to query; must provide ``as_query_engine()``.
         query: The query to search for.

     Returns:
         The value returned by the query engine's ``query`` method.
     """
     return vector_index.as_query_engine().query(query)


 def main():
     """Run the end-to-end demo: fetch the PDF, index it, and print a summary.

     Steps: download the whitepaper into ``data/``, load it and parse it into
     nodes, build the indexes against MongoDB-backed document and index stores,
     then ask the vector index for a summary and print the response.
     """
     pdf_url = 'https://webassets.mongodb.com/MongoDB_Supply_Chain_Security_whitepaper_Jun.pdf'
     pdf_path = download_pdf(pdf_url, Path("data"))
     nodes = create_nodes(load_data(pdf_path))

     mongo_args = {"uri": MONGO_URI, "db_name": MONGODB_DATABASE}
     docstore = MongoDocumentStore.from_uri(**mongo_args)
     index_store = MongoIndexStore.from_uri(**mongo_args)
     storage_context = StorageContext.from_defaults(docstore=docstore, index_store=index_store)

     vector_index = create_index(nodes, storage_context)
     print(query_index(vector_index, "What is a summary of this document?"))


 # Run the demo only when executed as a script, so importing this module
 # (e.g. to reuse the helpers) does not trigger downloads and queries.
 if __name__ == "__main__":
     main()
	import logging
	import sys
	import nest_asyncio
	# Patch asyncio through nest_asyncio before any other work happens
	# (presumably so nested event loops are allowed -- confirm it is needed here).
	nest_asyncio.apply()
	# Route log records of level INFO and above to stdout.
	logging.basicConfig(stream=sys.stdout, level=logging.INFO)
	# NOTE(review): this attaches an additional stdout handler to the root logger.
	# If basicConfig() above already installed its own handler, each record is
	# printed twice -- confirm whether that is intended.
	logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))

	from llama_index import (
	LLMPredictor,
	GPTVectorStoreIndex,
	GPTListIndex,
	GPTSimpleKeywordTableIndex,
	download_loader,
	load_index_from_storage
	)

	import pymongo
	from langchain.chat_models import ChatOpenAI
	from llama_index.response.notebook_utils import display_response
	import requests
	from pathlib import Path
	import os
	from llama_index.node_parser import SimpleNodeParser
	from llama_index.storage.docstore import MongoDocumentStore
	from llama_index.storage.index_store import MongoIndexStore
	from llama_index.storage.storage_context import StorageContext


	# Connection settings may be overridden with the MONGO_URI / MONGODB_DATABASE
	# environment variables so real credentials never need to be written into this
	# file. The fallbacks are the original values; the URI fallback is a template
	# whose <username>, <password> and <atlas-cluster> parts must be filled in.
	MONGO_URI = os.environ.get(
	    "MONGO_URI",
	    "mongodb+srv://<username>:<password>@<atlas-cluster>.mongodb.net/?retryWrites=true&w=majority&tls=true",
	)
	MONGODB_DATABASE = os.environ.get("MONGODB_DATABASE", "llamatest")

	def download_pdf(url, out_dir, timeout=30):
	    """Download a PDF from ``url`` into ``out_dir`` and return its local path.

	    The file is always stored as ``paper.pdf``. If that file already exists it
	    is reused and no network request is made.

	    Args:
	        url: The URL of the PDF file to download.
	        out_dir: Directory to save the PDF in (``str`` or ``pathlib.Path``).
	            It is created, including missing parents, when it does not exist.
	        timeout: Seconds to wait for the server before giving up.

	    Returns:
	        A ``pathlib.Path`` pointing at the downloaded (or cached) PDF file.

	    Raises:
	        requests.HTTPError: If the server answers with an error status.
	        requests.Timeout: If the server does not respond within ``timeout``.
	    """
	    out_dir = Path(out_dir)
	    # exist_ok avoids the check-then-create race of a separate exists() test.
	    out_dir.mkdir(parents=True, exist_ok=True)
	    out_path = out_dir / "paper.pdf"

	    if not out_path.exists():
	        response = requests.get(url, timeout=timeout)
	        # Fail before touching the disk: otherwise an error page would be
	        # saved as paper.pdf and silently reused by every later run.
	        response.raise_for_status()
	        out_path.write_bytes(response.content)

	    return out_path


	def load_data(out_path):
	    """Read a PDF with the ``PDFReader`` loader and return its first document.

	    Args:
	        out_path: Location of the PDF file to read.

	    Returns:
	        The first element of the list produced by the loader's ``load_data``.
	    """
	    reader_cls = download_loader("PDFReader")
	    documents = reader_cls().load_data(file=Path(out_path))
	    # NOTE(review): only element 0 is kept; if the loader yields several
	    # documents (e.g. one per page) the rest are dropped -- confirm intended.
	    return documents[0]


	def create_nodes(doc):
	    """Split a single document into nodes using ``SimpleNodeParser``.

	    Args:
	        doc: The document to parse.

	    Returns:
	        Whatever ``get_nodes_from_documents`` produces for ``[doc]``.
	    """
	    parser = SimpleNodeParser()
	    return parser.get_nodes_from_documents([doc])


	def create_index(nodes, storage_context):
	    """Build list, keyword-table and vector indexes; return the vector index.

	    All three indexes are constructed from the same nodes and storage context,
	    in that order. Only the vector index is given an id and returned.

	    Args:
	        nodes: The nodes to index.
	        storage_context: The storage context passed to each index.

	    Returns:
	        The ``GPTVectorStoreIndex``, with its index id set to ``"vectorz"``.
	    """
	    shared = {"storage_context": storage_context}
	    # The first two results are not used afterwards; they are still built
	    # (presumably so they get registered in the storage context -- confirm).
	    _unused_list = GPTListIndex(nodes, **shared)
	    _unused_keywords = GPTSimpleKeywordTableIndex(nodes, **shared)
	    vectors = GPTVectorStoreIndex(nodes, **shared)
	    vectors.set_index_id("vectorz")
	    return vectors


	def query_index(vector_index, query):
	    """Run ``query`` against ``vector_index`` through its default query engine.

	    Args:
	        vector_index: The index to query; must provide ``as_query_engine()``.
	        query: The query to search for.

	    Returns:
	        The value returned by the query engine's ``query`` method.
	    """
	    return vector_index.as_query_engine().query(query)


	def main():
	    """Run the end-to-end demo: fetch the PDF, index it, and print a summary.

	    Steps: download the whitepaper into ``data/``, load it and parse it into
	    nodes, build the indexes against MongoDB-backed document and index stores,
	    then ask the vector index for a summary and print the response.
	    """
	    pdf_url = 'https://webassets.mongodb.com/MongoDB_Supply_Chain_Security_whitepaper_Jun.pdf'
	    pdf_path = download_pdf(pdf_url, Path("data"))
	    nodes = create_nodes(load_data(pdf_path))

	    mongo_args = {"uri": MONGO_URI, "db_name": MONGODB_DATABASE}
	    docstore = MongoDocumentStore.from_uri(**mongo_args)
	    index_store = MongoIndexStore.from_uri(**mongo_args)
	    storage_context = StorageContext.from_defaults(docstore=docstore, index_store=index_store)

	    vector_index = create_index(nodes, storage_context)
	    print(query_index(vector_index, "What is a summary of this document?"))


	# Run the demo only when executed as a script, so importing this module
	# (e.g. to reuse the helpers) does not trigger downloads and queries.
	if __name__ == "__main__":
	    main()