Skip to content

Instantly share code, notes, and snippets.

@ranfysvalle02
Last active August 31, 2024 15:22
Show Gist options
  • Save ranfysvalle02/8f43473292b141e32344cdf781503ed8 to your computer and use it in GitHub Desktop.
Save ranfysvalle02/8f43473292b141e32344cdf781503ed8 to your computer and use it in GitHub Desktop.
from unstructured.partition.auto import partition
import pymongo
from openai import AzureOpenAI
# Shared Azure OpenAI client used for every embedding request.
# The endpoint, API version and key are blank placeholders here and must be
# filled in before the script is run.
az_client = AzureOpenAI(
    azure_endpoint="",
    api_version="",
    api_key="",
)
def generate_embeddings(text, model=""):
    """Return the embedding of *text* produced by the Azure OpenAI client.

    Args:
        text: The string to embed. It is sent as a single-item batch.
        model: Name of the Azure OpenAI deployment to call (the
            "deployment_name"). The default is a blank placeholder.

    Returns:
        The ``embedding`` attribute of the first (and only) item in the
        response's ``data`` list.
    """
    response = az_client.embeddings.create(input=[text], model=model)
    first_item = response.data[0]
    return first_item.embedding
# MongoDB connection settings: blank placeholders, fill in before running.
MDB_URI = ""
DB_NAME = ""
COLLECTION_NAME = ""

# One definition of the input path, so the file that gets partitioned and the
# "source" value stored with each record cannot drift apart.
SOURCE_FILE = "document.pdf"

print("Partioning PDF document...")
elements = partition(SOURCE_FILE)
print(elements)
print("\n\n".join([str(el) for el in elements]))

# Using the client as a context manager closes its connections when ingestion
# finishes, including when an embedding request or an insert raises.
with pymongo.MongoClient(MDB_URI) as mdb_client:
    mdb_db = mdb_client[DB_NAME]
    mdb_collection = mdb_db[COLLECTION_NAME]

    for element in elements:
        text = str(element.text)

        # Some partitioned elements carry no text. The embeddings endpoint
        # rejects empty input, which would abort the whole run, and blank
        # text has nothing to search on anyway, so skip those elements.
        if not text.strip():
            continue

        # Store one document per element: its text, its embedding, the full
        # element dump as metadata, and the originating file.
        mdb_collection.insert_one({
            "text": text,
            "embeddings": generate_embeddings(text),
            "metadata": {
                "raw_element": element.to_dict(),
            },
            "source": SOURCE_FILE,
        })
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment