Skip to content

Instantly share code, notes, and snippets.

@mtanco
Created June 10, 2024 17:13
Download all collection data and upload all collection data
import os
from h2ogpte import H2OGPTE
# Username whose collections download_all_collections() selects.
# Can be overridden with the H2OGPTE_EMAIL environment variable so the file
# does not have to be edited per account; the literal is the original default.
EMAIL = os.environ.get("H2OGPTE_EMAIL", "asdfasdf@h2o.ai")

# Shared client used by every function in this script.
# H2OGPTE_ADDRESS / H2OGPTE_API_KEY override the defaults below, which keeps a
# real API key out of the source file.
# NOTE(review): 'sk-' looks like a redacted placeholder - supply a real key
# through the environment variable before running.
client = H2OGPTE(
    address=os.environ.get("H2OGPTE_ADDRESS", 'https://playground.h2ogpte.h2o.ai'),
    api_key=os.environ.get("H2OGPTE_API_KEY", 'sk-'),
)
def download_all_collections():
    """Download every document of every collection owned by ``EMAIL``.

    For each collection returned by ``client.list_recent_collections`` whose
    ``username`` equals ``EMAIL``, a directory named after the collection is
    created in the current working directory (if it is not already there) and
    each document listed for the collection is passed to
    ``client.download_document`` with that directory as the first argument.

    Progress is printed to stdout: the collection name and document count,
    then each document object.

    Raises:
        FileExistsError: if a non-directory file already occupies the path
            named after a collection.
    """
    # NOTE(review): the (0, -1) arguments are presumably offset and limit,
    # with -1 meaning "no limit" - confirm against the h2ogpte client docs.
    for collection in client.list_recent_collections(0, -1):
        if collection.username != EMAIL:
            continue

        print(collection.name, collection.document_count)

        # exist_ok=True replaces the exists()/mkdir() pair: no window between
        # the check and the creation, and a clear error if the path is a file.
        os.makedirs(collection.name, exist_ok=True)

        # At most 10000 documents are requested per collection.
        documents = client.list_documents_in_collection(
            collection_id=collection.id,
            offset=0,
            limit=10000,
        )
        for document in documents:
            print(document)
            client.download_document(collection.name, document.name, document.id)
def upload_all_folders_as_collections(root_dir="."):
    """Create one collection per sub-folder of ``root_dir`` and ingest its files.

    Every non-hidden directory directly inside ``root_dir`` becomes a new
    collection named after the folder (with an empty description). Each
    regular file directly inside that folder is uploaded and then ingested
    into the collection; nested sub-directories are not descended into.

    Args:
        root_dir: Directory whose sub-folders are uploaded. Defaults to the
            current working directory, which is what the function scanned
            before this parameter existed.
    """
    for folder_name in os.listdir(root_dir):
        folder_path = os.path.join(root_dir, folder_name)

        # Skip hidden entries (e.g. ".git") and anything that is not a folder.
        if folder_name.startswith('.') or not os.path.isdir(folder_path):
            continue

        # The collection is created even when the folder holds no files.
        collection_id = client.create_collection(
            name=folder_name,
            description="",
        )

        for filename in os.listdir(folder_path):
            file_path = os.path.join(folder_path, filename)
            if not os.path.isfile(file_path):
                continue

            # Upload the raw bytes under the bare file name (no folder prefix).
            with open(file_path, 'rb') as f:
                upload_id = client.upload(filename, f)

            # Ingest each upload on its own, one call per file.
            client.ingest_uploads(collection_id, [upload_id])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment