Skip to content

Instantly share code, notes, and snippets.

@tazarov
Created November 20, 2023 15:12
Show Gist options
  • Save tazarov/0193b5aebf1f87de4dadcecc5b04b77f to your computer and use it in GitHub Desktop.
Save tazarov/0193b5aebf1f87de4dadcecc5b04b77f to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 7,
"id": "initial_id",
"metadata": {
"collapsed": true,
"ExecuteTime": {
"end_time": "2023-11-20T15:03:48.616101Z",
"start_time": "2023-11-20T15:03:33.351050Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1000\n",
"{'ids': ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'], 'embeddings': None, 'metadatas': [None, None, None, None, None, None, None, None, None, None], 'documents': ['document 0', 'document 1', 'document 2', 'document 3', 'document 4', 'document 5', 'document 6', 'document 7', 'document 8', 'document 9'], 'uris': None, 'data': None}\n"
]
}
],
"source": [
"from chromadb import Settings\n",
"import chromadb\n",
"\n",
"# Copy every record of one Chroma collection into another, one page at a time.\n",
"NUM_DOCS = 1000  # number of synthetic documents seeded into the source collection\n",
"BATCH_SIZE = 10  # records fetched and re-inserted per iteration\n",
"\n",
"client = chromadb.PersistentClient(path=\"test\", settings=Settings(allow_reset=True))\n",
"client.reset()  # reset the database so this notebook can be run multiple times\n",
"col = client.get_or_create_collection(\"test\")\n",
"\n",
"# Seed the source collection with ids \"0\"..\"999\" and matching documents.\n",
"ids = [str(i) for i in range(NUM_DOCS)]\n",
"col.add(ids=ids, documents=[f\"document {i}\" for i in ids])\n",
"new_col = client.get_or_create_collection(\"test1\")\n",
"\n",
"# Take the loop bound from the collection itself rather than repeating the seed\n",
"# size, so the copy stays complete if the number of source records changes.\n",
"for offset in range(0, col.count(), BATCH_SIZE):\n",
"    batch = col.get(\n",
"        include=[\"metadatas\", \"documents\", \"embeddings\"],\n",
"        limit=BATCH_SIZE,\n",
"        offset=offset,\n",
"    )\n",
"    # Forward the embeddings fetched from the source along with ids/documents/metadatas.\n",
"    new_col.add(\n",
"        ids=batch[\"ids\"],\n",
"        documents=batch[\"documents\"],\n",
"        metadatas=batch[\"metadatas\"],\n",
"        embeddings=batch[\"embeddings\"],\n",
"    )\n",
"\n",
"print(new_col.count())\n",
"print(new_col.get(offset=0, limit=10))  # first 10 records of the copy"
]
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [],
"metadata": {
"collapsed": false
},
"id": "74a7cd788009f4e0"
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment