Skip to content

Instantly share code, notes, and snippets.

@mherkazandjian
Created August 20, 2025 12:16
Show Gist options
  • Save mherkazandjian/4992bb970c3e2e0e1a9e60d2cb6f794c to your computer and use it in GitHub Desktop.
Save mherkazandjian/4992bb970c3e2e0e1a9e60d2cb6f794c to your computer and use it in GitHub Desktop.
couchdb quick tutorial and examples
"""
<keywords>
test, python, couchdb, database, create, connect, server
</keywords>
<description>
This script connects to a CouchDB server and creates a database named "tutorial"
if it does not already exist.
</description>
"""
# %%
import os

import couchdb

# The server URL (including the credentials) is taken from the COUCHDB_URL
# environment variable so that secrets do not have to be stored in this file.
# The default is only a placeholder for a local development server: the URL
# literal originally written here was corrupted ("admin:[email protected]")
# and could not be used as-is -- adjust user, password and host as needed.
server_url = os.environ.get("COUCHDB_URL", "http://admin:password@127.0.0.1:5984/")
client = couchdb.Server(server_url)
print("Connected to CouchDB server:", client)
# %%
# get/list all the databases (iterating over the server yields the database names)
dbs = list(client)
print("Databases:", dbs)
# %%
db_name = "tutorial"
if db_name not in list(client):
    print(f"Database does not exist in couchdb, creating database '{db_name}'...")
    db = client.create(db_name)
else:
    print(f"Database '{db_name}' already exists, connecting to it...")
    # bind `db` in this branch as well so it is defined whichever path was taken
    db = client[db_name]
# %%
# try to get a database that does not exist
# the lookup raises couchdb.http.ResourceNotFound; it is caught here so that the
# demonstration does not abort the rest of the script
try:
    client['non_existent_db']
except couchdb.http.ResourceNotFound as exc:
    print(f"Database 'non_existent_db' was not found: {exc}")
# %%
# databases can be retrieved by name using []
db = client[db_name]
# the database may have existed before this run, so do not claim it was created
print(f"Database '{db_name}' is ready: {db}")
# %%
# delete a database
#del client[db_name]
# %%
print('done')
# %%
"""
<keywords>
test, python, couchdb, database, put, insert, document, read, update, delete, crud, bulk
</keywords>
<description>
Perform CRUD (create, read, update, delete) operations on a CouchDB database.
It is assumed that the CouchDB server is running and accessible and that a database called
"tutorial" exists.
The following operations are performed:
- Connect to a CouchDB server and database.
- List all documents in the database.
- Create documents with and without specifying an ID.
- Read documents by their ID.
- Update documents by modifying and saving them.
- Delete documents from the database.
- Perform bulk operations to insert multiple documents.
- Add, retrieve, and list attachments for documents.
</description>
"""
# %%
import os

import couchdb

# The server URL (including the credentials) is taken from the COUCHDB_URL
# environment variable so that secrets do not have to be stored in this file.
# The default is only a placeholder for a local development server: the URL
# literal originally written here was corrupted ("admin:[email protected]")
# and could not be used as-is -- adjust user, password and host as needed.
server_url = os.environ.get("COUCHDB_URL", "http://admin:password@127.0.0.1:5984/")
client = couchdb.Server(server_url)
print("Connected to CouchDB server:", client)
# %%
# get the database (it must already exist, see the description at the top of this script)
db_name = "tutorial"
db = client[db_name]
print(f"Connected to database '{db_name}': {db}")
# %%
# walk over the built-in "_all_docs" view; include_docs=True asks for the
# document body to be returned together with each row
all_rows = db.view('_all_docs', include_docs=True)
for row in all_rows:
    revision = row.value['rev']
    print(f"Document ID: {row.id}, Revision: {revision}, Content: {row.doc}")
# %% markdown
## create documents
# %%
# create (insert) documents
# 1) no "_id" in the document: the id is generated automatically;
#    db.save gives back the (id, revision) pair of the stored document
alice = dict(type="person", name="Alice", age=30, tags=["admin", "team"])
alice_id, alice_rev = db.save(alice)
print(alice_id, alice_rev)
# %%
# create (insert) documents
# 2) the document carries its own "_id", which is used as the document id
bob = dict(_id="person:bob", type="person", name="Bob", age=28, tags=["team"])
bob_id, bob_rev = db.save(bob)
print(bob_id, bob_rev)
# %% markdown
## read documents
# %%
# look up alice's document by id; the result is a couchdb.client.Document,
# which can be indexed like a dictionary
doc = db[alice_id]
print('id={_id}, rev={_rev}, name={name}, age={age} tags={tags}'.format_map(doc))
# %%
# look up bob's document by id
doc = db[bob_id]
print('id={_id}, rev={_rev}, name={name}, age={age}, tags={tags}'.format_map(doc))
# %% markdown
## update documents
# documents can be updated by modifying the fetched document and saving it again
bob_doc = db[bob_id]  # fetch explicitly so it is clear which document is modified
bob_doc['age'] = 31  # update age
bob_doc['tags'].append('developer')  # add a new tag
db.save(bob_doc)  # save the updated document
## get bob's document again and verify that the age is updated
bob_doc = db[bob_id]
print(f'id={bob_doc["_id"]}, rev={bob_doc["_rev"]}, name={bob_doc["name"]}, age={bob_doc["age"]}, tags={bob_doc["tags"]}')
# %% markdown
## delete documents
# The document that is deleted must be the one that is reported: previously Bob's
# document was removed while the message mentioned Alice.
alice_doc = db[alice_id]
db.delete(alice_doc)  # delete the document for Alice
print(f'Deleted document for Alice with id={alice_id}')
# Bob's document is removed as well (as before), which frees the fixed id
# "person:bob" so that the script can be run again.
db.delete(bob_doc)
print(f'Deleted document for Bob with id={bob_id}')
# %% markdown
## bulk operations
# several documents can be written with a single db.update call; each entry of
# the result is a (success flag, document id, revision-or-error) triple
students = [
    dict(type="student", name="Bob", age=17, classes=["math", "physics"]),
    dict(type="student", name="Carol", age=16, classes=["history"]),
    dict(type="student", name="Dan", age=18, classes=["math", "art"]),
]
results = db.update(students)  # bulk insert students
print("Bulk inserted students:")
for succeeded, doc_id, outcome in results:
    report = (
        f"\tDocument ID: {doc_id}, Revision: {outcome}"
        if succeeded
        else f"\tError inserting document: {doc_id}, Error: {outcome}"
    )
    print(report)
# %%
## attachments
# - first create the document
# - then add the attachment to it
#
# Attachments can be added to documents, for example, to store images or files.
# Let's create a document and add an attachment to it.
doc = {"type": "report", "title": "Monthly Report", "content": "This is the content of the report."}
doc_id, _ = db.save(doc)
# %%
attachment_data = b"This is the content of the attachment."
attachment_name = "attachment.txt"
doc = db[doc_id]  # Fetch the document using the document ID
db.put_attachment(doc, attachment_data, attachment_name, "text/plain")
# The copy fetched above predates the attachment, so it has no "_attachments"
# entry; fetch the document again so that the local copy reflects the stored state.
doc = db[doc_id]
# %%
# read back the content of the attachment
attachment_data = db.get_attachment(doc, attachment_name).read()
print(f"Attachment '{attachment_name}' content: {attachment_data}")
# %%
# list all the attachments in the document
all_attachments = doc['_attachments']
print("All attachments in the document:")
# distinct loop names are used so that attachment_name / attachment_data defined
# above are not overwritten by the loop
for name, meta in all_attachments.items():
    print(f" attachement name: {name}")
    print(f" attachment metadata: {meta}")
    # get the attachment content
    content = db.get_attachment(doc, name).read()
    print(f" attachment content: {content}")
    print(' ' + '-' * 40)
# %%
print('done')
# %%
"""
<keywords>
test, python, couchdb, database, views, design, documents, map, reduce, query, mango
</keywords>
<description>
Define and query views (map/reduce functions stored in design documents) and run Mango queries on a CouchDB database.
It is assumed that the CouchDB server is running and accessible and that a database called
"tutorial" exists.
</description>
"""
# %%
import os

import couchdb

# The server URL (including the credentials) is taken from the COUCHDB_URL
# environment variable so that secrets do not have to be stored in this file.
# The default is only a placeholder for a local development server: the URL
# literal originally written here was corrupted ("admin:[email protected]")
# and could not be used as-is -- adjust user, password and host as needed.
server_url = os.environ.get("COUCHDB_URL", "http://admin:password@127.0.0.1:5984/")
client = couchdb.Server(server_url)
print("Connected to CouchDB server:", client)
# %%
# get the database (it must already exist, see the description at the top of this script)
db_name = "tutorial"
db = client[db_name]
print(f"Connected to database '{db_name}': {db}")
# %%
# build the sample documents used by the rest of this script: one document per
# sample, holding x, sin(x) and cos(x) for 1000 values of x spread evenly over [0, 2*pi]
import numpy as np
_x_grid = np.linspace(0, 2*np.pi, 1000)
points = [
    {
        "type": "coordinate",
        'x': x,
        'sin(x)': np.sin(x),
        "cos(x)": np.cos(x),
    }
    for x in _x_grid
]
# %%
from matplotlib import pyplot as plt
# quick visual check of the generated data before it is stored
plt.plot([p['x'] for p in points], [p['sin(x)'] for p in points], label='sin(x)')
# %%
# insert the points into the database
results = db.update(points)  # bulk insert the coordinate documents
# %%
# check the results of the bulk insert, only show the errors
# every failure is collected first so that all of them are reported, not just
# the first one, before the script is stopped
failures = [(doc_id, error) for ok, doc_id, error in results if not ok]
for doc_id, error in failures:
    print(f"\tError inserting document: {doc_id}, Error: {error}")
if failures:
    first_id, first_error = failures[0]
    raise RuntimeError(
        f"{len(failures)} document(s) could not be inserted; "
        f"first error: {first_id}, Error: {first_error}"
    )
print("All documents inserted successfully.")
# %% [markdown]
## views, design documents, and querying, aggregation
#
# Views are indexes you define with JavaScript functions stored in design documents.
# Design documents are special CouchDB documents that start with _design/ and contain
# the defined views.
# by convention, design documents are stored in the database with an ID starting with
# `_design/`. For example `_design/<my_view_name>`.
#
# the logic of the map / reduce pattern is the following:
# - Map: if a document meets a certain condition, it emits a key-value pair.
# - Reduce: it aggregates the emitted values by key (e.g., count, sum, etc.).
# %%
# define the view (design document) and associate it with the database
# the function is
# ````javascript
# function (doc) {
# if (doc.type === "coordinate") {
# emit(doc['x'], [doc['sin(x)'], doc['cos(x)']]);
# }
# }
# ````
# it will be put into a doc and saved as a design document
# this view does the following:
# - map the function to every point
# - if the point is of type "coordinate", it emits the x value as key
# - define a reduce operation that counts the number of points emitted
# map step (JavaScript source, kept as a string): for every document of type
# "coordinate" emit x as the key and the pair [sin(x), cos(x)] as the value
_points_map_source = 'function(doc){ if(doc.type === "coordinate"){ emit(doc["x"], [doc["sin(x)"], doc["cos(x)"]]); } }'
# design document holding a single view named "all"; the "_count" reduce
# turns the emitted rows into a row count
ddoc = {
    "_id": "_design/points",
    "views": {"all": {"map": _points_map_source, "reduce": "_count"}},
}
# %%
# store the design document: create it when it is missing, otherwise update its
# views only when they differ from the definition above
# (membership test instead of a try-except block around the lookup)
if "_design/points" not in db:
    existing = None
    print("Design document does not exist, creating it...")
    db.save(ddoc)  # save the new design document
else:
    existing = db["_design/points"]
    print("Design document already exists, check if it needs to be updated...")
    # pretty print the existing design document
    import json
    print(json.dumps(existing, indent=2))
    # compare the existing views with the new ones
    if existing.get("views") == ddoc["views"]:
        print("No changes in views, nothing to update.")
    else:
        print("Updating views in the design document...")
        existing["views"] = ddoc["views"]
        db.save(existing)
# %%
# another way of defining the javascript function is by defining a literal multiline string
# (the string holds JavaScript source; it is plain text as far as Python is concerned)
view_code = '''
function(doc) {
if(doc.type == "coordinate") {
emit(doc.x, [doc['sin(x)'], doc['cos(x)']]);
}
}
'''
# the same source written as an f-string: values could be interpolated with {...},
# so every literal JavaScript brace has to be doubled ({{ and }})
# this assignment replaces the value assigned above
# NOTE(review): view_code is not referenced in the visible part of the file
view_code = f'''
function(doc) {{
if(doc.type == "coordinate") {{
emit(doc.x, [doc['sin(x)'], doc['cos(x)']]);
}}
}}
'''
# %%
# query the view, below reduce=False is set just to get the map results without
# aggregation, otherwise it would return the count of all points
# limit=10 lets the server return only the first 10 rows instead of fetching
# every row and slicing the list afterwards
for point in db.view('points/all', reduce=False, limit=10):
    print(point)
# double check the returned values by plotting them
_points = list(db.view('points/all', reduce=False))[::50]  # keep every 50th row so the plot stays readable
x_values = [point.key for point in _points]  # key emitted by the map function: x
sin_values = [point.value[0] for point in _points]  # first element of the emitted value: sin(x)
cos_values = [point.value[1] for point in _points]  # second element of the emitted value: cos(x)
# %%
# run the same view with the reduce step enabled: the "_count" reduce collapses
# the emitted rows into a single row whose value is the number of points
reduced = db.view('points/all', reduce=True)
points_count = reduced.rows[0].value
print(f"Total points: {points_count}")
# %%
# draw the generated curve and overlay the samples read back from the view ('r+' markers)
plt.plot(
    [p['x'] for p in points],
    [p['sin(x)'] for p in points],
    label='sin(x)',
)
plt.plot(x_values, sin_values, 'r+', label='sin(x)')
plt.legend()
# %%
# define a new set of docs to demonstrate other features of views
# use the faker library to generate random names of students and add a country field
from faker import Faker
fake = Faker()
students = [
    {
        "type": "student",
        "name": fake.name(),
        "age": fake.random_int(min=16, max=25),
        "classes": fake.words(nb=3, unique=True),
        "country": fake.country(),
    }
    for _ in range(1000)
]
# print the head 10 of the students list in a nice way
for student in students[:10]:
    print(f"Name: {student['name']}, Age: {student['age']}, Classes: {', '.join(student['classes'])}, Country: {student['country']}")
# save the students to the database
results = db.update(students)  # bulk insert students
# check the outcome like it is done for the coordinate documents: the views
# queried later would silently give incomplete results after a partial insert
failures = [(doc_id, error) for ok, doc_id, error in results if not ok]
for doc_id, error in failures:
    print(f"\tError inserting document: {doc_id}, Error: {error}")
if failures:
    raise RuntimeError(f"{len(failures)} student document(s) could not be inserted")
# %%
# design document with two views over the student documents
# - by_country: emits (country, 1) and reduces with "_count", i.e. it counts students per country
# - by_name_country: emits the country as the key and the student name as the value (no reduce)
_by_country_view = {
    "map": 'function(doc){ if(doc.type === "student" && doc.country){ emit(doc.country, 1); } }',
    "reduce": "_count",
}
_by_name_country_view = {
    "map": 'function(doc){ if(doc.type === "student" && doc.name && doc.country){ emit(doc.country, doc.name); } }',
}
ddoc = {
    "_id": "_design/students",
    "views": {
        "by_country": _by_country_view,
        "by_name_country": _by_name_country_view,
    },
}
# %%
# store the design document: update the views of an existing one when they
# differ from the definition above, otherwise create it
if "_design/students" in db:
    existing = db["_design/students"]
    print("Design document already exists, check if it needs to be updated...")
    # pretty print the existing design document
    import json
    print(json.dumps(existing, indent=2))
    # compare the existing views with the new ones
    if existing.get("views") == ddoc["views"]:
        print("No changes in views, nothing to update.")
    else:
        print("Updating views in the design document...")
        existing["views"] = ddoc["views"]
        db.save(existing)
else:
    print("Design document does not exist, creating it...")
    db.save(ddoc)  # save the new design document
# %%
# query the by_country view with group=True: one row per distinct key (country)
print("Counts per country:")
per_country = db.view('students/by_country', group=True)
for row in per_country:
    print(row.key, row.value)  # e.g., "USA 100", "Canada 50", ...
# %%
# demonstrate using the key=<value> when using views
# print the students from a specific country, e.g., Argentina
country = "Argentina"
print(f"Students from {country}:")
for row in db.view('students/by_name_country', reduce=False, key=country):
    print(row.key, row.value)
# print the students from countries whose names start with "A" to "C"
# keys are compared as whole strings, so endkey="C" would end the range before
# "Canada", "Chile", ...; appending the high code point \ufff0 extends the range
# to every key that starts with "C"
print("Students from countries A to C:")
for row in db.view('students/by_name_country', reduce=False, startkey="A", endkey="C\ufff0"):
    print(row.key, row.value)
# %%
# limit the number of matches
print("First 5 students from Argentina:")
for row in db.view('students/by_name_country', reduce=False, key=country, limit=5):
    print(row.key, row.value)
# %%
# page through the students of one country, page_size rows at a time, using
# limit (rows per request) and skip (rows to jump over); the loop ends at the
# first page that comes back with fewer than page_size rows
page_size = 3
page = 0
print(f"Students from {country} (paginated, {page_size} per page):")
while True:
    page_options = dict(
        reduce=False,
        key=country,
        limit=page_size,
        skip=page * page_size,
    )
    rows = list(db.view('students/by_name_country', **page_options))
    if rows:
        print(f"Page {page + 1}:")
        for row in rows:
            print(" ", row.key, row.value)
    elif page == 0:
        # nothing at all was returned for this key
        print(" (no students found)")
    if len(rows) < page_size:
        # a short (or empty) page is the last one
        break
    page += 1
# %%
# with include_docs=True every row also carries the document itself (row.doc);
# without it only the emitted key and value are available
print("First 5 students from Argentina with full documents:")
for row in db.view('students/by_name_country', reduce=False, key=country, limit=5, include_docs=True):
    student_doc = row.doc
    class_list = ', '.join(student_doc['classes'])
    print(f"Country: {student_doc['country']}, Name: {student_doc['name']}, Age: {student_doc['age']}, Classes: {class_list}")
print("First 5 students from Argentina without full documents:")
for row in db.view('students/by_name_country', reduce=False, key=country, limit=5):
    country_key, student_name = row.key, row.value
    print(f"Country: {country_key}, Name: {student_name}")
# %%
# Mango is a sql-like query language for CouchDB that can be used to query
# without creating **views**.
# it is not as fast as views, but it is more flexible for ad-hoc queries
# %%
# "sort" in a Mango query needs an index that covers the sort field; without it
# the server rejects the query. Create a JSON index on "age" first -- repeating
# the request when the index already exists is harmless.
db.resource.post_json('_index', body={
    "index": {"fields": ["age"]},
    "name": "students-by-age",
    "type": "json",
})
# %%
# run the query to find students with age >= 17
query = {
    "selector": {
        "type": "student",
        "age": {"$gte": 17}
    },
    "fields": ["name", "age"],
    "sort": [{"age": "asc"}]
}
# Use POST /{db}/_find to run the query
status, headers, find_result = db.resource.post_json('_find', body=query)
if status != 200:
    raise RuntimeError(f"Find query failed (status {status}): {find_result}")
# Defensive handling in case the response does not contain 'docs'
docs = find_result.get('docs', [])
if not isinstance(docs, list):
    raise RuntimeError(f"Unexpected _find response (no 'docs' list): {find_result}")
print("Students with age >= 17:")
for student in docs:
    print(f"Name: {student['name']}, Age: {student['age']}")
# %%
print('done')
# %%
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment