Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save ronaldpetty/b27a9cf71c7ebd8c4627274298817109 to your computer and use it in GitHub Desktop.
Save ronaldpetty/b27a9cf71c7ebd8c4627274298817109 to your computer and use it in GitHub Desktop.
mp
* `docker container run --rm -d --name mdb mongo`
* `docker container inspect mdb | grep IPA` # get the container's IP address and put it in the code; if this is the only running container, the address already in the code (172.17.0.2) should work
* `python3 -m venv faster`
* `cd faster`
* `source ./bin/activate`
* `python3 -m pip install pymongo`
To run the code:
* `python3 code.py <num_writers> <num_readers> <num_docs>`
* `python3 code.py 10 10 1000`
The prior command runs 10 inserting processes, each adding 1000 documents, and then 10 querying processes, each reading back the 1000 documents inserted by the writer with the same number.
Below is the code.py.
```
import multiprocessing as mp
from pymongo import MongoClient
import datetime
import sys
def drop():
    """Drop ``test_database.test_collection`` so a run starts from empty.

    Connects to the MongoDB server at 172.17.0.2:27017, drops the
    collection, closes the connection, and prints ``drop done``.
    """
    # Collection.drop() returns None, so there is nothing to poll on: a
    # single call is sufficient.  The ``with`` block closes the client's
    # sockets instead of leaving them to interpreter shutdown.
    with MongoClient('172.17.0.2', 27017) as client:
        client.test_database.test_collection.drop()
    print("drop done")
def insert(t):
    """Insert a batch of sample blog-post documents as one writer.

    Args:
        t: ``(x, y)`` tuple, where ``x`` is this writer's number and ``y``
            is how many documents it inserts.

    Documents go into ``test_database.test_collection`` on the MongoDB
    server at 172.17.0.2:27017.  Each ``_id`` is
    ``str(x) + str(i) + chr(x + 100)`` for ``i`` in ``range(y)``.
    Progress is printed for every 100th document.
    """
    x, y = t
    print(f"{x} started inserting")
    # The per-writer suffix keeps ids unique across writers: without it,
    # writer 1 / doc 11 and writer 11 / doc 1 would both produce "111".
    dedup = chr(x + 100)
    # ``with`` closes the connection when this worker is done.
    with MongoClient('172.17.0.2', 27017) as client:
        collection = client.test_database.test_collection
        for i in range(y):
            doc_id = f"{x}{i}{dedup}"
            post = {
                "author": "Mike",
                "text": "My first blog post!",
                "tags": ["mongodb", "python", "pymongo"],
                "date": datetime.datetime.utcnow(),
                "_id": doc_id,
            }
            collection.insert_one(post)
            # Report every 100th insert.  A bare ``if i % 100:`` is true for
            # every index that is NOT a multiple of 100, which floods stdout.
            if i % 100 == 0:
                print(f"inserted {x}{doc_id}")
    print(f"{x} finished inserting")
def query(t):
    """Look up a batch of documents by ``_id`` as one reader.

    Args:
        t: ``(x, y)`` tuple, where ``x`` is this reader's number and ``y``
            is how many documents it looks up.

    For each ``i`` in ``range(y)`` the reader fetches the document whose
    ``_id`` is ``str(x) + str(i) + chr(x + 100)`` from
    ``test_database.test_collection`` on the MongoDB server at
    172.17.0.2:27017.  Every 100th result is printed; ``find_one`` yields
    ``None`` when no document has that id.
    """
    x, y = t
    print(f"{x} started querying")
    # Build ids with the same number + index + per-worker suffix scheme
    # that is used when the documents are written.
    dedup = chr(x + 100)
    # ``with`` closes the connection when this worker is done.
    with MongoClient('172.17.0.2', 27017) as client:
        collection = client.test_database.test_collection
        for i in range(y):
            doc_id = f"{x}{i}{dedup}"
            result = collection.find_one({"_id": doc_id})
            # Report every 100th lookup.  A bare ``if i % 100:`` is true for
            # every index that is NOT a multiple of 100, which floods stdout.
            if i % 100 == 0:
                print(f"retrieved {x}{doc_id}:{result}")
    print(f"{x} finished query")
if __name__ == '__main__':
    # Usage: python3 code.py <num_writers> <num_readers> <num_docs>
    usage = f"usage: {sys.argv[0]} <num_writers> <num_readers> <num_docs>"
    if len(sys.argv) < 4:
        # Exit with a readable message instead of an IndexError traceback.
        sys.exit(usage)
    try:
        num_insert_procs = int(sys.argv[1])
        num_read_procs = int(sys.argv[2])
        doc_count = int(sys.argv[3])
    except ValueError:
        sys.exit(usage)

    # Start from an empty collection so re-runs do not hit duplicate ids.
    drop()

    ctx = mp.get_context('spawn')

    # Phase 1: writers.  Size the pool explicitly; a bare Pool() uses the
    # CPU count, so the requested number of processes would be ignored.
    # Pool rejects a size below 1, so skip the phase when nothing is asked.
    if num_insert_procs > 0:
        insert_jobs = [(i, doc_count) for i in range(1, num_insert_procs + 1)]
        with ctx.Pool(processes=num_insert_procs) as insert_pool:
            insert_pool.map(insert, insert_jobs)

    # Phase 2: readers.  Starts only after map() above has returned, i.e.
    # after every writer has finished.
    if num_read_procs > 0:
        query_jobs = [(i, doc_count) for i in range(1, num_read_procs + 1)]
        with ctx.Pool(processes=num_read_procs) as query_pool:
            query_pool.map(query, query_jobs)
```
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment