Created
December 14, 2012 21:33
-
-
Save anonymous/4288833 to your computer and use it in GitHub Desktop.
Benchmarking MongoDB for efficient indexes
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
from pymongo import Connection, ASCENDING, DESCENDING | |
from datetime import datetime | |
# MongoDB handle shared by the benchmark queries in this script.
c = Connection()
db = c["asynctask"]

# Reference timestamps used as bounds in the range predicates.
# Will select about 7/10 of the logs
# NOTE(review): the comment above is kept from the original; it is not clear
# from this script which of the two timestamps it refers to -- confirm.
now = datetime.now()
half = datetime(year=2012, month=12, day=13, hour=20, minute=30, second=30)
def explain(r, what):
    """Print a one-line summary of the query plan for benchmark query ``r``.

    Calls ``what.explain()`` and prints a table row holding the elapsed time
    in seconds (``millis`` / 1000), whether an in-memory sort was needed
    (``SAO``), the cursor description, the number of documents returned
    (``n``) and the number of documents scanned (``s``).

    If ``what.explain()`` raises, the exception text is printed in the row
    instead, so that one failing query does not abort the whole run.

    Parameters:
        r: integer query number, printed zero-padded to two digits.
        what: object exposing an ``explain()`` method that returns a mapping
            with the keys ``millis``, ``cursor``, ``n`` and
            ``nscannedObjects`` (``scanAndOrder`` is optional).

    Written with ``print(...)`` taking a single argument and
    ``except ... as`` so the same code parses on Python 2.6+ and Python 3.
    """
    try:
        plan = what.explain()
    except Exception as exc:
        # Deliberately broad: this is a best-effort report line, not recovery.
        print("| %02d | %s" % (r, exc))
        return

    print("| %02d | %.2fs SAO=%d =%s= n=%d s=%d" % (
        r,
        plan["millis"] / 1000.,
        # Treat a missing flag as "no in-memory sort" rather than crashing
        # with KeyError outside the try block.
        int(plan.get("scanAndOrder", False)),
        plan["cursor"],
        plan["n"],
        plan["nscannedObjects"]))
def stats(coll):
    """Print the total index size and the data size of collection ``coll``.

    Runs the ``collstats`` database command through the module-level ``db``
    handle and reports ``totalIndexSize`` and ``size`` converted from bytes
    to whole megabytes.

    Parameters:
        coll: name of the collection to inspect.
    """
    s = db.command("collstats", coll)
    # Floor division keeps the whole-megabyte result explicit instead of
    # relying on Python 2's integer "/"; print(...) with a single argument
    # parses on both Python 2 and Python 3.
    print("| | Total index size: %d MB (out of %d MB)" % (
        s["totalIndexSize"] // 1024 // 1024,
        s["size"] // 1024 // 1024))
# Benchmark suite: every entry is explained in order and reported by
# explain(); the index/data size summary is printed at the end.
COMMON_TYPE = "most-common-type"
NEWEST_FIRST = [("_id", DESCENDING)]
BY_PRIORITY = [("priority", DESCENDING), ("_id", ASCENDING)]

# (query number, filter document or None, sort specification or None,
#  limit or None)
benchmarks = [
    (1, {"depends": "unknown"}, None, 1),
    (2, {"created": {"$lt": now},
         "status": "finished",
         "tried": {"$lt": 5}}, None, None),
    (3, None, NEWEST_FIRST, None),
    (4, {"tried": {"$gt": 3}}, NEWEST_FIRST, None),
    (5, {"type": COMMON_TYPE}, NEWEST_FIRST, None),
    (6, {"rtask_id": 18}, NEWEST_FIRST, None),
    (7, {"tried": {"$gt": 3},
         "type": COMMON_TYPE}, NEWEST_FIRST, None),
    (8, {"status": "finished",
         "type": COMMON_TYPE}, NEWEST_FIRST, None),
    (9, {"status": "queued",
         "type": COMMON_TYPE}, NEWEST_FIRST, None),
    (10, {"status": "queued",
          "type": COMMON_TYPE,
          "tried": {"$lt": 5}}, None, None),
    (11, {"status": {"$in": ["finished", "running"]},
          "type": COMMON_TYPE}, NEWEST_FIRST, None),
    (12, {"status": "finished",
          "type": COMMON_TYPE,
          "finished": {"$gt": half}}, None, None),
    (13, {"status": {"$in": ["queued", "running"]},
          "reserved": {"$lt": now},
          "type": {"$in": [COMMON_TYPE]},
          "tried": {"$lt": 5},
          "ns": "default"}, BY_PRIORITY, 1),
    (14, {"status": "queued",
          "reserved": {"$lt": now},
          "type": {"$in": [COMMON_TYPE]},
          "tried": {"$lt": 5},
          "ns": "default"}, BY_PRIORITY, 1),
]

for number, spec, ordering, limit in benchmarks:
    cursor = db.tasks.find(spec)
    # Apply the sort before the limit, in the same order the cursor
    # methods are chained for the queries that use both.
    if ordering is not None:
        cursor = cursor.sort(ordering)
    if limit is not None:
        cursor = cursor.limit(limit)
    explain(number, cursor)

stats("tasks")
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import random | |
import string | |
import time | |
from datetime import datetime | |
from pymongo import Connection | |
def _random_word(alphabet, length):
    """Return ``length`` characters drawn at random from ``alphabet``."""
    return ''.join(random.choice(alphabet) for _ in xrange(length))


c = Connection()
db = c["asynctask"]

# Pool of task types: "most-common-type" appears 40 times next to 10 random
# 20-letter names, so random.choice() picks it for 4 out of 5 tasks.
types = ["most-common-type"] * 40 + [_random_word(string.letters, 20)
                                     for _ in xrange(10)]

# 150 worker host names, each made of 10 random digits.
hosts = ["%s.dailymotion.com" % _random_word(string.digits, 10)
         for _ in xrange(150)]

# Creation time of the first task, as a Unix timestamp (local time).
start = time.mktime(datetime(2012, 12, 13, 20, 0, 30, 0).timetuple())

# Namespace pool: "default" is 10 of the 12 entries.
namespaces = ["default"] * 10 + ["other-ns1", "other-ns2"]
# Tasks.
# The first half of the tasks are finished; a tenth of those are failed
# instead.  The next tenth of all tasks (count/2 <= n < 6*count/10) are
# running, except for a tenth of them which are already finished.  The
# remaining tasks are queued, with a tenth of them running.
count = 5000000

# Retry-count distribution, built once instead of on every iteration:
# 1 is ten times as likely as each of 2, 3, 4 and 5.
tried_choices = [1] * 10 + [2, 3, 4, 5]

for n in xrange(count):
    # randrange(10) has exactly ten equally likely outcomes, so "== 0" is a
    # real one-in-ten draw.  randint(0, 10) includes both bounds and would
    # make it one in eleven.
    one_in_ten = random.randrange(10) == 0
    if n < count // 2:
        status = "failed" if one_in_ten else "finished"
    elif n < 6 * count // 10:
        status = "finished" if one_in_ten else "running"
    else:
        status = "running" if one_in_ten else "queued"

    # Creation times are spread over the hour following `start`, in whole
    # seconds (explicit floor division).
    created = start + n * 3600 // count

    # Queued tasks never started; only finished/failed tasks have an end time.
    if status != "queued":
        started = created + random.randrange(10, 100)
    else:
        started = None
    if status in ("finished", "failed"):
        finished = started + random.randrange(10, 200)
    else:
        finished = None

    # Reservation time: the creation time for queued tasks, otherwise 30
    # seconds after the latest state change.
    if status == "queued":
        reserved = created
    elif status == "running":
        reserved = started + 30
    else:
        reserved = finished + 30

    tried = random.choice(tried_choices)
    if status == "queued":
        # Queued tasks get one attempt fewer, giving a range of 0 to 4.
        tried -= 1

    db.tasks.insert({
        "status": status,
        "created": datetime.fromtimestamp(created),
        "started": (datetime.fromtimestamp(started)
                    if started is not None else None),
        "finished": (datetime.fromtimestamp(finished)
                     if finished is not None else None),
        "reserved": datetime.fromtimestamp(reserved),
        "depends": [],
        "callback_id": "not important",
        "callback_parameters": {"param1": 66, "param2": 8457},
        "worker_host": random.choice(hosts) if status != "queued" else None,
        "worker_name": "tictac-16" if status != "queued" else None,
        "result": "failed logs" if status == "failed" else None,
        "ns": random.choice(namespaces),
        "tried": tried,
        "type": random.choice(types),
    })
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment