"""Benchmark reading large list data stored in a DictField vs. an EmbeddedDocument.

On import this module connects to the ``test-dicts`` database, drops the two
benchmark collections and stores one freshly generated document in each.
Run as a script, it times several ways of reading those documents back (raw
pymongo, mongoengine documents, ``as_pymongo()`` and ``aggregate()``) and
then records call graphs of the two mongoengine document loads.
"""
import datetime
import itertools
import random
import sys
import timeit
from collections import defaultdict
from distutils.version import StrictVersion

import mongoengine as db
from pycallgraph.output.graphviz import GraphvizOutput
from pycallgraph.pycallgraph import PyCallGraph
from pymongo import version as pymongo_version

db.connect("test-dicts")


class Data(db.EmbeddedDocument):
    """Embedded document holding five integer-list fields, ``subf0``..``subf4``."""

    subf0 = db.ListField(db.IntField())
    subf1 = db.ListField(db.IntField())
    subf2 = db.ListField(db.IntField())
    subf3 = db.ListField(db.IntField())
    subf4 = db.ListField(db.IntField())


class MyDictModel(db.Document):
    """Document that keeps its payload in a free-form ``DictField``."""

    date = db.DateTimeField(required=True, default=datetime.date.today)
    data_dict_1 = db.DictField()


class MyEmbedModel(db.Document):
    """Document that keeps its payload in an embedded ``Data`` document."""

    date = db.DateTimeField(required=True, default=datetime.date.today)
    data_dict_1 = db.EmbeddedDocumentField(Data)


# --- Fixture setup (runs at import time) -----------------------------------
# Start from empty collections so each one holds exactly the document saved
# below.
MyDictModel.drop_collection()
MyEmbedModel.drop_collection()

# Field names shared by both storage layouts: "subf0" .. "subf4".
data = ["subf{}".format(index) for index in range(5)]

# Dict-backed document: each key maps to 20000 distinct ints below 50000.
my_dict = {name: random.sample(range(50000), 20000) for name in data}
m_dict = MyDictModel()
m_dict.data_dict_1 = my_dict
m_dict.save()

# Embedded-document variant, populated with the same kind of data.
my_data = Data()
for field_name in data:
    my_data[field_name] = random.sample(range(50000), 20000)
m_embed = MyEmbedModel()
m_embed.data_dict_1 = my_data
m_embed.save()


# --- Benchmarked readers ----------------------------------------------------
# Every reader prints a (type, len) tuple for what it loaded and returns the
# loaded object. The print runs inside the timed call.

def pymongo_dict_doc():
    """Fetch one document from ``my_dict_model`` through the raw connection."""
    database = db.connection.get_connection()["test-dicts"]
    doc = database['my_dict_model'].find_one()
    print((type(doc), len(doc)))
    return doc


def pymongo_embed_doc():
    """Fetch one document from ``my_embed_model`` through the raw connection."""
    database = db.connection.get_connection()["test-dicts"]
    doc = database['my_embed_model'].find_one()
    print((type(doc), len(doc)))
    return doc


def mongoengine_dict_doc():
    """Load the first ``MyDictModel`` as a mongoengine document."""
    doc = MyDictModel.objects.first()
    payload = doc.data_dict_1
    print((type(payload), len(payload)))
    return doc


def mongoengine_embed_doc():
    """Load the first ``MyEmbedModel`` as a mongoengine document."""
    doc = MyEmbedModel.objects.first()
    payload = doc.data_dict_1
    print((type(payload), len(payload)))
    return doc


def mongoengine_dict_docp():
    """Load the first ``MyDictModel`` through ``as_pymongo()``."""
    queryset = MyDictModel.objects.as_pymongo()
    doc = queryset.first()
    print((type(doc), len(doc)))
    return doc


def mongoengine_embed_docp():
    """Load the first ``MyEmbedModel`` through ``as_pymongo()``."""
    queryset = MyEmbedModel.objects.as_pymongo()
    doc = queryset.first()
    print((type(doc), len(doc)))
    return doc


def mongoengine_agg_doc():
    """Load one ``MyDictModel`` result through a ``$limit`` aggregation."""
    results = list(MyDictModel.objects.aggregate({"$limit": 1}))
    doc = results[0]
    print((type(doc), len(doc)))
    return doc


def mongoengine_agg_embed():
    """Load one ``MyEmbedModel`` result through a ``$limit`` aggregation."""
    results = list(MyEmbedModel.objects.aggregate({"$limit": 1}))
    doc = results[0]
    print((type(doc), len(doc)))
    return doc


def _report(template, benchmark):
    """Time ten calls of *benchmark* and print the total using *template*."""
    print(template.format(timeit.timeit(benchmark, number=10)))


if __name__ == '__main__':
    _report("pymongo with dict took {:2.2f}s", pymongo_dict_doc)
    _report("pymongo with embed took {:2.2f}s", pymongo_embed_doc)
    _report("mongoengine with dict took {:2.2f}s", mongoengine_dict_doc)
    _report("mongoengine with embed took {:2.2f}s", mongoengine_embed_doc)
    _report("mongoengine with dict as_pymongo() took {:2.2f}s",
            mongoengine_dict_docp)
    _report("mongoengine with embed as_pymongo() took {:2.2f}s",
            mongoengine_embed_docp)

    # The aggregation benchmarks are only run against pymongo 3.0.0 or newer.
    if StrictVersion(pymongo_version) >= StrictVersion('3.0.0'):
        _report("mongoengine aggregation with dict took {:2.2f}s",
                mongoengine_agg_doc)
        _report("mongoengine aggregation with embed took {:2.2f}s",
                mongoengine_agg_embed)
    else:
        print("Skipping aggregation on pymongo < 3.x")

    # Record one call graph per storage layout, embedded first.
    for output_path, loader in (("viz_embed.png", mongoengine_embed_doc),
                                ("viz_dict.png", mongoengine_dict_doc)):
        graph_output = GraphvizOutput()
        graph_output.output_file = output_path
        with PyCallGraph(output=graph_output):
            loader()