Skip to content

Instantly share code, notes, and snippets.

@border
Created June 18, 2011 15:21
Show Gist options
  • Save border/1033174 to your computer and use it in GitHub Desktop.
Save border/1033174 to your computer and use it in GitHub Desktop.
How to save 200% RAM by selecting the right key data type for #MongoDB
Intel(R) Core(TM) i3 CPU M 350 @ 2.27GHz
Mem 2G
bvcom@ubuntu:~/work/db/test$ time python mongoIndexTest.py
ObjectID
int
Base64 BSON
string
real 4m59.993s
user 2m30.729s
sys 2m14.420s
> show collections
base64s
ints
objectids
strings
system.indexes
> db.ints.stats()
{
"ns" : "test.ints",
"count" : 1000000,
"size" : 24000040,
"avgObjSize" : 24.00004,
"storageSize" : 43057152,
"numExtents" : 11,
"nindexes" : 1,
"lastExtentSize" : 10440704,
"paddingFactor" : 1,
"flags" : 1,
"totalIndexSize" : 32522240,
"indexSizes" : {
"_id_" : 32522240
},
"ok" : 1
}
> db.objectids.stats()
{
"ns" : "test.objectids",
"count" : 1000000,
"size" : 32000032,
"avgObjSize" : 32.000032,
"storageSize" : 55586816,
"numExtents" : 12,
"nindexes" : 1,
"lastExtentSize" : 12529664,
"paddingFactor" : 1,
"flags" : 1,
"totalIndexSize" : 41598976,
"indexSizes" : {
"_id_" : 41598976
},
"ok" : 1
}
> db.base64s.stats()
{
"ns" : "test.base64s",
"count" : 1000000,
"size" : 40000032,
"avgObjSize" : 40.000032,
"storageSize" : 70623232,
"numExtents" : 13,
"nindexes" : 1,
"lastExtentSize" : 15036416,
"paddingFactor" : 1,
"flags" : 1,
"totalIndexSize" : 67117056,
"indexSizes" : {
"_id_" : 67117056
},
"ok" : 1
}
> db.strings.stats()
{
"ns" : "test.strings",
"count" : 1000000,
"size" : 56000016,
"avgObjSize" : 56.000016,
"storageSize" : 88670208,
"numExtents" : 14,
"nindexes" : 1,
"lastExtentSize" : 18046976,
"paddingFactor" : 1,
"flags" : 1,
"totalIndexSize" : 90267648,
"indexSizes" : {
"_id_" : 90267648
},
"ok" : 1
}
#!/usr/bin/env python
#coding: utf8
# This code is copied from AlexDong.com
# http://notes.alexdong.com/choose-the-right-data-type-for-mongodb
import pymongo
import bson
import hashlib
from pymongo import Connection
# Benchmark setup: fill four collections in the "test" database with the same
# number of documents, differing only in the type used for `_id`, so that
# their data and index sizes can be compared afterwards with
# db.<collection>.stats() in the mongo shell.
db = Connection().test
count = 1000000

# No explicit `_id` is supplied here, so the default ObjectId key is used.
print('ObjectID')
for i in range(count):
    db.objectids.insert({'i': i})

# Plain integer key.
print('int')
for i in range(count):
    db.ints.insert({'_id': i, 'i': i})

# Raw 16-byte MD5 digest stored as BSON binary data with the MD5 subtype.
print('Base64 BSON')
for i in range(count):
    # hashlib requires bytes on Python 3; encoding the decimal digits as
    # ASCII produces the same digest that Python 2 computed from str(i).
    digest = hashlib.md5(str(i).encode('ascii')).digest()
    db.base64s.insert({
        '_id': bson.Binary(digest, bson.binary.MD5_SUBTYPE),
        'i': i,
    })

# The same MD5 value, stored as a 32-character hexadecimal string.
print('string')
for i in range(count):
    hex_digest = hashlib.md5(str(i).encode('ascii')).hexdigest()
    db.strings.insert({'_id': hex_digest, 'i': i})
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment