Last active
April 23, 2021 03:54
-
-
Save konstruktoid/bcb9daefab6beca67de833b5f547be91 to your computer and use it in GitHub Desktop.
replacing eval getReplicationInfo with python functions -- https://jira.mongodb.org/browse/PYTHON-1717
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def oplogstats(connection):
    """Return the ``collstats`` document for the ``local.oplog.rs`` collection.

    Args:
        connection: A PyMongo client (or any mapping-like object) that yields
            the ``local`` database when indexed with ``"local"``.

    Returns:
        The result of running the ``collstats`` command against ``oplog.rs``,
        or ``None`` when the ``local`` database has no ``oplog.rs`` collection.
    """
    localdb = connection["local"]
    # list_collection_names() replaces collection_names(), which was
    # deprecated in PyMongo 3.7 and removed in PyMongo 4.0.
    if "oplog.rs" not in localdb.list_collection_names():
        return None
    return localdb.command("collstats", "oplog.rs")
def logsizemb(connection):
    """Return the configured maximum size of the oplog in megabytes.

    Args:
        connection: A PyMongo client (or any mapping-like object) that yields
            the ``local`` database when indexed with ``"local"``.

    Returns:
        The ``maxSize`` field of the ``collstats`` result for ``oplog.rs``,
        converted from bytes to MiB and rounded to two decimals, or ``None``
        when the ``local`` database has no ``oplog.rs`` collection.
    """
    localdb = connection["local"]
    # list_collection_names() replaces collection_names(), which was
    # deprecated in PyMongo 3.7 and removed in PyMongo 4.0.
    if "oplog.rs" not in localdb.list_collection_names():
        return None
    collstats = localdb.command("collstats", "oplog.rs")
    return round(collstats["maxSize"] / (1024 * 1024), 2)
def usedmb(connection):
    """Return the amount of oplog space currently in use, in megabytes.

    Args:
        connection: A PyMongo client (or any mapping-like object) that yields
            the ``local`` database when indexed with ``"local"``.

    Returns:
        The ``size`` field of the ``collstats`` result for ``oplog.rs``,
        converted from bytes to MiB and rounded to two decimals, or ``None``
        when the ``local`` database has no ``oplog.rs`` collection.
    """
    localdb = connection["local"]
    # list_collection_names() replaces collection_names(), which was
    # deprecated in PyMongo 3.7 and removed in PyMongo 4.0.
    if "oplog.rs" not in localdb.list_collection_names():
        return None
    collstats = localdb.command("collstats", "oplog.rs")
    # round(..., 2) already does the two-decimal rounding; the former
    # "* 100 / 100" step cancelled itself out and only added float noise.
    return round(collstats["size"] / (1024 * 1024), 2)
def tfirst(connection):
    """Return the time of the oldest entry in the oplog.

    Args:
        connection: A PyMongo client (or any mapping-like object) that yields
            the ``local`` database when indexed with ``"local"``.

    Returns:
        The value of ``ts.as_datetime()`` for the first document of
        ``oplog.rs`` in ascending ``$natural`` order, or ``None`` when the
        collection does not exist or contains no documents.
    """
    localdb = connection["local"]
    # list_collection_names() replaces collection_names(), which was
    # deprecated in PyMongo 3.7 and removed in PyMongo 4.0.
    if "oplog.rs" not in localdb.list_collection_names():
        return None
    firstdoc = localdb["oplog.rs"].find_one(sort=[("$natural", 1)])
    # find_one() yields None for an empty collection; subscripting that
    # would raise TypeError, so report "no data" like the missing-oplog case.
    if firstdoc is None:
        return None
    return firstdoc["ts"].as_datetime()
def tlast(connection):
    """Return the time of the newest entry in the oplog.

    Args:
        connection: A PyMongo client (or any mapping-like object) that yields
            the ``local`` database when indexed with ``"local"``.

    Returns:
        The value of ``ts.as_datetime()`` for the first document of
        ``oplog.rs`` in descending ``$natural`` order, or ``None`` when the
        collection does not exist or contains no documents.
    """
    localdb = connection["local"]
    # list_collection_names() replaces collection_names(), which was
    # deprecated in PyMongo 3.7 and removed in PyMongo 4.0.
    if "oplog.rs" not in localdb.list_collection_names():
        return None
    lastdoc = localdb["oplog.rs"].find_one(sort=[("$natural", -1)])
    # find_one() yields None for an empty collection; subscripting that
    # would raise TypeError, so report "no data" like the missing-oplog case.
    if lastdoc is None:
        return None
    return lastdoc["ts"].as_datetime()
def timediff(connection):
    """Return the number of seconds spanned by the oplog.

    Args:
        connection: A PyMongo client exposing the oplog collection as
            ``connection.local.oplog.rs``.

    Returns:
        The ``ts.time`` of the newest oplog entry minus the ``ts.time`` of the
        oldest one, or ``None`` when either lookup finds no document (for
        example because the oplog is empty).
    """
    oplogcol = connection.local.oplog.rs
    firstdoc = oplogcol.find_one(sort=[("$natural", 1)])
    lastdoc = oplogcol.find_one(sort=[("$natural", -1)])
    # find_one() yields None when there is nothing to return; answer None in
    # that case, as the sibling helpers do, instead of raising TypeError.
    if firstdoc is None or lastdoc is None:
        return None
    return lastdoc["ts"].time - firstdoc["ts"].time
def timediffhours(connection):
    """Return the time span covered by the oplog, in hours.

    Args:
        connection: Passed through unchanged to ``timediff``.

    Returns:
        The result of ``timediff(connection)`` converted from seconds to
        hours and rounded to two decimals, or ``None`` when ``timediff``
        returns ``None``.
    """
    seconds = timediff(connection)
    # timediff() signals "no data" with None; propagate it rather than
    # attempting arithmetic on None.
    if seconds is None:
        return None
    return round(seconds / 3600, 2)
Hi @bslade,
been a while since I touched this but you're right, '$natural'
is unnecessary, and I believe the much simpler .find().sort([("ts", 1), ("ts", -1)])
would suffice.
Updated the gist.
Example usage:
from pymongo import MongoClient
import getReplicationInfo

connection = MongoClient('mongodb://localhost:27017/')
db = connection["admin"]
# The command result can be bound directly; pre-creating and clearing an
# empty dict first had no effect because the name was rebound straight after.
repl = db.command("replSetGetStatus")
repldict = repl
print(getReplicationInfo.tfirst(connection))
print(getReplicationInfo.tlast(connection))
Haven't done any performance testing at all.
Neat! A couple suggestions:
- You can use `find_one` instead of adding a `limit(1)`, and `$natural` in the sort instead of `"ts"`:
>>> from pymongo import ASCENDING, DESCENDING
>>> first_doc = client.local.oplog.rs.find_one(sort=[("$natural", ASCENDING)])
>>> first_doc
{u'wall': datetime.datetime(2020, 7, 14, 22, 40, 25, 41000), u'ts': Timestamp(1594766425, 1), u'o': {u'msg': u'initiating set'}, u'v': 2L, u'ns': u'', u'op': u'n'}
>>> last_doc = client.local.oplog.rs.find_one(sort=[("$natural", DESCENDING)])
>>> last_doc
{u'wall': datetime.datetime(2020, 7, 14, 22, 44, 4, 668000), u'ts': Timestamp(1594766644, 1), u'o': {u'msg': u'periodic noop'}, u't': 1L, u'v': 2L, u'ns': u'', u'op': u'n'}
- You do not need to manually parse the "ts" field into a Timestamp. PyMongo already returns the field as a Timestamp (as seen above). So you can replace this:
for lastdoc in oplogcol.find().sort([("ts", 1), ("ts", -1)]).limit(1):
lastdoc = lastdoc["ts"]
lastdoc = re.findall("[0-9]+", str(lastdoc))
timelastdoc = int(lastdoc[0])
inclastdoc = int(lastdoc[1])
tlast = bson.timestamp.Timestamp(timelastdoc, inclastdoc).as_datetime()
With this:
lastdoc = oplogcol.find_one(sort=[("$natural", -1)])
return lastdoc["ts"].as_datetime()
- The Timestamp type already has a `time` attribute which represents the time in seconds since epoch UTC. You can calculate the difference in seconds between two Timestamps without converting them to datetime at all (see https://pymongo.readthedocs.io/en/stable/api/bson/timestamp.html#bson.timestamp.Timestamp):
def timediff(connection):
    """Return the seconds between the oldest and newest oplog entries.

    Args:
        connection: A PyMongo client exposing the oplog collection as
            ``connection.local.oplog.rs``.

    Returns:
        The ``time`` attribute of the newest entry's ``ts`` minus that of the
        oldest entry's ``ts``.
    """
    oplogcol = connection.local.oplog.rs
    tsfirst = oplogcol.find_one(sort=[("$natural", 1)])["ts"]
    tslast = oplogcol.find_one(sort=[("$natural", -1)])["ts"]
    # The difference is returned directly; the earlier "return timedif"
    # misspelled the local name and raised NameError.
    return tslast.time - tsfirst.time
reverse sort the whole oplog to get this one row? I.e., will this query consume a lot of resources to run?
MongoDB will automatically optimize queries on the oplog that sort by `$natural` (both ascending and descending). The `find_one` (or `limit(1)`) means that the server will only scan a single oplog entry, which is fast.
Thanks for the code improvements and answering the question about performance @ShaneHarvey!
I've updated the gist.
Test code:
#!/usr/bin/python3
"""Print the result of every getReplicationInfo helper for a local mongod."""
from pymongo import MongoClient
import getReplicationInfo

connection = MongoClient("mongodb://localhost:27017/")

# Each label is paired with the helper whose result is printed after it;
# the order matches the order in which the lines are emitted.
reports = (
    ("oplogstats: ", getReplicationInfo.oplogstats),
    ("logsizemb: ", getReplicationInfo.logsizemb),
    ("usedmb: ", getReplicationInfo.usedmb),
    ("tfirst: ", getReplicationInfo.tfirst),
    ("tlast: ", getReplicationInfo.tlast),
    ("timediff: ", getReplicationInfo.timediff),
    ("timediffhours: ", getReplicationInfo.timediffhours),
)
for label, helper in reports:
    print(label + str(helper(connection)))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
For the query to get the first (oldest) oplog entry, instead of:
oplogCol.find().sort([('$natural', 1)]).limit(1)
could you instead just write:
oplogCol.find().limit(1)
Will the search for the last (most recent) timestamp in the oplog:
oplogCol.find().sort([('$natural', -1)]).limit(1)
reverse sort the whole oplog to get this one row? I.e., will this query consume a lot of resources to run?
Thanks in advance
Ben