-
-
Save zastari/5109724 to your computer and use it in GitHub Desktop.
#!/usr/bin/python | |
""" | |
python-bottle based MySQL replication monitor. | |
Configuration is specified via [server:<server_name>] and [bottle] directives | |
in /etc/rack_replmon/monitor.conf. The monitor will parse any entry that | |
begins with ^server as directives for a server to scan | |
Valid [server:<server_name>] directives: | |
host, port, user, passwd | |
Valid [bottle] directives: | |
host, port | |
If any server fails to meet the following replication requirements, | |
an error is returned: | |
- Slave lag > 60s | |
- Slave_IO_Thread not running | |
- Slave_SQL_Thread not running | |
If all tests succeed, the first line will match ^OK: | |
If any tests fail, the first line will match ^BAD: | |
""" | |
from bottle import route, run, template | |
import MySQLdb | |
import ConfigParser | |
import sys | |
class Config(ConfigParser.ConfigParser):
    """ConfigParser that can return a whole section as a plain dict."""

    def __getitems__(self, section):
        """Return every option of ``section`` as a ``dict``.

        Raises:
            KeyError: if ``section`` does not exist.  The parser's own
                NoSectionError is translated so that callers can treat the
                configuration like a mapping.
        """
        try:
            return dict(self.items(section))
        except ConfigParser.NoSectionError:
            raise KeyError(section)

    # Real mapping hook, so ``config[section]`` works as well.  The original
    # ``__getitems__`` spelling is kept because existing callers use it.
    __getitem__ = __getitems__
def _evaluate_slave_status(slave_status, host, port, max_lag=60):
    """Turn one SHOW SLAVE STATUS row into a ``(code, message)`` result.

    ``slave_status`` is the dict row fetched from the server, or ``None``
    when the statement returned no row.  ``code`` is 0 when every check
    passes and 1 otherwise.
    """
    failure_header = ("x Replication Error: Checks failed for server %s:%s\n%s"
                      % (host, port, "%s"))

    # No row at all: there is nothing to inspect, so report a failure rather
    # than crash while subscripting None.
    if not slave_status:
        return (1, failure_header % " SHOW SLAVE STATUS returned no rows")

    lag = slave_status["Seconds_Behind_Master"]
    healthy = (
        # A NULL lag means the lag is unknown; never count that as "in sync".
        lag is not None
        and lag <= max_lag
        and slave_status["Slave_IO_Running"] == "Yes"
        and slave_status["Slave_SQL_Running"] == "Yes"
    )
    if healthy:
        return (0, "+ Checks for server %s:%s completed successfully" % (host, port))

    fail_print = " Relay coordinates: %s %d\n Master coordinates: %s %d\n IO Error: %d -- %s\n SQL Error: %d -- %s" % (
        slave_status["Relay_Log_File"], slave_status["Relay_Log_Pos"],
        slave_status["Relay_Master_Log_File"], slave_status["Exec_Master_Log_Pos"],
        slave_status["Last_IO_Errno"], slave_status["Last_IO_Error"],
        slave_status["Last_SQL_Errno"], slave_status["Last_SQL_Error"])
    return (1, failure_header % fail_print)


def test_slave(server):
    """Run the replication checks against one configured server.

    Args:
        server: name of the configuration section describing the server.
            Its options (host, port, user, passwd) override the built-in
            defaults of localhost:3306, user "root", empty password.

    Returns:
        A ``(code, message)`` tuple.  ``code`` is 0 when the slave lag is at
        most 60 seconds and both replication threads report "Yes"; it is 1
        for an unknown section, a MySQL error, or a failed check.
    """
    global config
    dsn = {"host": "localhost", "port": 3306,
           "user": "root", "passwd": ""}
    try:
        dsn.update(config.__getitems__(server))
    except KeyError:
        return (1, "Server %s not found in configuration" % server)

    dbh = None
    try:
        dbh = MySQLdb.connect(host=dsn["host"], port=int(dsn["port"]),
                              user=dsn["user"], passwd=dsn["passwd"])
        cur = dbh.cursor(MySQLdb.cursors.DictCursor)
        cur.execute("SHOW SLAVE STATUS")
        slave_status = cur.fetchone()
    except MySQLdb.MySQLError as exc:
        # Errors normally carry (errno, message); fall back to str(exc) when
        # they do not, instead of failing inside the error handler.
        try:
            detail = "%d: %s" % (exc.args[0], exc.args[1])
        except (IndexError, TypeError):
            detail = str(exc)
        return (1, "x Connection Error %s" % detail)
    finally:
        # Always release the connection, whichever path was taken above.
        if dbh is not None:
            dbh.close()

    return _evaluate_slave_status(slave_status, dsn["host"], dsn["port"])
def enumerate_slaves(slave_filter):
    """Test the configured servers and build the plain-text report.

    Args:
        slave_filter: ``None`` to test every section whose name starts with
            "server", or a server name (without the "server:" prefix) to
            test only that one.

    Returns:
        A newline-terminated string.  Its first line starts with "OK:" when
        every tested server passed and with "BAD:" otherwise, followed by one
        status entry per server.  When ``slave_filter`` names an unknown
        server, a single "not found" line is returned instead, without an
        OK:/BAD: prefix.
    """
    global config

    if slave_filter is None:
        targets = [name for name in config.sections()
                   if name.startswith("server")]
    else:
        slave_filter = "server:" + slave_filter
        # Guard clause: an unknown server short-circuits the whole report.
        if not config.has_section(slave_filter):
            return "Server %s not found in configuration\n" % slave_filter
        targets = [slave_filter]

    all_passed = True
    details = []
    for target in targets:
        code, message = test_slave(target)
        details.append(message)
        if code != 0:
            all_passed = False

    if all_passed:
        header = "OK: All checks completed successfully"
    else:
        header = "BAD: At least one check failed"
    return '\n'.join([header] + details) + '\n'
def main():
    """Load the configuration, register the routes and serve until stopped.

    Reads /etc/rack_replmon/monitor.conf into the module-level ``config``.
    An optional [bottle] section overrides the default listen address of
    localhost:8080.

    Returns:
        1 if the configuration cannot be opened or parsed (the reason is
        written to stderr), otherwise 0 once the server loop has ended.

    Uses ``with`` and ``except ... as``, so Python 2.6 or newer is required.
    """
    global config
    config = Config()
    try:
        # The context manager closes the file even when parsing fails.
        with open("/etc/rack_replmon/monitor.conf") as conf_file:
            config.readfp(conf_file)
    except ConfigParser.Error as exc:
        sys.stderr.write("%s %s\n" % ("Failed to parse config ", exc))
        return 1
    except IOError as exc:
        sys.stderr.write("%s %s\n" % ("Failed to open config ", exc))
        return 1

    @route('/<server>')
    @route('/')
    def index(server=None):
        # enumerate_slaves() already treats None as "all servers", so the
        # value can be passed straight through.
        slave_status = enumerate_slaves(server)
        return template('{{slave_status}}', slave_status=slave_status)

    bottle_server = {"host": "localhost", "port": 8080}
    if "bottle" in config.sections():
        bottle_server.update(config.__getitems__("bottle"))
    # Values read from the config file are strings; hand run() an integer
    # port, as test_slave() does for the MySQL port.
    run(host=bottle_server["host"], port=int(bottle_server["port"]))
    return 0


# Start the monitor only when executed as a script, so the module can be
# imported (for tests or debugging) without side effects.
if __name__ == '__main__':
    sys.exit(main())
Also worth noting is this gotcha:
@route('/')
def index(slave_status = enumerate_slaves(config)):
...
The slave_status default argument is evaluated once, when the script is loaded, and not on every index() call. Further, arguments to bottle routes should be simple strings (for the most part) and not complex objects. They are effectively just a convenience for pulling parts out of URL strings.
Instead you should calculate slave_status within the method so it is refreshed each time a request comes in:
@route('/')
def index():
slave_status = enumerate_slaves(config)
return template(...)
You may want to make this route parameterized, however, to support looking up a specific slave rather than all slaves:
@route('/<name>')
def index(name=None):
if name is None:
# check all slaves
else:
check_slave(name)
This may be better achieved by different routes, though, e.g. /check/all vs. /check/<name>.
To support this I would probably change the API to have enumerate_slaves just return valid sections from the config and then have test_slave accept a section_name as an argument:
def enumerate_slaves():
global config
return [name for name in config.sections() if name.startswith('server')]
def test_slave(name):
global config
section = config[name]
...
Thanks for the recommendations Andy. I've implemented all of them except for parsing MySQL default files, and I've cleaned up the code in quite a few places. I've got several changes listed and a few more questions.
Changes:
- I pushed the connection logic from enumerate_slaves into test_slave so that test_slave takes a section name. This made more sense given the function's purpose
- enumerate_slaves takes a section label as an input and will only perform lookups for that slave if it's passed in. This uses the route templating that you outlined above
- conf format for server IDs is [server:<server_name>] now.
Questions:
- I noticed that you pass config to each function as a global variable. I was typically under the assumption that globals were to be avoided whenever possible. Is there a reason this is preferable to passing the config object in as a function argument?
- I have this ugly construct on lines 89-99 to parse the server list. Whenever I passed a single server as an argument, it was treated as a string and the for loop iterated over each character rather than the whole string. Is there a better way to avoid this construct?
if isinstance(server_list, list):
for server in server_list:
(test_return, status_string) = test_slave(server)
if test_return != 0:
test_success = 0
test_status_list.append(status_string)
else:
(test_return, status_string) = test_slave(server_list)
if test_return != 0:
test_success = 0
test_status_list.append(status_string)
From here, I have the following goals outlined next:
- Get tests built (I was looking at using nosetest. Is there anything else I should consider using?)
- Get a logger class implemented so that failures could have relevant slave info dumped to file
- Parse argv to let conf file and bottle arguments be configurable from the command line
I fixed the ugly if/else construct with the following so that's cleared now:
old:
server_list = slave_filter
new:
server_list = [slave_filter]
then just turned the loop into
for server in server_list:
(test_return, status_string) = test_slave(server)
if test_return != 0:
test_success = 0
test_status_list.append(status_string)
Questions:
- I noticed that you pass config to each function as a global variable. I was typically under the assumption that globals were to be avoided whenever possible. Is there a reason this is preferable to passing the config object in as a function argument?
I do recommend avoiding global variables as a best practice, but for simple scripts having a global 'config' object is not uncommon. config was already in a global scope here, so I just referenced it like that in my examples. You can certainly just pass around the config instance to each function that needs it. You might stuff the config dictionary into the Bottle.config instance, and the individual request handlers can pass that dict around from there:
import bottle
def main():
repl_config = ConfigParser.readfp(open(...))
app = bottle.default_app()
for section in repl_config.sections():
app.config[section] = repl_config[section]
This is a little messy since ConfigParser doesn't have a clean way to make itself into a dictionary, but only adds two lines. In the route you can reference that dict and pass it around to the other test functions:
@route('/')
def index():
app = bottle.default_app()
config = app.config
for slave in enumerate_slaves(config):
test_slave(config[slave])
Otherwise, there's not really a clean way to push the config into the route handlers without effectively using a global variable. Of course, bottle.default_app() is effectively a global variable too, but it's part of the bottle API and we're not adding any additional global state.
I would keep the route definitions in the global scope rather than having nested functions in main().
From here, I have the following goals outlined next:
- Get tests built (I was looking at using nosetest. Is there anything else I should consider using?)
I am a big fan of nosetests for unit testing. Python also has a unittest module in the stdlib, but it requires a lot more boilerplate, and nosetests can run those tests through its interface as well. I only use unittest if I'm integrating some existing test case (sometimes from a Python backport for compatibility, etc.), but I always test with the nosetests frontend.
Some other tools that are very useful to look at that I use frequently:
- pyflakes - this is effectively a pep8 style checker and you'll probably learn a lot about standard python style from it
https://pypi.python.org/pypi/pyflakes
- pylint - a much more in-depth static code checker - it can often find subtle bugs in your code.
https://pypi.python.org/pypi/pylint
It also comments on style but can be really pedantic, even with very idiomatic python. It's extremely useful to catch cases where you typo'd a variable name that would not otherwise be caught until runtime and the style recommendations are useful to understand even when they can be ignored.
- coverage.py - shows you how much of the code your tests are actually covering
https://pypi.python.org/pypi/coverage
This integrates well with nosetests and can help you find code paths you may have missed with your test cases
- mocking - stub out complex bits of code so you can unit test your code
I use some mock libraries to fake out query/result patterns, so I can test my code for the case that "If mysql returns this result, am I doing the right thing?"
I use mocker for this in some of the holland tests:
https://pypi.python.org/pypi/mocker
For example, I might write a set of mocks that return different output for SHOW SLAVE STATUS and make sure test_slave does the right thing with that output, so I don't have to setup a full mysql replication slave to run my unit tests
Of course, with mock you still want to test against the real thing as well, but for unit tests mock objects can be really useful.
A few things:
Namely this means for keyword values you don't want spaces around the '=' sign.
run(host=bottle_server["host"], port=bottle_server["port"])
This lets you import the script if you want to write tests later, or import it to poke at specific functions while debugging the script. The `if __name__ == '__main__'` part guards against running the actual script unless it is invoked directly. E.g. 'import replmonitor' won't run main(), but 'python replmonitor.py' will.
This lets you access the config like a dictionary and it will return a dict() of options. So after you read() in a config you can do something like:
Without bouncing through some accessor method.
This is slightly faster and more idiomatic python.
This also lets you do clever things like having a .my.cnf like this:
~/.my.cnf:
And then you can set the defaults group when you connect to read the right section:
This will read the [client] section in all cases, but override with anything in the "[slave-${host}]" section. This makes your script behave more like a standard mysql tool. You can still override the host,passwd,user,etc. fields from the app config by additional keyword arguments to connect().