-
-
Save wolph/f569ba733499a67f1179be97ff69825b to your computer and use it in GitHub Desktop.
# vim: set ft=dosini:
# Supervisord config for the barman exporter
[program:barman_exporter]
# Put /usr/local/bin in front of the PATH inherited from supervisord's own
# environment (%(ENV_PATH)s expands to that inherited value).
environment=PATH=/usr/local/bin:%(ENV_PATH)s
# NOTE(review): env usually lives at /usr/bin/env; confirm that
# /usr/local/bin/env really exists on the target host.
command=/usr/local/bin/env python3 /var/barman/barman_exporter.py
# Run the exporter as the barman user.
user=barman
# Start together with supervisord.
autostart=true
# Deliver stop and kill signals to the whole process group, not only to the
# process supervisord spawned.
stopasgroup=true
killasgroup=true
# Restart the exporter whenever it exits; the very high retry count means
# supervisord effectively never gives up on starting it.
autorestart=true
startretries=10000
# One log file per stream, named after the program.
stderr_logfile=/var/log/%(program_name)s.err.log
stdout_logfile=/var/log/%(program_name)s.out.log
# Rotate each log at 200MB and keep a single rotated backup.
stdout_logfile_maxbytes=200MB
stdout_logfile_backups=1
stderr_logfile_maxbytes=200MB
stderr_logfile_backups=1
| #!/usr/bin/env python3.6 | |
| import sys | |
| import time | |
| import contextlib | |
| import collections | |
| from datetime import datetime | |
| import prometheus_client | |
| from prometheus_client import core | |
| from barman import cli | |
| from barman import output | |
| from barman import backup | |
| from barman.server import CheckOutputStrategy | |
class Output(output.ConsoleOutputWriter):
    """Output writer that remembers the outcome of every reported check.

    Results land in ``results``, a mapping of check name to a dict with the
    keys ``status`` and ``hint``.
    """

    # Defined on the class, so every instance shares this one mapping.
    results = collections.defaultdict(dict)

    def result_check(self, server_name, check, status, hint=None):
        """Store the latest ``status`` and ``hint`` reported for ``check``.

        ``server_name`` is accepted but not stored: entries are keyed by the
        check name only, so a later report for the same check replaces the
        earlier one.
        """
        self.results[check] = {'status': status, 'hint': hint}
class BarmanCollector:
    """Prometheus collector exposing barman backup and check metrics.

    Each scrape yields four gauge families:

    * ``barman_backups``: number of backups in state DONE, per server
    * ``barman_last_backup``: timestamp parsed from the last backup ID
    * ``barman_last_backup_age``: seconds elapsed since that timestamp
    * ``barman_status``: one sample per (server, check) pair
    """

    def __init__(self, args):
        """Keep ``args`` for the barman cli helpers and bind the result store.

        ``args`` is handed unchanged to ``cli.global_config`` and
        ``cli.get_server_list`` on every scrape.
        """
        self.args = args
        # The mapping the active output writer fills from result_check().
        self.results = output._writer.results

    @staticmethod
    def _metric_key(check_name):
        """Turn a check name into a label value: lower case, underscores."""
        return check_name.replace(' ', '_').replace('-', '_').lower()

    @staticmethod
    def _run_checks(server):
        """Run the server checks; outcomes reach the output writer."""
        with contextlib.closing(server):
            check_strategy = CheckOutputStrategy()
            # Check WAL archive
            server.check_archive(check_strategy)
            # Postgres configuration is not available on passive nodes
            if not server.passive_node:
                server.check_postgres(check_strategy)
            # Check barman directories from barman configuration
            server.check_directories(check_strategy)
            # Check retention policies
            server.check_retention_policy_settings(check_strategy)
            # Check for backup validity
            server.check_backup_validity(check_strategy)
            # Executes the backup manager set of checks
            server.backup_manager.check(check_strategy)
            # Check if the msg_list of the server
            # contains messages and output eventual failures
            server.check_configuration(check_strategy)
            # Executes check() for every archiver, passing
            # remote status information for efficiency
            for archiver in server.archivers:
                archiver.check(check_strategy)
            # Check archiver errors
            server.check_archiver_errors(check_strategy)

    def collect(self):
        """Query barman and yield the gauge families for this scrape."""
        # Imported here because the handler cleanup below is its only user.
        import logging

        # NOTE(review): cli.global_config() runs on every scrape and is
        # reported to open the barman log file again each time. Closing and
        # dropping the handlers left by the previous scrape keeps the number
        # of open log files constant. Confirm against barman's logging setup.
        for handler in logging.root.handlers[:]:
            logging.root.removeHandler(handler)
            handler.close()

        cli.global_config(self.args)
        servers = cli.get_server_list(self.args)

        backups_total = core.GaugeMetricFamily(
            'barman_backups', 'total backups available',
            labels=['server'])
        last_backup_ts = core.GaugeMetricFamily(
            'barman_last_backup', 'last backup timestamp',
            labels=['server'])
        last_backup_age = core.GaugeMetricFamily(
            'barman_last_backup_age', 'seconds since last backup',
            labels=['server'])
        status = core.GaugeMetricFamily(
            'barman_status', 'Several barman status checks',
            labels=['server', 'check'])

        for server_name, server in servers.items():
            manager = server.backup_manager
            # NOTE(review): resetting the private cache is reported to be
            # needed so backups taken after start-up are picked up; verify
            # against the installed barman version.
            manager._backup_cache = None

            done_backups = manager.get_available_backups(
                status_filter=(backup.BackupInfo.DONE,))
            backups_total.add_metric([server_name], len(done_backups))

            last_backup_id = manager.get_last_backup_id()
            if last_backup_id:
                now = datetime.now()
                # The backup ID is parsed as a local, naive timestamp.
                last_backup_time = datetime.strptime(
                    last_backup_id, '%Y%m%dT%H%M%S')
                last_backup_ts.add_metric(
                    [server_name],
                    time.mktime(last_backup_time.timetuple()))
                last_backup_age.add_metric(
                    [server_name],
                    (now - last_backup_time).total_seconds())

            # The result store is shared and keyed by check name only, so
            # drop whatever an earlier server or scrape left behind before
            # running this server's checks; otherwise stale entries would be
            # exported under this server's label.
            self.results.clear()
            self._run_checks(server)

            for name, value in self.results.items():
                # Results that carry a hint are not exported.
                if value['hint']:
                    continue
                status.add_metric(
                    [server_name, self._metric_key(name)],
                    int(value['status']))

        yield backups_total
        yield last_backup_ts
        yield last_backup_age
        yield status
if __name__ == '__main__':
    # Install the recording writer first: the Args attributes below and the
    # collector's constructor both read output._writer.
    output.set_output_writer(Output())

    class Args:
        # Attribute bag the collector passes to cli.global_config() and
        # cli.get_server_list().
        # NOTE(review): quiet, debug and format all point at the writer
        # object instead of plain flags/names. Presumably this keeps barman
        # from swapping the custom writer out; confirm against barman's cli.
        server_name = ['all']
        quiet = output._writer
        debug = output._writer
        color = 'auto'
        format = debug

    args = Args()
    core.REGISTRY.register(BarmanCollector(args))

    # Expose the metrics over HTTP on port 8000.
    prometheus_client.start_http_server(8000)

    # Nothing else to do in this thread; sleep forever so the process
    # stays alive.
    while True:
        time.sleep(1)
Yeah, I initially went the same route, but that didn't work too well. It's obvious that Barman was written for a single purpose by someone who is used to writing languages other than Python :)
Your module looks quite nice, I'll probably switch to that one soon. Thanks for packaging it so nicely!
@wolph If you are wondering how to fix your code to not open log files indefinitely you can add:
# [...]
import logging
logging.disable(logging.CRITICAL)
class BarmanCollector:
# [...]
def collect(self):
cli.global_config(self.args)
for handler in logging.root.handlers[:]:
logging.root.removeHandler(handler)
# [...]
My pull request with a JSON output formatter has been added to Barman 2.9+ and now we can use it. I store the metrics as a textfile which is digested by node-exporter. It works better that way because executing barman commands can take a while (even minutes).
Barman exporter here: https://github.com/ahes/prometheus-barman-exporter
You can also install barman-exporter with pip:
pip3 install barman-exporter
Excellent work, that looks great already.
Adding `server.backup_manager._backup_cache = None` before you use the backup_manager makes sure that it picks up any new backups, which fixes the exporter not updating after a new backup.
Hi,
Here is my barman-exporter: https://github.com/ahes/prometheus-barman-exporter
It exports similar metrics but I did some naming and convention changes to align with prometheus manual about writing exporters.
I started by writing a `JsonOutputWriter` class and using barman's cli.py directly, but after two hours I decided to let it go. Instead, I parse the barman CLI output. It is way simpler and works just fine.
Thank you for your gist. It was a great inspiration.