Created
July 29, 2018 21:35
-
-
Save burnsie7/3f387ab95d99ef3cf1b1f008e16dd1d0 to your computer and use it in GitHub Desktop.
multi_org_hosts_count.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import datetime | |
import time | |
import requests | |
import simplejson | |
from datadog import initialize, api | |
""" | |
This script gives a real time report on ec2 and Datadog agent host usage | |
from multiple organizations and reports them up to the 'main' parent account. | |
It relies on the AWS integration being tagged with 'cloud_provider:aws' | |
as seen here: https://cl.ly/0z2z3i3T413W | |
It reports the following metrics that provide real-time data: | |
datadog.hosts.recent - Reports the count of ec2 and Datadog agent hosts currently | |
reporting metrics, excluding muted hosts. | |
datadog.hosts.up - Reports the total number of ec2 and Datadog agent hosts reporting | |
to Datadog within the last 2 hours, including muted hosts. | |
It reports the following metrics from your calculated usage. These metrics are delayed by 48 hours: | |
datadog.usage.agent_host_count | |
datadog.usage.apm_host_count | |
datadog.usage.aws_host_count | |
datadog.usage.container_count | |
datadog.usage.gcp_host_count | |
datadog.usage.host_count | |
This script can be installed as a cron job to retrieve usage metrics. | |
We recommend running once every fifteen minutes. | |
Example cron entry for every 15 minutes: | |
*/15 * * * * /usr/bin/python /home/username/multi_org_hosts_count.py | |
""" | |
# API/app key pairs for each organization to poll. 'main' is the parent
# account that receives the aggregated metrics.
# NOTE(review): values are redacted placeholders -- supply real keys (ideally
# from environment variables rather than hard-coding them in source).
ORG_KEYS = {
    'main': {'api_key': '4xxxxxx9', 'app_key': 'a21bxxxxxxxf'},
    'beta': {'api_key': '7xxxxxxf', 'app_key': 'df6xxxxxxx97'},
    'prod': {'api_key': 'cxxxxxxd', 'app_key': '62bxxxxxxxda'},
    'prod0': {'api_key': 'xxxxxxe2', 'app_key': '17xxxxxxx3b2'},
    'tap': {'api_key': '78xxxxxx', 'app_key': '4934xxxxxxx2'},
}

# Accumulators filled while iterating over ORG_KEYS, then flushed to 'main'.
hosts_up = []             # datadog.hosts.up points, one dict per org
hosts_recent = []         # datadog.hosts.recent points, one dict per org
host_count_check_up = 0   # service-check status: 0 = OK, 3 = UNKNOWN on failure
host_usage_history = []   # lists of delayed usage metrics, one list per org

# get host count for all orgs, including parent
now = int(time.time())
host_query = "sum:aws.ec2.host_ok{*}+sum:datadog.agent.running{!cloud_provider:aws}"
USAGE_URL = 'https://app.datadoghq.com/api/v1/usage/'


def build_standard_url(endpoint, dd_keys):
    """Build a usage-API URL covering the single hour starting 48h ago.

    Args:
        endpoint: usage endpoint name, e.g. 'hosts'.
        dd_keys: pre-built query string carrying the org's credentials,
            e.g. '?api_key=...&application_key=...'.

    Returns:
        Tuple of (url, start_hr) where start_hr is the queried hour in the
        'YYYY-MM-DDTHH' format the usage API expects.
    """
    # Capture the current time ONCE so start/end are guaranteed exactly one
    # hour apart (two separate now() calls could straddle an hour boundary
    # and produce a window of zero or two hours).
    current = datetime.datetime.now()
    # Example date format for hour: 2018-03-14T09
    start_hr = (current - datetime.timedelta(hours=48)).strftime('%Y-%m-%dT%H')
    end_hr = (current - datetime.timedelta(hours=47)).strftime('%Y-%m-%dT%H')
    url = '{}{}{}&start_hr={}&end_hr={}'.format(
        USAGE_URL, endpoint, dd_keys, start_hr, end_hr)
    return url, start_hr
def get_usage_metrics(url):
    """Fetch usage metrics from a Datadog usage-API URL.

    Args:
        url: fully-built usage endpoint URL including credentials.

    Returns:
        The list under the response's 'usage' key, or an empty list when the
        request fails or the payload is missing/not JSON.
    """
    usage_metrics = []
    try:
        # Default the missing-'usage' case to [] (not None) so callers can
        # safely call len() on / iterate the result.
        usage_metrics = requests.get(url).json().get('usage', [])
    except requests.exceptions.MissingSchema:
        print('Invalid URL format: {}'.format(url))
    except requests.exceptions.ConnectionError:
        print('Could not connect to url: {}'.format(url))
    except simplejson.scanner.JSONDecodeError:
        print('The response did not contain JSON data')
    return usage_metrics
def format_standard_metrics(metrics, start_hr, tags):
    """Convert usage-API entries into Datadog metric-submission dicts.

    Every non-'hour' key of each entry becomes a 'datadog.usage.<key>'
    metric. The 'hour' key is used only to validate that the data covers
    start_hr; on a mismatch submission is aborted.

    Args:
        metrics: list of dicts as returned by the usage API.
        start_hr: expected hour string in 'YYYY-MM-DDTHH' format.
        tags: list of tags to attach to every metric.

    Returns:
        List of {'metric', 'points', 'tags'} dicts; empty on hour mismatch
        or when no usable entries are present.
    """
    metric_list = []
    for metric_dict in metrics:
        try:
            # .items() works on both Python 2 and 3 (iteritems() is 2-only).
            for k, v in metric_dict.items():
                if k == 'hour':
                    if v != start_hr:
                        print('Incorrect hour for metrics. Aborting metric submission.')
                        # Return an empty list rather than None so callers
                        # can take len() of the result without a TypeError.
                        return []
                else:
                    metric_name = 'datadog.usage.' + k
                    metric_list.append({'metric': metric_name, 'points': v, 'tags': tags})
        except AttributeError:
            print('Metrics are not in dict format')
    return metric_list
# Poll every organization: real-time host counts via the metric/hosts APIs,
# plus the 48h-delayed usage metrics. Results are accumulated into the
# module-level lists and submitted to the 'main' org below.
for key, value in ORG_KEYS.items():  # .items() works on both Python 2 and 3
    print('checking value of %s' % key)
    initialize(**value)
    try:
        # get count of hosts reporting metrics, excluding muted hosts
        last_value = 0
        query_result = api.Metric.query(start=now - 900, end=now, query=host_query)
        if query_result.get('errors', None):
            print(query_result['errors'])
            host_count_check_up = 3  # UNKNOWN status for the service check
        else:
            series = query_result.get('series', None)
            if series:
                pointlist = series[0].get('pointlist', None)
                if pointlist:
                    # Report the highest value seen in the 15-minute window.
                    for point in pointlist:
                        val = point[1]
                        if val > last_value:
                            last_value = val
            hosts_recent.append({'metric': 'datadog.hosts.recent', 'points': last_value, 'tags': ["env:{}".format(key)]})
        # get host up count for all hosts for last 2 hours including muted hosts
        host_res = api.Hosts.totals()
        if host_res.get('errors', None):
            print(host_res['errors'])
            # Use 3 (UNKNOWN) like every other failure path; the original
            # `= False` would have reported status 0 (OK) despite the error.
            host_count_check_up = 3
        else:
            # Only read 'total_up' when the call succeeded; on an error
            # response the key may be absent and would raise KeyError.
            up_count = host_res['total_up']
            hosts_up.append({'metric': 'datadog.hosts.up', 'points': up_count, 'tags': ["env:{}".format(key)]})
        # get the usage metrics from 48 hours ago
        dd_keys = '?api_key=' + value['api_key'] + '&application_key=' + value['app_key']
        url, start_hr = build_standard_url('hosts', dd_keys)
        metrics = get_usage_metrics(url)
        tags = ["env:{}".format(key)]
        metrics_list = format_standard_metrics(metrics, start_hr, tags)
        # Truthiness check also tolerates a None return from the formatter.
        if metrics_list:
            host_usage_history.append(metrics_list)
        else:
            print('No usage metrics available for endpoint: {}'.format(key))
    except Exception as e:
        # Broad catch is deliberate: one org's failure must not abort the
        # whole report. Log it and flag the service check as UNKNOWN.
        print(e)
        host_count_check_up = 3
# submit active host count to parent org
initialize(**ORG_KEYS['main'])
# hosts_up and hosts_recent entries share the same {'metric','points','tags'}
# shape, so one loop submits both accumulators (replaces two identical loops).
for entry in hosts_up + hosts_recent:
    api.Metric.send(metric=entry['metric'], points=entry['points'], tags=entry['tags'])
# Delayed usage metrics are already formatted as lists of metric dicts, which
# api.Metric.send accepts as a single positional batch.
for metric_list in host_usage_history:
    api.Metric.send(metric_list)
# Report overall script health: 0 = OK, 3 = UNKNOWN (some lookup failed above).
api.ServiceCheck.check(check='datadog.host_count_check', host_name='temp', status=host_count_check_up)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment