vvalorous · March 27, 2018 06:53
diff --git a/cloudwatcher.py b/cloudwatcher.py
 #!/usr/bin/env python

 import boto3
 import json
 import urllib
 import urllib2
 from datetime import datetime, timedelta

 # #################################################

 # AWS region whose CloudWatch metrics are inspected
 AWS_REGION = 'us-west-1'

 # PagerDuty endpoint that events are posted to in order to trigger an alert
 PAGERDUTY_URL = 'https://events.pagerduty.com/generic/2010-04-15/create_event.json'

 # Service key sent to PagerDuty with every event; this is the generic key
 # that PagerDuty provides freely
 PAGERDUTY_SERVICE_KEY = 'w_8PcNuhHa-y3xYdmc1x'

 # Look-back window in seconds. Metrics are put every 5 minutes in baseline and
 # the lambda also runs every 5 minutes, so checking the past 5 minutes is OK
 CLOUDWATCH_PERIOD = 5 * 60

 # Metrics that are read to decide whether the monitor itself has a problem.
 # Each entry needs 'namespace' and 'metricname'; 'dimensions' is optional and
 # is a list of {'Name': ..., 'Value': ...} pairs
 CLOUDWATCH_METRICS = [
    {
        'namespace': 'Test',
        'metricname': 'TestValueWithNoDimensions',
    },
    {
        'namespace': 'Test2',
        'metricname': 'MyMetric',
        'dimensions': [
            {'Name': 'Dimension1', 'Value': 'NameOfTheValue'},
        ],
    },
 ]

 # #################################################

 def pagerduty():
    """Trigger a PagerDuty alert reporting a failure reading CloudWatch metrics.

    Posts a JSON 'trigger' event to PAGERDUTY_URL, authenticated with
    PAGERDUTY_SERVICE_KEY, and prints the outcome. urllib2 errors and
    unparsable response bodies are printed rather than raised.

    Returns None.
    """
    print('INFO: Alerting on call support using PagerDuty')

    headers = {
        'content-type': 'application/json',
    }

    # The same incident_key is sent on every trigger.
    # NOTE(review): presumably this lets PagerDuty fold repeated triggers into
    # one incident -- confirm against the Events API documentation.
    params = json.dumps({
        'incident_key': 'MonitorOfMonitors',
        'service_key': PAGERDUTY_SERVICE_KEY,
        'event_type': 'trigger',
        'description': 'FAILURE reading metrics from CloudWatch',
        'client': 'CloudWatcher',
    }).encode('utf8')

    req = urllib2.Request(PAGERDUTY_URL, params, headers)

    try:
        print('INFO: Creating alert')
        rsp = urllib2.urlopen(req)
        try:
            data = json.loads(rsp.read())
        finally:
            # Always release the connection, even when the body is not JSON
            rsp.close()
    except urllib2.HTTPError as e:
        # HTTPError is a subclass of URLError, so it must be caught first
        print('ERROR: Not able to create alert')
        if hasattr(e, 'reason'):
            print('Reason: %s' % e.reason)
    except urllib2.URLError as e:
        print('ERROR: URL malformed')
        if hasattr(e, 'reason'):
            print('Reason: %s' % e.reason)
    except ValueError as e:
        # json.loads raises ValueError when the body is not valid JSON
        print('ERROR: PagerDuty response was not valid JSON')
        print('Reason: %s' % e)
    else:
        # Guard the lookup so an unexpected payload shape is reported as a
        # non-success instead of raising KeyError/TypeError
        if isinstance(data, dict) and data.get('status') == 'success':
            print('INFO: Alert was sent correctly')
        else:
            print('ERROR: Non success response from PagerDuty')

        print('INFO: PagerDuty Response= %s' % (data,))

 # #################################################

 def lambda_handler(event, context):
    """AWS Lambda entry point: log the triggering event id, then run main().

    event   -- invocation payload; its 'id' entry is logged when present.
    context -- Lambda context object; not used.
    """
    print('INFO: Lambda handler activated')

    # Logging must never stop the check from running, so tolerate payloads
    # that carry no 'id' (or are not dicts) instead of raising.
    event_id = event.get('id') if isinstance(event, dict) else None
    print('INFO: Event ID is %s' % (event_id,))

    main()

 # #################################################

 def main():
    """Check every configured CloudWatch metric and alert when data is missing.

    For each entry of CLOUDWATCH_METRICS, statistics covering the last
    CLOUDWATCH_PERIOD seconds are requested. A metric whose response has no
    datapoints counts as an error. If at least one metric had an error,
    pagerduty() is called once after all metrics were read.

    Returns None.
    """
    print('INFO: Starting task')

    cloudwatch = boto3.client('cloudwatch', region_name=AWS_REGION)

    print('INFO: Getting metrics from CloudWatch')

    total_error = 0

    for metric in CLOUDWATCH_METRICS:
        name = metric['metricname']
        namespace = metric['namespace']
        print('INFO: Reading metric %s from namespace %s' % (name, namespace))

        # Read the clock once so the window is exactly CLOUDWATCH_PERIOD long
        end_time = datetime.utcnow()
        query = {
            'Namespace': namespace,
            'MetricName': name,
            'StartTime': end_time - timedelta(seconds=CLOUDWATCH_PERIOD),
            'EndTime': end_time,
            'Period': CLOUDWATCH_PERIOD,
            'Statistics': ['Average', 'Minimum', 'Maximum'],
            'Unit': 'Count',
        }
        # Dimensions are optional in the configuration; pass them only when set
        if 'dimensions' in metric:
            query['Dimensions'] = metric['dimensions']

        response = cloudwatch.get_metric_statistics(**query)

        if not response['Datapoints']:
            print('WARN: Response from CloudWatch was empty for metric %s in namespace %s' % (name, namespace))
            total_error += 1
        else:
            print('INFO: Metric %s in namespace %s is fine' % (name, namespace))

    if total_error > 0:
        print('INFO: A total of %d metrics had errors' % total_error)
        pagerduty()

 # #################################################

 # Run the check directly when the file is executed as a script
 if '__main__' == __name__:
    main()
	#!/usr/bin/env python

	import boto3
	import json
	import urllib
	import urllib2
	from datetime import datetime, timedelta

	# #################################################

	# AWS region whose CloudWatch metrics are inspected
	AWS_REGION = 'us-west-1'

	# PagerDuty endpoint that events are posted to in order to trigger an alert
	PAGERDUTY_URL = 'https://events.pagerduty.com/generic/2010-04-15/create_event.json'

	# Service key sent to PagerDuty with every event; this is the generic key
	# that PagerDuty provides freely
	PAGERDUTY_SERVICE_KEY = 'w_8PcNuhHa-y3xYdmc1x'

	# Look-back window in seconds. Metrics are put every 5 minutes in baseline and
	# the lambda also runs every 5 minutes, so checking the past 5 minutes is OK
	CLOUDWATCH_PERIOD = 5 * 60

	# Metrics that are read to decide whether the monitor itself has a problem.
	# Each entry needs 'namespace' and 'metricname'; 'dimensions' is optional and
	# is a list of {'Name': ..., 'Value': ...} pairs
	CLOUDWATCH_METRICS = [
	    {
	        'namespace': 'Test',
	        'metricname': 'TestValueWithNoDimensions',
	    },
	    {
	        'namespace': 'Test2',
	        'metricname': 'MyMetric',
	        'dimensions': [
	            {'Name': 'Dimension1', 'Value': 'NameOfTheValue'},
	        ],
	    },
	]

	# #################################################

	def pagerduty():
	    """Trigger a PagerDuty alert reporting a failure reading CloudWatch metrics.

	    Posts a JSON 'trigger' event to PAGERDUTY_URL, authenticated with
	    PAGERDUTY_SERVICE_KEY, and prints the outcome. urllib2 errors and
	    unparsable response bodies are printed rather than raised.

	    Returns None.
	    """
	    print('INFO: Alerting on call support using PagerDuty')

	    headers = {
	        'content-type': 'application/json',
	    }

	    # The same incident_key is sent on every trigger.
	    # NOTE(review): presumably this lets PagerDuty fold repeated triggers into
	    # one incident -- confirm against the Events API documentation.
	    params = json.dumps({
	        'incident_key': 'MonitorOfMonitors',
	        'service_key': PAGERDUTY_SERVICE_KEY,
	        'event_type': 'trigger',
	        'description': 'FAILURE reading metrics from CloudWatch',
	        'client': 'CloudWatcher',
	    }).encode('utf8')

	    req = urllib2.Request(PAGERDUTY_URL, params, headers)

	    try:
	        print('INFO: Creating alert')
	        rsp = urllib2.urlopen(req)
	        try:
	            data = json.loads(rsp.read())
	        finally:
	            # Always release the connection, even when the body is not JSON
	            rsp.close()
	    except urllib2.HTTPError as e:
	        # HTTPError is a subclass of URLError, so it must be caught first
	        print('ERROR: Not able to create alert')
	        if hasattr(e, 'reason'):
	            print('Reason: %s' % e.reason)
	    except urllib2.URLError as e:
	        print('ERROR: URL malformed')
	        if hasattr(e, 'reason'):
	            print('Reason: %s' % e.reason)
	    except ValueError as e:
	        # json.loads raises ValueError when the body is not valid JSON
	        print('ERROR: PagerDuty response was not valid JSON')
	        print('Reason: %s' % e)
	    else:
	        # Guard the lookup so an unexpected payload shape is reported as a
	        # non-success instead of raising KeyError/TypeError
	        if isinstance(data, dict) and data.get('status') == 'success':
	            print('INFO: Alert was sent correctly')
	        else:
	            print('ERROR: Non success response from PagerDuty')

	        print('INFO: PagerDuty Response= %s' % (data,))

	# #################################################

	def lambda_handler(event, context):
	    """AWS Lambda entry point: log the triggering event id, then run main().

	    event   -- invocation payload; its 'id' entry is logged when present.
	    context -- Lambda context object; not used.
	    """
	    print('INFO: Lambda handler activated')

	    # Logging must never stop the check from running, so tolerate payloads
	    # that carry no 'id' (or are not dicts) instead of raising.
	    event_id = event.get('id') if isinstance(event, dict) else None
	    print('INFO: Event ID is %s' % (event_id,))

	    main()

	# #################################################

	def main():
	    """Check every configured CloudWatch metric and alert when data is missing.

	    For each entry of CLOUDWATCH_METRICS, statistics covering the last
	    CLOUDWATCH_PERIOD seconds are requested. A metric whose response has no
	    datapoints counts as an error. If at least one metric had an error,
	    pagerduty() is called once after all metrics were read.

	    Returns None.
	    """
	    print('INFO: Starting task')

	    cloudwatch = boto3.client('cloudwatch', region_name=AWS_REGION)

	    print('INFO: Getting metrics from CloudWatch')

	    total_error = 0

	    for metric in CLOUDWATCH_METRICS:
	        name = metric['metricname']
	        namespace = metric['namespace']
	        print('INFO: Reading metric %s from namespace %s' % (name, namespace))

	        # Read the clock once so the window is exactly CLOUDWATCH_PERIOD long
	        end_time = datetime.utcnow()
	        query = {
	            'Namespace': namespace,
	            'MetricName': name,
	            'StartTime': end_time - timedelta(seconds=CLOUDWATCH_PERIOD),
	            'EndTime': end_time,
	            'Period': CLOUDWATCH_PERIOD,
	            'Statistics': ['Average', 'Minimum', 'Maximum'],
	            'Unit': 'Count',
	        }
	        # Dimensions are optional in the configuration; pass them only when set
	        if 'dimensions' in metric:
	            query['Dimensions'] = metric['dimensions']

	        response = cloudwatch.get_metric_statistics(**query)

	        if not response['Datapoints']:
	            print('WARN: Response from CloudWatch was empty for metric %s in namespace %s' % (name, namespace))
	            total_error += 1
	        else:
	            print('INFO: Metric %s in namespace %s is fine' % (name, namespace))

	    if total_error > 0:
	        print('INFO: A total of %d metrics had errors' % total_error)
	        pagerduty()

	# #################################################

	# Run the check directly when the file is executed as a script
	if '__main__' == __name__:
	    main()