Created
October 1, 2013 19:13
-
-
Save iandanforth/6783556 to your computer and use it in GitHub Desktop.
Updated, SmugMug-specific boto data collection script.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #------------------------------------------------------------------------------- | |
| # Copyright (C) 2013 Numenta Inc. All rights reserved. | |
| # | |
| # The information and source code contained herein is the | |
| # exclusive property of Numenta Inc. No part of this software | |
| # may be used, reproduced, stored or distributed in any form, | |
| # without explicit written authorization from Numenta Inc. | |
| #------------------------------------------------------------------------------- | |
# Help text handed to the command-line option parser as its description.
# NOTE(review): the collection window is read from the StartTime/EndTime
# entries of the config file, so the "last two weeks" wording below may be
# stale -- confirm before relying on it.
desc = (
    "\n"
    "This tool will find instances with the tag specified in the accompanying\n"
    "boto-config.yaml and then collect the last two weeks of cloudwatch data from\n"
    "them."
)
| import optparse | |
| import os | |
| import boto | |
| import sys | |
| import csv | |
| import yaml | |
| from boto import ec2, rds, sqs | |
| from boto.ec2 import cloudwatch | |
| from datetime import timedelta, datetime | |
# Maps AWS region identifiers to their human-readable names.
# NOTE(review): nothing in the visible part of this file reads this table.
AWS_REGIONS = dict([
    ("ap-northeast-1", "Asia Pacific (Tokyo) Region"),
    ("ap-southeast-1", "Asia Pacific (Singapore) Region"),
    ("ap-southeast-2", "Asia Pacific (Sydney) Region"),
    ("eu-west-1", "EU (Ireland) Region"),
    ("sa-east-1", "South America (Sao Paulo) Region"),
    ("us-east-1", "US East (Northern Virginia) Region"),
    ("us-west-1", "US West (Northern California) Region"),
    ("us-west-2", "US West (Oregon) Region"),
])
def _getSecretAccessKey(environ):
    """Return the AWS secret key from the given environment mapping.

    The historical (misspelled) variable AWS_SECRET_ACCESS_KEY_ID is honoured
    first so existing setups behave exactly as before; the standard
    AWS_SECRET_ACCESS_KEY is accepted as a fallback.

    Raises KeyError if neither variable is set.
    """
    for name in ('AWS_SECRET_ACCESS_KEY_ID', 'AWS_SECRET_ACCESS_KEY'):
        if name in environ:
            return environ[name]
    raise KeyError('AWS_SECRET_ACCESS_KEY')


def _readLine(prompt):
    """Prompt on the console and return the typed line (Python 2 and 3)."""
    try:
        reader = raw_input  # Python 2
    except NameError:
        reader = input  # Python 3
    return reader(prompt)


def _findNumentaInstances(reservations):
    """Return a list of (instance, role) for instances labelled for Numenta.

    An instance qualifies when any of its tag values contains ':numenta:'.
    The role is the value of the instance's 'tier' tag, or '' if it has none.
    Each instance id is returned at most once.
    """
    found = []
    seenIds = set()
    for reservation in reservations:
        for instance in reservation.instances:
            if instance.id in seenIds:
                continue
            tags = instance.tags
            # Scan every tag before deciding, so neither the role nor the
            # match depends on the order in which tags are iterated.
            if not any(':numenta:' in value for value in tags.values()):
                continue
            seenIds.add(instance.id)
            found.append((instance, tags.get('tier', '')))
    return found


def _queryMetric(metric, startTime, endTime, statistics, period=300):
    """Query one metric over [startTime, endTime] in API-sized chunks.

    Returns the concatenated datapoints; each chunk is sorted by "Timestamp"
    and chunks are appended in chronological order.
    """
    datapoints = []
    for fromDate, toDate in genStartAndEndTimes(startTime, endTime):
        chunk = metric.query(start_time=fromDate, end_time=toDate,
                             statistics=statistics, period=period)
        if not chunk:
            continue
        chunk.sort(key=lambda row: row["Timestamp"])
        datapoints.extend(chunk)
    return datapoints


def _writeCsv(path, rows):
    """Write a non-empty list of dict rows to path as CSV with a header row.

    Columns are taken from the first row; every row is emitted in that same
    column order so values always line up with the header.
    """
    header = list(rows[0].keys())
    with open(path, 'w') as fh:
        writer = csv.writer(fh)
        writer.writerow(header)
        writer.writerows([[row.get(col) for col in header] for row in rows])


def main(options):
    """Collect CloudWatch data for labelled EC2 instances and write CSV files.

    options - parsed command line options; options.configFile is the path of
              a YAML file providing the keys 'DatasetCode', 'StartTime' and
              'EndTime' (keyword arguments for datetime), 'Regions' and
              'Services' (a mapping of service name to a dict with 'Metrics'
              and 'Tag').

    Credentials are read from the environment: AWS_ACCESS_KEY_ID plus
    AWS_SECRET_ACCESS_KEY_ID or AWS_SECRET_ACCESS_KEY.

    For every region, service, matching instance and metric name, one file
    named <datasetCode>_<region>_<role>_<instanceId>_<metricName>.csv is
    written to the current directory. Metrics that return no datapoints are
    reported and skipped.
    """
    # Load configuration. safe_load is used because the config only needs
    # plain YAML types; yaml.load without an explicit Loader can construct
    # arbitrary objects and is rejected by recent PyYAML releases.
    with open(options.configFile, 'r') as fh:
        config = yaml.safe_load(fh)

    # Get credentials
    accessKeyId = os.environ['AWS_ACCESS_KEY_ID']
    secretAccessKey = _getSecretAccessKey(os.environ)

    # A code for the user whose data we're collecting
    datasetCode = config['DatasetCode']

    # The period of time we want data for
    startTime = datetime(**config['StartTime'])
    endTime = datetime(**config['EndTime'])

    # Define the stats we want
    statistics = ["Average", "Minimum", "Maximum"]

    # Which services we'll pull from. NOTE: Only EC2 at the moment
    services = config['Services']

    for region in config['Regions']:
        print("Now working on region: %s ..." % region)

        # Connect to cloudwatch
        cwConn = cloudwatch.connect_to_region(
            region_name=region,
            aws_access_key_id=accessKeyId,
            aws_secret_access_key=secretAccessKey)

        for serviceName, serviceConfig in services.items():
            print("Working on service: %s ..." % serviceName)

            metricNames = serviceConfig['Metrics']
            tag = serviceConfig['Tag']
            if not tag:
                print("WARNING: No tag specified this might pull data from "
                      "*many* servers.")
                answer = _readLine("Continue? [y/n]: ")
                if answer not in ['y', 'yes', 'Y', 'Yes', 'YES']:
                    sys.exit(1)

            # Connect to service NOTE: EC2 specific for now
            ec2Conn = ec2.connect_to_region(
                region_name=region,
                aws_access_key_id=accessKeyId,
                aws_secret_access_key=secretAccessKey)

            # REMOVE KEY AS THIS IS NOT HOW SMUGMUG USES THEM
            # The configured tag is deliberately discarded, so no server-side
            # filter is applied; instances are selected by tag value below.
            tag = None
            filters = {}
            if tag:
                filters["tag-key"] = tag

            reservations = ec2Conn.get_all_instances(filters=filters)
            instances = _findNumentaInstances(reservations)

            # Loop over discovered instances
            for instance, role in instances:
                print("Retrieving data for Instance: %s ..." % instance.id)

                for metricName in metricNames:
                    print("Getting %s ..." % metricName)
                    metrics = cwConn.list_metrics(
                        metric_name=metricName,
                        dimensions={"InstanceId": instance.id},
                        namespace="AWS/EC2")
                    if not metrics:
                        continue

                    result = _queryMetric(metrics[0], startTime, endTime,
                                          statistics)
                    if not result:
                        # Nothing to write; avoid creating an empty file and
                        # indexing into an empty result list.
                        print("No data returned for %s on %s; skipping."
                              % (metricName, instance.id))
                        continue

                    # Write out results
                    outFileName = "%s_%s_%s_%s_%s.csv" % (datasetCode,
                                                          region,
                                                          role,
                                                          instance.id,
                                                          metricName)
                    _writeCsv(outFileName, result)
def genStartAndEndTimes(start, end, periodMinutes=5, maxRecords=1400):
    '''
    Returns a list of tuples (start, end) of datetime objects.

    start - datetime object - The start time of the full block
    end - datetime object - The end time of the full block
    periodMinutes - number - Sampling interval of the records, in minutes
    maxRecords - number - Maximum number of records requested per block

    The full block of time is broken up into consecutive smaller blocks to
    deal with the request limits of CloudWatch. That limit appears to be
    ~1,400 records / request. With the defaults (records at 5 minute
    intervals) each block is at most 5 * 1400 minutes long; the last block is
    capped at `end`.

    Returns an empty list when start >= end.
    Raises ValueError if periodMinutes * maxRecords is not positive, since a
    zero or negative block length could never reach `end`.
    '''
    blockMinutes = periodMinutes * maxRecords
    if blockMinutes <= 0:
        raise ValueError("periodMinutes * maxRecords must be positive")
    step = timedelta(minutes=blockMinutes)

    # The blocks of time we will return
    blocks = []
    blockStart = start
    while blockStart < end:
        # Find the next endpoint, capped at the overall end time
        blockEnd = min(blockStart + step, end)
        blocks.append((blockStart, blockEnd))
        blockStart = blockEnd

    return blocks
def verifyUserInput(options):
    '''
    Validates the options passed on the command line.

    options - parsed command line options; only options.configFile is checked

    If no config file was given, an error message is written to stderr and
    the process exits with status 1 (SystemExit). Returns None otherwise.
    '''
    if options.configFile:
        return

    # Diagnostics belong on stderr so they are not mixed into regular output
    sys.stderr.write("ERROR: -c is required to specify your config file.\n")
    sys.exit(1)
if __name__ == '__main__':
    # Build the command line interface; the module-level description is used
    # as the parser's help text.
    cliParser = optparse.OptionParser(description=desc)
    cliParser.add_option(
        "-c", "--config",
        dest="configFile",
        help="The configuration file to use.")

    # Positional arguments are parsed but not used by this tool
    options, args = cliParser.parse_args()

    # Stop early on malformed or invalid inputs, then run the collection
    verifyUserInput(options)
    main(options)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment