import matplotlib
import seaborn as sns
import pandas as pd
from scipy.stats import zscore
from sklearn.preprocessing import MinMaxScaler
from sklearn.cluster import DBSCAN
from matplotlib import cm
from datetime import datetime
from dateutil import parser
import json
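# Assumed input layout (the JSON files themselves are not included in this gist):
# each *_anomaly_data.json is expected to hold parallel per-period lists such as
# {"total_cost": [...], "number_of_instances": [...]} (or "number_of_vms" /
# "number_of_instance_types"), and each 1_*_data.json asset file is expected to
# hold the latest type count, e.g. {"ec2_instance_type": <count>}.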
# Load cube data
with open('ec2_anomaly_data.json') as f:
    ec2_anomaly_data = json.load(f)
with open('ec2_type_anomaly_data.json') as f:
    ec2_type_anomaly_data = json.load(f)
with open('gcp_anomaly_data.json') as f:
    gcp_anomaly_data = json.load(f)
with open('vm_anomaly_data.json') as f:
    vm_anomaly_data = json.load(f)
with open('vm_type_anomaly_data.json') as f:
    vm_type_anomaly_data = json.load(f)

# Load asset data
with open('1_AwsInstance_data.json') as f:
    aws_instance_asset_data = json.load(f)
with open('1_AzureVm_data.json') as f:
    azure_vm_asset_data = json.load(f)
with open('1_GcpComputeInstance_data.json') as f:
    gcp_instance_asset_data = json.load(f)
def aws_anomaly_analysis(cube_data, ec2_type_anomaly_data):
    cost_data = []
    instance_count_data = []
    instance_type_data = []
    for i, number_of_instances in enumerate(cube_data["number_of_instances"]):
        instance_count_data.append([i, number_of_instances])
    for i, compute_cost in enumerate(cube_data["total_cost"]):
        cost_data.append([i, compute_cost])
    for i, instance_types in enumerate(ec2_type_anomaly_data["number_of_instance_types"]):
        instance_type_data.append([i, instance_types])
    detect_cost_anomalies("aws", cost_data)
    detect_count_anomalies("aws", instance_count_data)
    detect_type_anomalies("aws", instance_type_data, aws_instance_asset_data)
def azure_anomaly_analysis(cube_data, vm_type_anomaly_data):
    cost_data = []
    vm_count_data = []
    vm_type_data = []
    for i, number_of_vms in enumerate(cube_data["number_of_vms"]):
        vm_count_data.append([i, number_of_vms])
    for i, vm_cost in enumerate(cube_data["total_cost"]):
        cost_data.append([i, vm_cost])
    for i, vm_types in enumerate(vm_type_anomaly_data["number_of_vm_types"]):
        vm_type_data.append([i, vm_types])
    detect_cost_anomalies("azure", cost_data)
    detect_count_anomalies("azure", vm_count_data)
    detect_type_anomalies("azure", vm_type_data, azure_vm_asset_data)
def gcp_anomaly_analysis(cube_data):
    cost_data = []
    instance_count_data = []
    for i, number_of_instances in enumerate(cube_data["number_of_instances"]):
        instance_count_data.append([i, number_of_instances])
    for i, compute_cost in enumerate(cube_data["total_cost"]):
        cost_data.append([i, compute_cost])
    detect_cost_anomalies("gcp", cost_data)
    detect_count_anomalies("gcp", instance_count_data)
    # detect_type_anomalies()
def detect_cost_anomalies(cloud, cost_data):
    # Standardize the cost series; values more than 3 standard deviations out are flagged
    raw_data = cost_data
    cost_data = pd.DataFrame(cost_data)
    cost_data[1] = zscore(cost_data[1])
    cost_data["is_outlier"] = cost_data[1].apply(lambda x: x <= -3 or x >= 3)

    # Scale day/cost pairs to [0, 1] before clustering
    scaler = MinMaxScaler()
    dayAndCost = cost_data[[0, 1]]
    dayAndCost = scaler.fit_transform(dayAndCost)
    dayAndCost = pd.DataFrame(dayAndCost, columns=[0, 1])

    # Cluster with DBSCAN; points it cannot assign to any cluster are labelled -1 (noise)
    outlier_detection = DBSCAN(eps=0.3, metric="euclidean", min_samples=3, n_jobs=-1)
    clusters = outlier_detection.fit_predict(dayAndCost)

    # Create and display plot
    cmap = cm.get_cmap('Accent')
    dayAndCost.plot.scatter(x=0, y=1, c=clusters, cmap=cmap, colorbar=False)
    # matplotlib.pyplot.show()

    # Create anomaly data
    anomaly_data = []
    description = "Cost is different than normal"
    count = 1
    severity = "CRITICAL"
    for i, label in enumerate(clusters):
        if label == -1:  # DBSCAN marks outliers (noise points) with the label -1
            anomaly = {}
            anomaly["cloud"] = cloud
            anomaly["description"] = description
            anomaly["count"] = count
            anomaly["severity"] = severity
            cost_data_point = raw_data[i][1]
            prev_cost_data_point = raw_data[i - 1][1]
            if cost_data_point > prev_cost_data_point:
                verb = "increased"
            else:
                verb = "decreased"
            # Day-over-day dollar change
            cost_change = abs(int(round(cost_data_point - prev_cost_data_point)))
            anomaly["impact"] = "Your instance cost " + verb + " " + str(cost_change) + " dollars in one day"
            anomaly_data.append(anomaly)
    write_to_file(anomaly_data, "anomalies.json")
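# Example record appended to anomalies.json by the function above (values illustrative):
# {"cloud": "aws", "description": "Cost is different than normal", "count": 1,
#  "severity": "CRITICAL", "impact": "Your instance cost increased 42 dollars in one day"}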
def detect_count_anomalies(cloud, count_data):
    # Standardize the count series; values more than 3 standard deviations out are flagged
    raw_count_data = count_data
    count_data = pd.DataFrame(count_data)
    count_data[1] = zscore(count_data[1])
    count_data["is_outlier"] = count_data[1].apply(lambda x: x <= -3 or x >= 3)

    # Scale day/count pairs to [0, 1] before clustering
    scaler = MinMaxScaler()
    dayAndCount = count_data[[0, 1]]
    dayAndCount = scaler.fit_transform(dayAndCount)
    dayAndCount = pd.DataFrame(dayAndCount, columns=[0, 1])

    # Cluster with DBSCAN; points it cannot assign to any cluster are labelled -1 (noise)
    outlier_detection = DBSCAN(eps=0.2, metric="euclidean", min_samples=3, n_jobs=-1)
    clusters = outlier_detection.fit_predict(dayAndCount)

    # Create and display plot
    cmap = cm.get_cmap('Accent')
    dayAndCount.plot.scatter(x=0, y=1, c=clusters, cmap=cmap, colorbar=False)
    # matplotlib.pyplot.show()

    # Create anomaly data
    anomaly_data = []
    description = "Instance count is different than normal"
    count = 1
    severity = "CRITICAL"
    if cloud == "azure":
        term = "VM"
    else:
        term = "Instance"
    for i, label in enumerate(clusters):
        if label == -1:  # DBSCAN marks outliers (noise points) with the label -1
            anomaly = {}
            anomaly["cloud"] = cloud
            anomaly["description"] = description
            anomaly["count"] = count
            anomaly["severity"] = severity
            count_data_point = raw_count_data[i][1]
            prev_count_data_point = raw_count_data[i - 1][1]
            if count_data_point > prev_count_data_point:
                verb = "increased"
            else:
                verb = "decreased"
            count_change = abs(int(round(count_data_point - prev_count_data_point)))
            anomaly["impact"] = "Your " + term + " count " + verb + " by " + str(count_change) + " " + term + "s in the last 15 minutes"
            anomaly_data.append(anomaly)
    write_to_file(anomaly_data, "anomalies.json")
def detect_type_anomalies(cloud, type_data, asset_data):
    # Get type count from the last 15-minute collection cycle and append to existing cube data
    if cloud == "aws":
        realtime_type_count = asset_data["ec2_instance_type"]
    elif cloud == "azure":
        realtime_type_count = asset_data["vm_instance_type"]
    elif cloud == "gcp":
        realtime_type_count = asset_data["gcp_instance_type"]
    type_data.append([len(type_data), realtime_type_count])

    # Standardize the type-count series; values more than 3 standard deviations out are flagged
    raw_type_data = type_data
    type_data = pd.DataFrame(type_data)
    type_data[1] = zscore(type_data[1])
    type_data["is_outlier"] = type_data[1].apply(lambda x: x <= -3 or x >= 3)

    # Scale day/type-count pairs to [0, 1] before clustering
    scaler = MinMaxScaler()
    dayAndCount = type_data[[0, 1]]
    dayAndCount = scaler.fit_transform(dayAndCount)
    dayAndCount = pd.DataFrame(dayAndCount, columns=[0, 1])

    # Cluster with DBSCAN; points it cannot assign to any cluster are labelled -1 (noise)
    outlier_detection = DBSCAN(eps=0.2, metric="euclidean", min_samples=3, n_jobs=-1)
    clusters = outlier_detection.fit_predict(dayAndCount)

    # Create and display plot
    cmap = cm.get_cmap('Accent')
    dayAndCount.plot.scatter(x=0, y=1, c=clusters, cmap=cmap, colorbar=False)
    # matplotlib.pyplot.show()

    # Create anomaly data
    anomaly_data = []
    description = "Instance type count is different than normal"
    count = 1
    severity = "CRITICAL"
    if cloud == "azure":
        term = "VM"
    else:
        term = "Instance"
    for i, label in enumerate(clusters):
        if label == -1:  # DBSCAN marks outliers (noise points) with the label -1
            anomaly = {}
            anomaly["cloud"] = cloud
            anomaly["description"] = description
            anomaly["count"] = count
            anomaly["severity"] = severity
            count_data_point = raw_type_data[i][1]
            prev_count_data_point = raw_type_data[i - 1][1]
            if count_data_point > prev_count_data_point:
                verb = "increased"
            else:
                verb = "decreased"
            count_change = abs(int(round(count_data_point - prev_count_data_point)))
            anomaly["impact"] = "Your " + term + " type count " + verb + " by " + str(count_change) + " in the last 15 minutes"
            anomaly_data.append(anomaly)
    write_to_file(anomaly_data, "anomalies.json")
def write_to_file(data, filename):
    # Append this batch of anomalies to the output file (each call appends one JSON array)
    with open(filename, 'a') as outfile:
        json.dump(data, outfile)
# Run the anomaly analysis for each cloud provider
clouds = ["aws", "azure", "gcp"]
for cloud in clouds:
    if cloud == "aws":
        aws_anomaly_analysis(ec2_anomaly_data, ec2_type_anomaly_data)
    elif cloud == "azure":
        azure_anomaly_analysis(vm_anomaly_data, vm_type_anomaly_data)
    elif cloud == "gcp":
        gcp_anomaly_analysis(gcp_anomaly_data)