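# Cloud anomaly detection: loads pre-aggregated "cube" metrics and asset data
# for AWS, Azure, and GCP, flags outliers in cost, instance count, and
# instance-type count using z-scores and DBSCAN, and writes the resulting
# anomaly records to anomalies.json.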
import json

import matplotlib.pyplot
import pandas as pd
from scipy.stats import zscore
from sklearn.preprocessing import MinMaxScaler
from sklearn.cluster import DBSCAN
from matplotlib import cm

# Load cube data
with open('ec2_anomaly_data.json') as f:
    ec2_anomaly_data = json.load(f)
with open('ec2_type_anomaly_data.json') as f:
    ec2_type_anomaly_data = json.load(f)
with open('gcp_anomaly_data.json') as f:
    gcp_anomaly_data = json.load(f)
with open('vm_anomaly_data.json') as f:
    vm_anomaly_data = json.load(f)
with open('vm_type_anomaly_data.json') as f:
    vm_type_anomaly_data = json.load(f)

# Load asset data
with open('1_AwsInstance_data.json') as f:
    aws_instance_asset_data = json.load(f)
with open('1_AzureVm_data.json') as f:
    azure_vm_asset_data = json.load(f)
with open('1_GcpComputeInstance_data.json') as f:
    gcp_instance_asset_data = json.load(f)

def aws_anomaly_analysis(cube_data, ec2_type_anomaly_data):
    cost_data = []
    instance_count_data = []
    instance_type_data = []
    for i, number_of_instances in enumerate(cube_data["number_of_instances"]):
        instance_count_data.append([i, number_of_instances])
    for i, compute_cost in enumerate(cube_data["total_cost"]):
        cost_data.append([i, compute_cost])
    for i, instance_types in enumerate(ec2_type_anomaly_data["number_of_instance_types"]):
        instance_type_data.append([i, instance_types])
    detect_cost_anomalies("aws", cost_data)
    detect_count_anomalies("aws", instance_count_data)
    detect_type_anomalies("aws", instance_type_data, aws_instance_asset_data)

def azure_anomaly_analysis(cube_data, vm_type_anomaly_data):
    cost_data = []
    vm_count_data = []
    vm_type_data = []
    for i, number_of_vms in enumerate(cube_data["number_of_vms"]):
        vm_count_data.append([i, number_of_vms])
    for i, vm_cost in enumerate(cube_data["total_cost"]):
        cost_data.append([i, vm_cost])
    for i, vm_types in enumerate(vm_type_anomaly_data["number_of_vm_types"]):
        vm_type_data.append([i, vm_types])
    detect_cost_anomalies("azure", cost_data)
    detect_count_anomalies("azure", vm_count_data)
    detect_type_anomalies("azure", vm_type_data, azure_vm_asset_data)

def gcp_anomaly_analysis(cube_data):
    cost_data = []
    instance_count_data = []
    for i, number_of_instances in enumerate(cube_data["number_of_instances"]):
        instance_count_data.append([i, number_of_instances])
    for i, compute_cost in enumerate(cube_data["total_cost"]):
        cost_data.append([i, compute_cost])
    detect_cost_anomalies("gcp", cost_data)
    detect_count_anomalies("gcp", instance_count_data)
    # detect_type_anomalies()
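
# Each detector below z-scores the series, min-max scales the (index, value)
# pairs, and clusters them with DBSCAN; points labelled -1 (noise) are treated
# as anomalies.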
def detect_cost_anomalies(cloud, cost_data):
    # Flag points with |z-score| >= 3 as candidate outliers
    raw_data = cost_data
    cost_data = pd.DataFrame(cost_data)
    cost_data[1] = zscore(cost_data[1])
    cost_data["is_outlier"] = cost_data[1].apply(lambda x: x <= -3 or x >= 3)
    # Scale the (day, cost) pairs to [0, 1] before clustering
    scaler = MinMaxScaler()
    dayAndCost = cost_data[[0, 1]]
    dayAndCost = scaler.fit_transform(dayAndCost)
    dayAndCost = pd.DataFrame(dayAndCost, columns=[0, 1])
    # Create prediction model for outliers; DBSCAN labels noise points as -1
    outlier_detection = DBSCAN(eps=0.3, metric="euclidean", min_samples=3, n_jobs=-1)
    clusters = outlier_detection.fit_predict(dayAndCost)
    # Create and display plot
    cmap = cm.get_cmap('Accent')
    dayAndCost.plot.scatter(x=0, y=1, c=clusters, cmap=cmap, colorbar=False)
    # matplotlib.pyplot.show()
    # Create anomaly data
    anomaly_data = []
    description = "Cost is different than normal"
    count = 1
    severity = "CRITICAL"
    for i, label in enumerate(clusters):
        if label == -1:
            anomaly = {}
            anomaly["cloud"] = cloud
            anomaly["description"] = description
            anomaly["count"] = count
            anomaly["severity"] = severity
            cost_data_point = raw_data[i][1]
            prev_cost_data_point = raw_data[i - 1][1]
            if cost_data_point > prev_cost_data_point:
                verb = "increased"
            else:
                verb = "decreased"
            cost_change = int(round(abs(cost_data_point - prev_cost_data_point)))
            anomaly["impact"] = "Your instance cost " + verb + " " + str(cost_change) + " dollars in one day"
            anomaly_data.append(anomaly)
    write_to_file(anomaly_data, "anomalies.json")

def detect_count_anomalies(cloud, count_data):
    # Flag points with |z-score| >= 3 as candidate outliers
    raw_count_data = count_data
    count_data = pd.DataFrame(count_data)
    count_data[1] = zscore(count_data[1])
    count_data["is_outlier"] = count_data[1].apply(lambda x: x <= -3 or x >= 3)
    # Scale the (day, count) pairs to [0, 1] before clustering
    scaler = MinMaxScaler()
    dayAndCount = count_data[[0, 1]]
    dayAndCount = scaler.fit_transform(dayAndCount)
    dayAndCount = pd.DataFrame(dayAndCount, columns=[0, 1])
    # Create prediction model for outliers; DBSCAN labels noise points as -1
    outlier_detection = DBSCAN(eps=0.2, metric="euclidean", min_samples=3, n_jobs=-1)
    clusters = outlier_detection.fit_predict(dayAndCount)
    # Create and display plot
    cmap = cm.get_cmap('Accent')
    dayAndCount.plot.scatter(x=0, y=1, c=clusters, cmap=cmap, colorbar=False)
    # matplotlib.pyplot.show()
    # Create anomaly data
    anomaly_data = []
    description = "Instance count is different than normal"
    count = 1
    severity = "CRITICAL"
    if cloud == "azure":
        term = "VM"
    else:
        term = "Instance"
    for i, label in enumerate(clusters):
        if label == -1:
            anomaly = {}
            anomaly["cloud"] = cloud
            anomaly["description"] = description
            anomaly["count"] = count
            anomaly["severity"] = severity
            count_data_point = raw_count_data[i][1]
            prev_count_data_point = raw_count_data[i - 1][1]
            if count_data_point > prev_count_data_point:
                verb = "increased"
            else:
                verb = "decreased"
            count_change = int(round(abs(count_data_point - prev_count_data_point)))
            anomaly["impact"] = "Your " + term + " count " + verb + " by " + str(count_change) + " " + term + "s in the last 15 minutes"
            anomaly_data.append(anomaly)
    write_to_file(anomaly_data, "anomalies.json")

def detect_type_anomalies(cloud, type_data, asset_data):
    # Get the type count from the latest 15-minute collection cycle and append it to the existing cube data
    if cloud == "aws":
        realtime_type_count = asset_data["ec2_instance_type"]
    elif cloud == "azure":
        realtime_type_count = asset_data["vm_instance_type"]
    elif cloud == "gcp":
        realtime_type_count = asset_data["gcp_instance_type"]
    type_data.append([len(type_data), realtime_type_count])
    # Flag points with |z-score| >= 3 as candidate outliers
    raw_type_data = type_data
    type_data = pd.DataFrame(type_data)
    type_data[1] = zscore(type_data[1])
    type_data["is_outlier"] = type_data[1].apply(lambda x: x <= -3 or x >= 3)
    # Scale the (day, type count) pairs to [0, 1] before clustering
    scaler = MinMaxScaler()
    dayAndCount = type_data[[0, 1]]
    dayAndCount = scaler.fit_transform(dayAndCount)
    dayAndCount = pd.DataFrame(dayAndCount, columns=[0, 1])
    # Create prediction model for outliers; DBSCAN labels noise points as -1
    outlier_detection = DBSCAN(eps=0.2, metric="euclidean", min_samples=3, n_jobs=-1)
    clusters = outlier_detection.fit_predict(dayAndCount)
    # Create and display plot
    cmap = cm.get_cmap('Accent')
    dayAndCount.plot.scatter(x=0, y=1, c=clusters, cmap=cmap, colorbar=False)
    # matplotlib.pyplot.show()
    # Create anomaly data
    anomaly_data = []
    description = "Instance type count is different than normal"
    count = 1
    severity = "CRITICAL"
    if cloud == "azure":
        term = "VM"
    else:
        term = "Instance"
    for i, label in enumerate(clusters):
        if label == -1:
            anomaly = {}
            anomaly["cloud"] = cloud
            anomaly["description"] = description
            anomaly["count"] = count
            anomaly["severity"] = severity
            count_data_point = raw_type_data[i][1]
            prev_count_data_point = raw_type_data[i - 1][1]
            if count_data_point > prev_count_data_point:
                verb = "increased"
            else:
                verb = "decreased"
            count_change = int(round(abs(count_data_point - prev_count_data_point)))
            anomaly["impact"] = "Your " + term + " type count " + verb + " by " + str(count_change) + " " + term + " types in the last 15 minutes"
            anomaly_data.append(anomaly)
    write_to_file(anomaly_data, "anomalies.json")

def write_to_file(data, filename):
    # Append mode: each call adds one JSON array per line, so the output file
    # is newline-delimited JSON rather than a single JSON document
    with open(filename, 'a') as outfile:
        json.dump(data, outfile)
        outfile.write('\n')
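
# Run the per-cloud analyses; each appends its anomalies to anomalies.json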
clouds = ["aws", "azure", "gcp"]
for cloud in clouds:
    if cloud == "aws":
        aws_anomaly_analysis(ec2_anomaly_data, ec2_type_anomaly_data)
    elif cloud == "azure":
        azure_anomaly_analysis(vm_anomaly_data, vm_type_anomaly_data)
    elif cloud == "gcp":
        gcp_anomaly_analysis(gcp_anomaly_data)