import matplotlib
import seaborn as sns
import pandas as pd
from scipy.stats import zscore
from sklearn.preprocessing import MinMaxScaler
from sklearn.cluster import DBSCAN
from matplotlib import cm
from datetime import datetime
from dateutil import parser
import json
# Workflow: a Ruby rake task pulls the data and stores it in tmp/data.json,
# then spawns this Python process and polls every 15 seconds, waiting for
# tmp/anomalies.json to appear. This script loads data.json into an array,
# analyzes the data for anomalies, and dumps the results to a JSON file.
# When the rake task sees that file, it stops polling and loads the anomaly
# data into the db.
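# Illustrative sketch (inferred from the fields read below, not taken from the
# actual rake task output): each record in data.json is assumed to look like
#   {
#     "attributes": {"day": "2019-02-11", "instance_type": "Standard_D2_v3"},
#     "values": {"number_of_instances": 12, "compute_cost": 34.56}
#   }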
# Load data
with open('data.json') as f:
    data = json.load(f)

cost_data = []
instance_count_data = []
for i, d in enumerate(data):
    attributes = d["attributes"]
    day = attributes["day"]
    instance_type = attributes["instance_type"]
    values = d["values"]
    number_of_instances = values["number_of_instances"]
    compute_cost = values["compute_cost"]
    # .timestamp() is portable; strftime("%s") only works on some platforms
    timestamp = int(parser.parse(day).timestamp())
    cost_data.append([i, compute_cost])
    instance_count_data.append([i, number_of_instances])
# COST ANOMALY DETECTION
# Flag outliers: any point whose cost z-score is at least 3 standard deviations from the mean
raw_data = cost_data
cost_data = pd.DataFrame(cost_data)
cost_data[1] = zscore(cost_data[1])
cost_data["is_outlier"] = cost_data[1].apply(lambda x: x <= -3 or x >= 3)
cost_outliers = cost_data[cost_data["is_outlier"]]  # flagged rows, kept for inspection
# Scale the (index, cost) pairs to [0, 1] so DBSCAN's eps applies evenly to both dimensions
scaler = MinMaxScaler()
dayAndCost = cost_data[[0, 1]]
dayAndCost = scaler.fit_transform(dayAndCost)
dayAndCost = pd.DataFrame(dayAndCost, columns=[0, 1])
# Cluster with DBSCAN; points that fall in no cluster are labelled -1 (outliers)
outlier_detection = DBSCAN(eps=0.3, metric="euclidean", min_samples=3, n_jobs=-1)
clusters = outlier_detection.fit_predict(dayAndCost)
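# Not in the original gist: optional sanity check listing the scaled points
# DBSCAN labelled as noise (-1); the anomaly loop below treats these as outliers.
print(dayAndCost[clusters == -1])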
# Create and display plot
cmap = cm.get_cmap('Accent')
dayAndCost.plot.scatter(x=0, y=1, c=clusters, cmap=cmap, colorbar=False)
# matplotlib.pyplot.show()
# Create anomaly data
anomaly_data = []
cloud = "Azure"
description = "Cost was much higher than normal"
count = 1
severity = "SEVERE"
for i, label in enumerate(clusters):
    if label == -1:  # DBSCAN labels noise (outlier) points as -1
        anomaly = {}
        anomaly["cloud"] = cloud
        anomaly["description"] = description
        anomaly["count"] = count
        anomaly["severity"] = severity
        cost_data_point = raw_data[i][1]
        prev_cost_data_point = raw_data[i - 1][1]
        if cost_data_point > prev_cost_data_point:
            verb = "increased"
        else:
            verb = "decreased"
        # Percent change versus the previous point (not used in the message)
        growth_rate = ((cost_data_point - prev_cost_data_point) / prev_cost_data_point) * 100.0
        # Dollar change versus the previous point
        cost_increase = int(round(abs(cost_data_point - prev_cost_data_point)))
        anomaly["impact"] = "Your instance cost " + verb + " " + str(cost_increase) + " dollars in one day"
        anomaly_data.append(anomaly)
# INSTANCE COUNT ANOMALIES
# Flag outliers: any point whose instance-count z-score is at least 3 standard deviations from the mean
raw_instance_count_data = instance_count_data
instance_count_data = pd.DataFrame(instance_count_data)
instance_count_data[1] = zscore(instance_count_data[1])
instance_count_data["is_outlier"] = instance_count_data[1].apply(lambda x: x <= -3 or x >= 3)
count_outliers = instance_count_data[instance_count_data["is_outlier"]]  # flagged rows, kept for inspection
# Scale the (index, count) pairs to [0, 1] before clustering
scaler = MinMaxScaler()
dayAndCount = instance_count_data[[0, 1]]
dayAndCount = scaler.fit_transform(dayAndCount)
dayAndCount = pd.DataFrame(dayAndCount, columns=[0, 1])
# Cluster with DBSCAN; noise points (label -1) are treated as anomalies
outlier_detection = DBSCAN(eps=0.2, metric="euclidean", min_samples=3, n_jobs=-1)
clusters = outlier_detection.fit_predict(dayAndCount)
# Create and display plot
cmap = cm.get_cmap('Accent')
dayAndCount.plot.scatter(x=0, y=1, c=clusters, cmap=cmap, colorbar=False)
# matplotlib.pyplot.show()
# Create anomaly data
cloud = "Azure"
description = "Instance count was much higher than normal"
count = 1
severity = "SEVERE"
if cloud == "Azure":
    term = "VM"
else:
    term = "Instance"
for i, label in enumerate(clusters):
    if label == -1:  # DBSCAN labels noise (outlier) points as -1
        anomaly = {}
        anomaly["cloud"] = cloud
        anomaly["description"] = description
        anomaly["count"] = count
        anomaly["severity"] = severity
        count_data_point = raw_instance_count_data[i][1]
        prev_count_data_point = raw_instance_count_data[i - 1][1]
        if count_data_point > prev_count_data_point:
            verb = "increased"
        else:
            verb = "decreased"
        count_increase = int(round(abs(count_data_point - prev_count_data_point)))
        anomaly["impact"] = "Your " + term + " count " + verb + " by " + str(count_increase) + " " + term + "s in the last 15 minutes"
        anomaly_data.append(anomaly)
# REGION ANOMALY DETECTION (TODO: not implemented)
# INSTANCE TYPE ANOMALY DETECTION (TODO: not implemented)
# Write anomaly data to anomalies.json
with open('anomalies.json', 'w') as outfile:
    json.dump(anomaly_data, outfile)
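# Illustrative only (values are examples, not produced from real data): each
# record written to anomalies.json has the shape built in the loops above, e.g.
#   {
#     "cloud": "Azure",
#     "description": "Cost was much higher than normal",
#     "count": 1,
#     "severity": "SEVERE",
#     "impact": "Your instance cost increased 42 dollars in one day"
#   }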