Last active
June 14, 2017 12:52
-
-
Save aclisp/bf6da022409113ff099f49e6e032c8cf to your computer and use it in GitHub Desktop.
Send alarms for k8s Pods whose CPU or memory usage exceeds a configured threshold, using the Heapster API.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cap": [ | |
{ "app": "__default", "s2s": "friday_alert", "cpu": 250, "mem": 2147483648 }, | |
{ "app": "music-mobsrv", "s2s": "mobsrv", "cpu": 850, "mem": 2147483648 }, | |
{ "app": "music-entsrv", "s2s": "entsrv", "cpu": 850, "mem": 2147483648 }, | |
{ "app": "music-guild-service", "s2s": "guildservice", "cpu": 850, "mem": 2147483648 }, | |
{ "app": "music-entms", "s2s": "entms", "cpu": 850, "mem": 2147483648 }, | |
{ "app": "docker-registry-web", "s2s": "friday_alert", "cpu": 850, "mem": 2147483648 }, | |
{ "app": "music-mobflwapp", "s2s": "mobFlwApp", "cpu": 1000, "mem": 2147483648 }, | |
] | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# -*- coding: utf-8 -*- | |
import setproctitle # sudo pip install setproctitle | |
import yaml # sudo pip install PyYAML | |
import requests # sudo pip install requests | |
import json | |
import time | |
import pprint | |
import sys | |
from ctypes import * | |
import traceback | |
import os | |
import datetime | |
# Rename the process so it is listed as "friday_alert".
setproctitle.setproctitle("friday_alert")

# Directory holding this script; the native SDK and the config file are
# both located relative to it.
_script_file = os.path.realpath(__file__)
dir_path = os.path.dirname(_script_file)

# Load the native alerting SDK that sits next to the script and initialize it.
# NOTE(review): the meaning of the numeric arguments (8082, 0, 0) is defined
# by bamSDK and is not visible in this file -- confirm against the SDK docs.
_sdk_path = dir_path + "/bamSDK.so"
bam = cdll.LoadLibrary(_sdk_path)
bam.bam_initialize(
    c_char_p("friday_alert"),
    c_ushort(8082),
    c_uint(0),
    c_uint(0),
)

# Shared pretty printer, used to dump the loaded threshold table.
pp = pprint.PrettyPrinter(indent=4)
# Alert thresholds, one record per application:
#   app - pod name prefix the record applies to
#   s2s - business name attached to the alerts raised for matching pods
#   cpu - CPU limit, in cores * 1000
#   mem - memory limit, in bytes
CAP = [
    {
        "app": "__default",
        "s2s": "friday_alert",
        "cpu": 850,
        "mem": 2 * 1024 * 1024 * 1024,
    },
]

# NOTE(review): the table above is always replaced by the "cap" entry of the
# config file; a missing or malformed file raises at startup -- confirm that
# the built-in value is not meant to be a fallback.
_cap_conf_path = dir_path + "/../conf/cap.conf"
with open(_cap_conf_path) as ff:
    _cap_conf = yaml.safe_load(ff)
CAP = _cap_conf["cap"]

# Dump the effective thresholds once at startup.
pp.pprint(CAP)
# Given the pod name, returns the cpu and mem numbers beyond which to alert | |
def get_threshold(cap, podname):
    """Return the threshold record that applies to a pod.

    Records are tried in order and the first one whose "app" value is a
    prefix of `podname` wins.  When nothing matches, the record named
    "__default" is returned wherever it sits in the list; if there is no
    such record the first entry is used, as before.

    Args:
        cap: list of threshold dicts, each with at least an "app" key.
        podname: name of the pod to look up.

    Returns:
        The matching threshold dict (the same object held in `cap`).

    Raises:
        IndexError: if `cap` is empty.
    """
    default_record = None
    for record in cap:
        if podname.startswith(record["app"]):
            return record
        # Remember the explicit default so the fallback does not depend on
        # the ordering of the config file.
        if default_record is None and record["app"] == "__default":
            default_record = record
    if default_record is not None:
        return default_record
    return cap[0]
def pod_ip_img(podname):
    """Look up the host IP and container image of a pod.

    Queries the cluster master for the pod object in the "default"
    namespace.

    Args:
        podname: name of the pod.

    Returns:
        A tuple (ip, img): the pod's status.hostIP and the image of its
        first container.

    Raises:
        requests.exceptions.RequestException: on connection failure, timeout
            or a non-2xx HTTP status.
        KeyError, IndexError: if the reply lacks the expected fields.
    """
    master_url = "http://master.friday.yy.com/api/v1/namespaces/default/pods"
    # Without a timeout a stalled master would block the whole alert loop.
    reply = requests.get(master_url + "/" + podname, timeout=10)
    # Fail on HTTP errors here instead of indexing into an error payload.
    reply.raise_for_status()
    pod = reply.json()
    ip = pod["status"]["hostIP"]
    img = pod["spec"]["containers"][0]["image"]
    return ip, img
def alert(podname, bizname, msg):
    """Report an alert for a pod through the native bam SDK.

    The message handed to the SDK is `msg` prefixed with a LOCATOR tag that
    carries `bizname`.

    Args:
        podname: pod the alert is about.
        bizname: business name embedded in the LOCATOR tag.
        msg: human-readable alert text.
    """
    locator = ' <LOCATOR:{"bizname":"' + bizname + '"}/> '
    # NOTE(review): the meaning of the constants 3 and 1 is defined by
    # bamSDK and is not visible in this file -- confirm against the SDK docs.
    bam.bam_error_log_watcher(
        c_int(3),
        c_char_p(podname),
        c_int(1),
        c_char_p(locator + msg),
    )
def nowtime():
    """Return the current local time formatted as a log-line prefix.

    The layout is hour:minute, full month name and day of month run
    together, followed by one trailing space.
    """
    current = datetime.datetime.now()
    # format() on a datetime with a non-empty spec delegates to strftime().
    return format(current, "%H:%M%B%d ")
def CORE(cpu):
    """Convert a CPU figure in cores * 1000 into percent of one core.

    Args:
        cpu: CPU amount in thousandths of a core (int or float).

    Returns:
        The amount as a float percentage, e.g. 855 -> 85.5.
    """
    # Force float division so integer inputs are not truncated under
    # Python 2, matching how GB() converts bytes.
    return cpu / 10.0
def GB(mem):
    """Convert a byte count into gigabytes (1 GB = 1024 * 1024 * 1024 bytes).

    Args:
        mem: size in bytes.

    Returns:
        The size in gigabytes as a float.
    """
    bytes_per_gb = 1024.0 * 1024 * 1024
    return mem / bytes_per_gb
def IMG(podimg):
    """Return the part of an image reference after its last '/'.

    Args:
        podimg: image reference string, e.g. "registry/ns/name:tag".

    Returns:
        The final path component; the whole string when it has no '/'.
    """
    _, _, short_name = podimg.rpartition('/')
    return short_name
def _latest_metric(podurl, podname, metric, timeout=10):
    """Return the newest sample value of `metric` for a pod, or None when the reply holds no samples."""
    reply = requests.get(podurl + "/" + podname + "/metrics/" + metric, timeout=timeout).json()
    samples = reply.get("metrics") or []
    if not samples:
        return None
    return samples[-1]["value"]


def process():
    """Check every pod once and raise alerts for those over their limits.

    Lists the pods known to the local Heapster model API, reads the latest
    CPU usage rate and memory working set of each one, and compares them
    with the thresholds from CAP.  For every exceeded limit a line is
    printed and alert() is called.

    Pods that have no samples yet are reported and skipped, so that one such
    pod does not abort the scan of the remaining pods.

    Raises:
        requests.exceptions.RequestException: on HTTP failure or timeout.
    """
    podurl = "http://127.0.0.1:8082/api/v1/model/namespaces/default/pods"
    # Bound every HTTP call so a stalled endpoint cannot hang the loop.
    podlist = requests.get(podurl, timeout=10).json()
    for podname in podlist:
        # CPU usage on all cores in millicores
        last1mcpu = _latest_metric(podurl, podname, "cpu/usage_rate")
        # Total working set usage. Working set is the memory being used and
        # not easily dropped by the kernel
        last1mmem = _latest_metric(podurl, podname, "memory/working_set")
        if last1mcpu is None or last1mmem is None:
            print(nowtime() + "no metrics yet, skipping POD " + podname)
            continue

        cap = get_threshold(CAP, podname)
        biz = cap["s2s"]
        cpu_over = last1mcpu > cap["cpu"]
        mem_over = last1mmem > cap["mem"]
        if not (cpu_over or mem_over):
            continue

        # Only ask the master for pod details when an alert will be sent.
        podip, podimg = pod_ip_img(podname)
        if cpu_over:
            msg = "进程CPU告警 {} {:.0f}% > {:.0f}% 镜像 {} POD {}".format(podip, CORE(last1mcpu), CORE(cap["cpu"]), IMG(podimg), podname)
            print(nowtime() + biz + " " + msg)
            alert(podname, biz, msg)
        if mem_over:
            msg = "进程MEM告警 {} {:.1f}G > {:.1f}G 镜像 {} POD {}".format(podip, GB(last1mmem), GB(cap["mem"]), IMG(podimg), podname)
            print(nowtime() + biz + " " + msg)
            alert(podname, biz, msg)
# Main loop: scan all pods once a minute; after a failed scan report the
# error and retry sooner.
while True:
    try:
        print("")
        process()
        sys.stdout.flush()
    except Exception:
        # Catch Exception rather than everything, so KeyboardInterrupt and
        # SystemExit still stop the daemon instead of being swallowed.
        print(sys.exc_info())
        traceback.print_tb(sys.exc_info()[2])
        time.sleep(15)
    else:
        time.sleep(60)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment