Created
February 4, 2015 05:23
-
-
Save hustlijian/416e006098a22f19dc83 to your computer and use it in GitHub Desktop.
linux 性能信息收集(阿里)(linux server performance info collect script)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
# | |
# site: http://outofmemory.cn/code-snippet/35902/python-collect-server-performance-info | |
# introduction: collect linux server performance info | |
# | |
######################################### | |
# Function: sample linux performance indices | |
# Usage: python sampler.py | |
# Author: CMS DEV TEAM | |
# Company: Aliyun Inc. | |
# Version: 1.1 | |
######################################### | |
import os | |
import os.path | |
import sys | |
import time | |
import operator | |
import httplib | |
import logging | |
import socket | |
import random | |
from shutil import copyfile | |
from subprocess import Popen, PIPE | |
from logging.handlers import RotatingFileHandler | |
logger = None | |
REMOTE_HOST = None | |
REMOTE_PORT = None | |
REMOTE_MONITOR_URI = None | |
UUID = None | |
def get_mem_usage_percent(): | |
try: | |
f = open('/proc/meminfo', 'r') | |
for line in f: | |
if line.startswith('MemTotal:'): | |
mem_total = int(line.split()[1]) | |
elif line.startswith('MemFree:'): | |
mem_free = int(line.split()[1]) | |
elif line.startswith('Buffers:'): | |
mem_buffer = int(line.split()[1]) | |
elif line.startswith('Cached:'): | |
mem_cache = int(line.split()[1]) | |
elif line.startswith('SwapTotal:'): | |
vmem_total = int(line.split()[1]) | |
elif line.startswith('SwapFree:'): | |
vmem_free = int(line.split()[1]) | |
else: | |
continue | |
f.close() | |
except: | |
return None | |
physical_percent = usage_percent(mem_total - (mem_free + mem_buffer + mem_cache), mem_total) | |
virtual_percent = 0 | |
if vmem_total > 0: | |
virtual_percent = usage_percent((vmem_total - vmem_free), vmem_total) | |
return physical_percent, virtual_percent | |
black_list = ('iso9660',) | |
def usage_percent(use, total): | |
try: | |
ret = (float(use) / total) * 100 | |
except ZeroDivisionError: | |
raise Exception("ERROR - zero division error") | |
return ret | |
def get_disk_partition(): | |
return_list = [] | |
pd = [] | |
try: | |
f = open("/proc/filesystems", "r") | |
for line in f: | |
if not line.startswith("nodev"): | |
fs_type = line.strip() | |
if fs_type not in black_list: | |
pd.append(fs_type) | |
f.close() | |
f = open('/etc/mtab', "r") | |
for line in f: | |
if line.startswith('none'): | |
continue | |
tmp = line.strip().split() | |
ft = tmp[2] | |
if ft not in pd: | |
continue | |
return_list.append(tmp[1]) | |
f.close() | |
except: | |
return None | |
return return_list | |
def check_disk(): | |
try: | |
return_dict = {} | |
p_list = get_disk_partition() | |
for i in p_list: | |
dt = os.statvfs(i) | |
use = (dt.f_blocks - dt.f_bfree) * dt.f_frsize | |
all = dt.f_blocks * dt.f_frsize | |
return_dict[i] = ('%.2f' % (usage_percent(use, all),), ('%.2f' % (all * 1.0 / (1024 * 1000000)))) | |
except: | |
return None | |
return return_dict | |
_CLOCK_TICKS = os.sysconf("SC_CLK_TCK") | |
def get_cpu_time(): | |
need_sleep = False | |
if not os.path.isfile('/tmp/cpu_stat') or os.path.getsize('/tmp/cpu_stat') == 0: | |
copyfile('/proc/stat', '/tmp/cpu_stat') | |
need_sleep = True | |
try: | |
f1 = open('/tmp/cpu_stat', 'r') | |
values1 = f1.readline().split() | |
total_time1 = 0 | |
for i in values1[1:]: | |
total_time1 += int(i) | |
idle_time1 = int(values1[4]) | |
iowait_time1 = int(values1[5]) | |
finally: | |
f1.close() | |
if need_sleep: | |
time.sleep(1) | |
f2 = open('/proc/stat', 'r') | |
try: | |
values2 = f2.readline().split() | |
total_time2 = 0 | |
for i in values2[1:]: | |
total_time2 += int(i) | |
idle_time2 = int(values2[4]) | |
iowait_time2 = int(values2[5]) | |
finally: | |
f2.close() | |
idle_time = idle_time2 - idle_time1 | |
iowait_time = iowait_time2 - iowait_time1 | |
total_time = total_time2 - total_time1 | |
cpu_percentage = int(100.0 * (total_time - idle_time - iowait_time) / total_time) | |
# compensate logic | |
if total_time < 0 or idle_time < 0 or iowait_time < 0 or cpu_percentage < 0 or cpu_percentage > 100: | |
time.sleep(1) | |
f3 = open('/proc/stat', 'r') | |
try: | |
values3 = f3.readline().split() | |
total_time3 = 0 | |
for i in values3[1:]: | |
total_time3 += int(i) | |
idle_time3 = int(values3[4]) | |
iowait_time3 = int(values3[5]) | |
finally: | |
f3.close() | |
idle_time = idle_time3 - idle_time2 | |
iowait_time = iowait_time3 - iowait_time2 | |
total_time = total_time3 - total_time2 | |
cpu_percentage = int(100.0 * (total_time - idle_time - iowait_time) / total_time) | |
copyfile('/proc/stat', '/tmp/cpu_stat') | |
return cpu_percentage | |
def network_io_kbitps(): | |
"""Return network I/O statistics for every network interface | |
installed on the system as a dict of raw tuples. | |
""" | |
f1 = open("/proc/net/dev", "r") | |
try: | |
lines1 = f1.readlines() | |
finally: | |
f1.close() | |
retdict1 = {} | |
for line1 in lines1[2:]: | |
colon1 = line1.find(':') | |
assert colon1 > 0, line1 | |
name1 = line1[:colon1].strip() | |
fields1 = line1[colon1 + 1:].strip().split() | |
bytes_recv1 = float('%.4f' % (float(fields1[0]) * 0.0078125)) | |
bytes_sent1 = float('%.4f' % (float(fields1[8]) * 0.0078125)) | |
retdict1[name1] = (bytes_recv1, bytes_sent1) | |
time.sleep(1) | |
f2 = open("/proc/net/dev", "r") | |
try: | |
lines2 = f2.readlines() | |
finally: | |
f2.close() | |
retdict2 = {} | |
for line2 in lines2[2:]: | |
colon2 = line2.find(':') | |
assert colon2 > 0, line2 | |
name2 = line2[:colon2].strip() | |
fields2 = line2[colon2 + 1:].strip().split() | |
bytes_recv2 = float('%.4f' % (float(fields2[0]) * 0.0078125)) | |
bytes_sent2 = float('%.4f' % (float(fields2[8]) * 0.0078125)) | |
retdict2[name2] = (bytes_recv2, bytes_sent2) | |
retdict = merge_with(retdict2, retdict1) | |
return retdict | |
def disk_io_Kbps(): | |
iostat = Popen("iostat -d -k 1 2 | sed '/Device\|Linux\|^$/d' > /tmp/disk_io", shell=True, stdout=PIPE, stderr=PIPE) | |
iostat_error = iostat.communicate()[1].strip() | |
if iostat_error: | |
logger.error("iostat not exists, %s" % iostat_error) | |
return None | |
retdict = {} | |
exception = None | |
try: | |
try: | |
f = open('/tmp/disk_io', 'r') | |
except Exception, ex: | |
exception = ex | |
logger.error(exception) | |
if exception: | |
return None | |
lines = f.readlines() | |
for line in lines: | |
name, _, readkps, writekps, _, _, = line.split() | |
if name: | |
readkps = float(readkps) | |
writekps = float(writekps) | |
retdict[name] = (readkps, writekps) | |
return retdict | |
finally: | |
f.close() | |
def merge_with(d1, d2, fn=lambda x, y: tuple(map(operator.sub, x, y))): | |
res = d1.copy() # "= dict(d1)" for lists of tuples | |
for key, val in d2.iteritems(): # ".. in d2" for lists of tuples | |
try: | |
res[key] = fn(res[key], val) | |
except KeyError: | |
res[key] = val | |
return res | |
def get_load(): | |
try: | |
f = open('/proc/loadavg', 'r') | |
tmp = f.readline().split() | |
lavg_1 = float(tmp[0]) | |
lavg_5 = float(tmp[1]) | |
lavg_15 = float(tmp[2]) | |
f.close() | |
except: | |
return None | |
return lavg_1, lavg_5, lavg_15 | |
def get_tcp_status(): | |
check_cmd = "command -v ss" | |
check_proc = Popen(check_cmd, shell=True, stdout=PIPE) | |
ss = check_proc.communicate()[0].rstrip('\n') | |
if ss: | |
cmd = "ss -ant | awk '{if(NR != 1) print $1}' | awk '{state=$1;arr[state]++} END{for(i in arr){printf \"%s=%s \", i,arr[i]}}' | sed 's/-/_/g' | sed 's/ESTAB=/ESTABLISHED=/g' | sed 's/FIN_WAIT_/FIN_WAIT/g'" | |
else: | |
cmd = "netstat -anp | grep tcp | awk '{print $6}' | awk '{state=$1;arr[state]++} END{for(i in arr){printf \"%s=%s \", i,arr[i]}}' | tail -n 1" | |
tcp_proc = Popen(cmd, shell=True, stdout=PIPE) | |
tcp_status = tcp_proc.communicate()[0].rstrip('\n') | |
return tcp_status | |
def get_proc_number(): | |
cmd = "ps axu | wc -l | tail -n 1" | |
proc_func = Popen(cmd, shell=True, stdout=PIPE) | |
proc_number = proc_func.communicate()[0].rstrip('\n') | |
return proc_number | |
def all_index(): | |
return ( | |
int(time.time() * 1000), | |
get_cpu_time(), | |
get_mem_usage_percent(), | |
check_disk(), | |
disk_io_Kbps(), | |
network_io_kbitps(), | |
get_load(), | |
get_tcp_status(), | |
get_proc_number() | |
) | |
def collector(): | |
timestamp, cpu, mem, disk, disk_io, net, load, tcp_status, process_number = all_index() | |
disk_utilization = '' | |
disk_io_read = '' | |
disk_io_write = '' | |
internet_networkrx = '' | |
internet_networktx = '' | |
tcp_status_count = '' | |
period_1 = '' | |
period_5 = '' | |
period_15 = '' | |
if UUID: | |
cpu_utilization = 'vm.CPUUtilization ' + str(timestamp) + ' ' + str(cpu) + ' ns=ACS/ECS unit=Percent instanceId=%s\n' % UUID | |
memory_utilization = 'vm.MemoryUtilization ' + str(timestamp) + ' ' + str(mem[0]) + ' ns=ACS/ECS unit=Percent instanceId=%s\n' % UUID | |
if load: | |
period_1 = 'vm.LoadAverage ' + str(timestamp) + ' ' + str(load[0]) + ' ns=ACS/ECS unit=count' + ' instanceId=%s period=1min\n' % UUID | |
period_5 = 'vm.LoadAverage ' + str(timestamp) + ' ' + str(load[1]) + ' ns=ACS/ECS unit=count' + ' instanceId=%s period=5min\n' % UUID | |
period_15 = 'vm.LoadAverage ' + str(timestamp) + ' ' + str(load[2]) + ' ns=ACS/ECS unit=count' + ' instanceId=%s period=15min\n' % UUID | |
if disk: | |
for name, value in disk.items(): | |
disk_utilization = disk_utilization + 'vm.DiskUtilization ' + str(timestamp) + ' ' + str(value[0]) + ' ns=ACS/ECS unit=Percent instanceId=%s mountpoint=%s\n' % (UUID, name) | |
if disk_io: | |
for name, value in disk_io.items(): | |
disk_io_read = disk_io_read + 'vm.DiskIORead ' + str(timestamp) + ' ' + str(value[0]) + ' ns=ACS/ECS unit=Kilobytes/Second instanceId=%s diskname=%s\n' % (UUID, name) | |
disk_io_write = disk_io_write + 'vm.DiskIOWrite ' + str(timestamp) + ' ' + str(value[1]) + ' ns=ACS/ECS unit=Kilobytes/Second instanceId=%s diskname=%s\n' % (UUID, name) | |
for name, value in net.items(): | |
internet_networkrx = internet_networkrx + 'vm.InternetNetworkRX ' + str(timestamp) + ' ' + str(value[0]) + ' ns=ACS/ECS unit=Kilobits/Second instanceId=%s netname=%s\n' % (UUID, name) | |
internet_networktx = internet_networktx + 'vm.InternetNetworkTX ' + str(timestamp) + ' ' + str(value[1]) + ' ns=ACS/ECS unit=Kilobits/Second instanceId=%s netname=%s\n' % (UUID, name) | |
if tcp_status: | |
status_count = tcp_status.split() | |
for element in status_count: | |
key_value = element.split('=') | |
tcp_status_count = tcp_status_count + 'vm.TcpCount ' + str(timestamp) + ' ' + key_value[1] + ' ns=ACS/ECS unit=Count instanceId=%s state=%s\n' % (UUID, key_value[0]) | |
process_count = 'vm.ProcessCount ' + str(timestamp) + ' ' + process_number + ' ns=ACS/ECS unit=Count instanceId=%s\n' % UUID | |
else: | |
cpu_utilization = 'vm.CPUUtilization ' + str(timestamp) + ' ' + str(cpu) + ' ns=ACS/ECS unit=Percent\n' | |
memory_utilization = 'vm.MemoryUtilization ' + str(timestamp) + ' ' + str(mem[0]) + ' ns=ACS/ECS unit=Percent\n' | |
if load: | |
period_1 = 'vm.LoadAverage ' + str(timestamp) + ' ' + str(load[0]) + ' ns=ACS/ECS unit=count period=1min\n' | |
period_5 = 'vm.LoadAverage ' + str(timestamp) + ' ' + str(load[1]) + ' ns=ACS/ECS unit=count period=5min\n' | |
period_15 = 'vm.LoadAverage ' + str(timestamp) + ' ' + str(load[2]) + ' ns=ACS/ECS unit=count period=15min\n' | |
if disk: | |
for name, value in disk.items(): | |
disk_utilization = disk_utilization + 'vm.DiskUtilization ' + str(timestamp) + ' ' + str(value[0]) + ' ns=ACS/ECS unit=Percent mountpoint=%s\n' % name | |
if disk_io: | |
for name, value in disk_io.items(): | |
disk_io_read = disk_io_read + 'vm.DiskIORead ' + str(timestamp) + ' ' + str(value[0]) + ' ns=ACS/ECS unit=Kilobytes/Second diskname=%s\n' % name | |
disk_io_write = disk_io_write + 'vm.DiskIOWrite ' + str(timestamp) + ' ' + str(value[1]) + ' ns=ACS/ECS unit=Kilobytes/Second diskname=%s\n' % name | |
for name, value in net.items(): | |
internet_networkrx = internet_networkrx + 'vm.InternetNetworkRX ' + str(timestamp) + ' ' + str(value[0]) + ' ns=ACS/ECS unit=Kilobits/Second netname=%s\n' % name | |
internet_networktx = internet_networktx + 'vm.InternetNetworkTX ' + str(timestamp) + ' ' + str(value[1]) + ' ns=ACS/ECS unit=Kilobits/Second netname=%s\n' % name | |
if tcp_status: | |
status_count = tcp_status.split() | |
for element in status_count: | |
key_value = element.split('=') | |
tcp_status_count = tcp_status_count + 'vm.TcpCount ' + str(timestamp) + ' ' + key_value[1] + ' ns=ACS/ECS unit=Count state=%s\n' % key_value[0] | |
process_count = 'vm.ProcessCount ' + str(timestamp) + ' ' + process_number + ' ns=ACS/ECS unit=Count\n' | |
data_post = cpu_utilization + memory_utilization + period_1 + period_5 + period_15 + disk_utilization + disk_io_read + disk_io_write + internet_networkrx + internet_networktx + tcp_status_count + process_count | |
print data_post | |
interval = random.randint(0, 5000) | |
time.sleep(interval / 1000.0) | |
headers = {"Content-Type": "text/plain", "Accept": "text/plain"} | |
exception = None | |
http_client = None | |
try: | |
try: | |
http_client = httplib.HTTPConnection(REMOTE_HOST, REMOTE_PORT) | |
http_client.request(method="POST", url=REMOTE_MONITOR_URI, body=data_post, headers=headers) | |
response = http_client.getresponse() | |
if response.status == 200: | |
return | |
else: | |
logger.warn("response code %d" % response.status) | |
logger.warn("response code %s" % response.read()) | |
except Exception, ex: | |
exception = ex | |
finally: | |
if http_client: | |
http_client.close() | |
if exception: | |
logger.error(exception) | |
if __name__ == '__main__': | |
REMOTE_HOST = 'open.cms.aliyun.com' | |
REMOTE_PORT = 80 | |
# get report address | |
if not os.path.isfile("../cmscfg"): | |
pass | |
else: | |
props = {} | |
prop_file = file("../cmscfg", 'r') | |
for line in prop_file.readlines(): | |
kv = line.split('=') | |
props[kv[0].strip()] = kv[1].strip() | |
prop_file.close() | |
if props.get('report_domain'): | |
REMOTE_HOST = props.get('report_domain') | |
if props.get('report_port'): | |
REMOTE_PORT = props.get('report_port') | |
# get uuid | |
if not os.path.isfile("../aegis_quartz/conf/uuid"): | |
pass | |
else: | |
uuid_file = file("../aegis_quartz/conf/uuid", 'r') | |
UUID = uuid_file.readline() | |
UUID = UUID.lower() | |
REMOTE_MONITOR_URI = "/metrics/putLines" | |
MONITOR_DATA_FILE_DIR = "/tmp" | |
LOG_FILE = "/tmp/" + "vm.log" | |
LOG_LEVEL = logging.INFO | |
LOG_FILE_MAX_BYTES = 1024 * 1024 | |
LOG_FILE_MAX_COUNT = 3 | |
logger = logging.getLogger('sampler') | |
logger.setLevel(LOG_LEVEL) | |
handler = RotatingFileHandler(filename=LOG_FILE, mode='a', maxBytes=LOG_FILE_MAX_BYTES, | |
backupCount=LOG_FILE_MAX_COUNT) | |
formatter = logging.Formatter(fmt='%(asctime)s - %(levelname)s - %(message)s') | |
handler.setFormatter(formatter) | |
logger.addHandler(handler) | |
socket.setdefaulttimeout(10) | |
try: | |
collector() | |
except Exception, e: | |
logger.error(e) | |
sys.exit(1) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment