Last active
September 29, 2015 18:21
-
-
Save TheCloudlessSky/f5239cd0bf1991e42324 to your computer and use it in GitHub Desktop.
Datadog WMI Significant Performance Improvements
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from wmi_sampler import WMISampler | |
# Sample Check. | |
class Check(object):
    """Base class for the sample checks; it only keeps a reference to the logger."""

    def __init__(self, logger):
        # Subclasses log their sampled values through this logger.
        self.logger = logger
class Processes(Check):
    """Sample check that logs two properties of Win32_PerfRawData_PerfOS_System."""

    def __init__(self, logger):
        Check.__init__(self, logger)
        self.wmi_sampler = WMISampler(
            logger,
            "Win32_PerfRawData_PerfOS_System",
            ["ProcessorQueueLength", "Processes"]
        )

    def check(self, agentConfig):
        """Take a fresh sample and debug-log both properties of the first item.

        ``agentConfig`` is accepted for interface parity with the other checks
        but is not read here.
        """
        sampler = self.wmi_sampler
        sampler.sample()

        queue_length = sampler.item_property(0, "ProcessorQueueLength")
        self.logger.debug(u"ProcessorQueueLength = %s", queue_length)

        process_count = sampler.item_property(0, "Processes")
        self.logger.debug(u"Processes = %s", process_count)
# try: | |
# os = self._query_wmi('Win32_OperatingSystem', ['TotalVisibleMemorySize', 'FreePhysicalMemory'], [])[0] | |
# except AttributeError: | |
# self.logger.info('Missing Win32_OperatingSystem. No memory metrics will be returned.') | |
# return | |
# total = 0 | |
# free = 0 | |
# cached = 0 | |
# if os['TotalVisibleMemorySize'] is not None and os['FreePhysicalMemory'] is not None: | |
# total = int(os['TotalVisibleMemorySize']) / KB2MB | |
# free = int(os['FreePhysicalMemory']) / KB2MB | |
# self.save_sample('system.mem.total', total) | |
# self.save_sample('system.mem.free', free) | |
# self.save_sample('system.mem.used', total - free) | |
# mem = self._query_wmi('Win32_PerfRawData_PerfOS_Memory', ['CacheBytes', 'CommittedBytes', 'PoolPagedBytes', 'PoolNonpagedBytes'], [])[0] | |
# if mem['CacheBytes'] is not None: | |
# cached = int(mem['CacheBytes']) / B2MB | |
# self.save_sample('system.mem.cached', cached) | |
# if mem['CommittedBytes'] is not None: | |
# self.save_sample('system.mem.committed', int(mem['CommittedBytes']) / B2MB) | |
# if mem['PoolPagedBytes'] is not None: | |
# self.save_sample('system.mem.paged', int(mem['PoolPagedBytes']) / B2MB) | |
# if mem['PoolNonpagedBytes'] is not None: | |
# self.save_sample('system.mem.nonpaged', int(mem['PoolNonpagedBytes']) / B2MB) | |
# usable = free + cached | |
# self.save_sample('system.mem.usable', usable) | |
# if total > 0: | |
# pct_usable = float(usable) / total | |
# self.save_sample('system.mem.pct_usable', pct_usable) | |
# return self.get_metrics() | |
class Memory(Check):
    """Sample check that logs memory properties read from two WMI classes."""

    def __init__(self, logger):
        Check.__init__(self, logger)
        self.os_wmi_sampler = WMISampler(
            logger,
            "Win32_OperatingSystem",
            ["TotalVisibleMemorySize", "FreePhysicalMemory"]
        )
        self.mem_wmi_sampler = WMISampler(
            logger,
            "Win32_PerfRawData_PerfOS_Memory",
            ["CacheBytes", "CommittedBytes", "PoolPagedBytes", "PoolNonpagedBytes"]
        )

    def check(self, agentConfig):
        """Sample both WMI classes and debug-log each property of their first item.

        ``agentConfig`` is accepted for interface parity with the other checks
        but is not read here.
        """
        log_debug = self.logger.debug

        os_sampler = self.os_wmi_sampler
        os_sampler.sample()
        total_visible = os_sampler.item_property(0, "TotalVisibleMemorySize")
        log_debug(u"TotalVisibleMemorySize = %s", total_visible)
        free_physical = os_sampler.item_property(0, "FreePhysicalMemory")
        log_debug(u"FreePhysicalMemory = %s", free_physical)

        mem_sampler = self.mem_wmi_sampler
        mem_sampler.sample()
        cache_bytes = mem_sampler.item_property(0, "CacheBytes")
        log_debug(u"CacheBytes = %s", cache_bytes)
        committed_bytes = mem_sampler.item_property(0, "CommittedBytes")
        log_debug(u"CommittedBytes = %s", committed_bytes)
        pool_paged = mem_sampler.item_property(0, "PoolPagedBytes")
        log_debug(u"PoolPagedBytes = %s", pool_paged)
        pool_nonpaged = mem_sampler.item_property(0, "PoolNonpagedBytes")
        log_debug(u"PoolNonpagedBytes = %s", pool_nonpaged)
# try: | |
# os = self._query_wmi('Win32_OperatingSystem', ['TotalVisibleMemorySize', 'FreePhysicalMemory'], [])[0] | |
# except AttributeError: | |
# self.logger.info('Missing Win32_OperatingSystem. No memory metrics will be returned.') | |
# return | |
# total = 0 | |
# free = 0 | |
# cached = 0 | |
# if os['TotalVisibleMemorySize'] is not None and os['FreePhysicalMemory'] is not None: | |
# total = int(os['TotalVisibleMemorySize']) / KB2MB | |
# free = int(os['FreePhysicalMemory']) / KB2MB | |
# self.save_sample('system.mem.total', total) | |
# self.save_sample('system.mem.free', free) | |
# self.save_sample('system.mem.used', total - free) | |
# mem = self._query_wmi('Win32_PerfRawData_PerfOS_Memory', ['CacheBytes', 'CommittedBytes', 'PoolPagedBytes', 'PoolNonpagedBytes'], [])[0] | |
# if mem['CacheBytes'] is not None: | |
# cached = int(mem['CacheBytes']) / B2MB | |
# self.save_sample('system.mem.cached', cached) | |
# if mem['CommittedBytes'] is not None: | |
# self.save_sample('system.mem.committed', int(mem['CommittedBytes']) / B2MB) | |
# if mem['PoolPagedBytes'] is not None: | |
# self.save_sample('system.mem.paged', int(mem['PoolPagedBytes']) / B2MB) | |
# if mem['PoolNonpagedBytes'] is not None: | |
# self.save_sample('system.mem.nonpaged', int(mem['PoolNonpagedBytes']) / B2MB) | |
# usable = free + cached | |
# self.save_sample('system.mem.usable', usable) | |
# if total > 0: | |
# pct_usable = float(usable) / total | |
# self.save_sample('system.mem.pct_usable', pct_usable) | |
# return self.get_metrics() | |
class Cpu(Check):
    """Sample check that logs Name/PercentInterruptTime for every sampled item."""

    def __init__(self, logger):
        Check.__init__(self, logger)
        self.wmi_sampler = WMISampler(
            logger,
            "Win32_PerfRawData_PerfOS_Processor",
            ["Name", "PercentInterruptTime"]
        )

    def check(self, agentConfig):
        """Take a fresh sample and debug-log one line per returned item.

        ``agentConfig`` is accepted for interface parity with the other checks
        but is not read here.
        """
        sampler = self.wmi_sampler
        sampler.sample()

        for index in range(len(sampler)):
            name = sampler.item_property(index, "Name")
            interrupt_time = sampler.item_property(index, "PercentInterruptTime")
            self.logger.debug(
                u"Name = %s, PercentInterruptTime = %s",
                name,
                interrupt_time
            )
# try: | |
# cpu = self._query_wmi('Win32_PerfRawData_PerfOS_Processor', ['Name', 'PercentInterruptTime'], []) | |
# except AttributeError: | |
# self.logger.info('Missing Win32_PerfRawData_PerfOS_Processor WMI class.' | |
# ' No CPU metrics will be returned.') | |
# return | |
# cpu_interrupt = self._average_metric(cpu, 'PercentInterruptTime') | |
# if cpu_interrupt is not None: | |
# self.save_sample('system.cpu.interrupt', cpu_interrupt) | |
# cpu_percent = psutil.cpu_times() | |
# self.save_sample('system.cpu.user', 100 * cpu_percent.user / psutil.NUM_CPUS) | |
# self.save_sample('system.cpu.idle', 100 * cpu_percent.idle / psutil.NUM_CPUS) | |
# self.save_sample('system.cpu.system', 100 * cpu_percent.system / psutil.NUM_CPUS) | |
# return self.get_metrics() | |
class Network(Check):
    """Sample check that logs byte counters for every sampled network interface item."""

    def __init__(self, logger):
        Check.__init__(self, logger)
        self.wmi_sampler = WMISampler(
            logger,
            "Win32_PerfRawData_Tcpip_NetworkInterface",
            ["Name", "BytesReceivedPerSec", "BytesSentPerSec"]
        )

    def check(self, agentConfig):
        """Take a fresh sample and debug-log one line per returned item.

        ``agentConfig`` is accepted for interface parity with the other checks
        but is not read here.
        """
        sampler = self.wmi_sampler
        sampler.sample()

        for index in range(len(sampler)):
            name = sampler.item_property(index, "Name")
            bytes_received = sampler.item_property(index, "BytesReceivedPerSec")
            bytes_sent = sampler.item_property(index, "BytesSentPerSec")
            self.logger.debug(
                u"Name = %s, BytesReceivedPerSec = %s, BytesSentPerSec = %s",
                name,
                bytes_received,
                bytes_sent
            )
# try: | |
# net = self._query_wmi('Win32_PerfRawData_Tcpip_NetworkInterface', ['Name', 'BytesReceivedPerSec', 'BytesSentPerSec'], []) | |
# except AttributeError: | |
# self.logger.info('Missing Win32_PerfRawData_Tcpip_NetworkInterface WMI class.' | |
# ' No network metrics will be returned') | |
# return | |
# for iface in net: | |
# name = self.normalize_device_name(iface['Name']) | |
# if iface['BytesReceivedPerSec'] is not None: | |
# self.save_sample('system.net.bytes_rcvd', iface['BytesReceivedPerSec'], | |
# device_name=name) | |
# if iface['BytesSentPerSec'] is not None: | |
# self.save_sample('system.net.bytes_sent', iface['BytesSentPerSec'], | |
# device_name=name) | |
# return self.get_metrics() | |
class IO(Check):
    """Sample check that logs disk counters for every sampled logical disk item."""

    def __init__(self, logger):
        Check.__init__(self, logger)
        self.wmi_sampler = WMISampler(
            logger,
            "Win32_PerfRawData_PerfDisk_LogicalDisk",
            [
                "Name",
                "DiskWriteBytesPerSec",
                "DiskWritesPerSec",
                "DiskReadBytesPerSec",
                "DiskReadsPerSec",
                "CurrentDiskQueueLength"
            ]
        )

    def check(self, agentConfig):
        """Take a fresh sample and debug-log one line per returned item.

        ``agentConfig`` is accepted for interface parity with the other checks
        but is not read here.
        """
        sampler = self.wmi_sampler
        sampler.sample()

        # Same properties, in the same order, as the placeholders in the
        # log message below.
        logged_properties = (
            "Name",
            "DiskWriteBytesPerSec",
            "DiskWritesPerSec",
            "DiskReadBytesPerSec",
            "DiskReadsPerSec",
            "CurrentDiskQueueLength",
        )

        for index in range(len(sampler)):
            values = [sampler.item_property(index, name) for name in logged_properties]
            self.logger.debug(
                u"Name = %s, DiskWriteBytesPerSec = %s, DiskWritesPerSec = %s, DiskReadBytesPerSec = %s, DiskReadsPerSec = %s, CurrentDiskQueueLength = %s",
                *values
            )
# try: | |
# disk = self._query_wmi('Win32_PerfRawData_PerfDisk_LogicalDisk', ['Name', 'DiskWriteBytesPerSec', 'DiskWritesPerSec', 'DiskReadBytesPerSec', 'DiskReadsPerSec', 'CurrentDiskQueueLength'], []) | |
# except AttributeError: | |
# self.logger.info('Missing Win32_PerfRawData_PerfDisk_LogicalDiskUnable WMI class.' | |
# ' No I/O metrics will be returned.') | |
# return | |
# blacklist_re = agentConfig.get('device_blacklist_re', None) | |
# for device in disk: | |
# name = self.normalize_device_name(device['Name']) | |
# if should_ignore_disk(name, blacklist_re): | |
# continue | |
# if device['DiskWriteBytesPerSec'] is not None: | |
# self.save_sample('system.io.wkb_s', int(device['DiskWriteBytesPerSec']) / B2KB, | |
# device_name=name) | |
# if device['DiskWritesPerSec'] is not None: | |
# self.save_sample('system.io.w_s', int(device['DiskWritesPerSec']), | |
# device_name=name) | |
# if device['DiskReadBytesPerSec'] is not None: | |
# self.save_sample('system.io.rkb_s', int(device['DiskReadBytesPerSec']) / B2KB, | |
# device_name=name) | |
# if device['DiskReadsPerSec'] is not None: | |
# self.save_sample('system.io.r_s', int(device['DiskReadsPerSec']), | |
# device_name=name) | |
# if device['CurrentDiskQueueLength'] is not None: | |
# self.save_sample('system.io.avg_q_sz', device['CurrentDiskQueueLength'], | |
# device_name=name) | |
# return self.get_metrics() | |
if __name__ == "__main__":
    import time
    import logging

    logging.basicConfig(level=logging.INFO, format='%(asctime)-15s %(message)s')
    log = logging.getLogger()
    agentConfig = {}

    # Build each check once, up front; they are then run in this same order
    # on every pass of the loop below.
    checks = [
        check_class(log)
        for check_class in (Processes, Memory, Cpu, Network, IO)
    ]

    # Run all checks forever, logging how long each full pass took.
    while True:
        started_at = time.time()

        for check in checks:
            check.check(agentConfig)

        elapsed_ms = (time.time() - started_at) * 1000.0
        log.info("Checks completed in {0:0.2f}ms".format(elapsed_ms))

        time.sleep(1)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# 3rd party | |
import pywintypes | |
from win32com.client import Dispatch | |
# To improve the first WMI query's performance, share the same WMI connection. | |
wmi_locators = {} | |
wmi_connections = {} | |
class CaseInsensitiveDict(dict):
    """A ``dict`` whose item access lower-cases the key first.

    Only the methods overridden here normalize the key (by calling
    ``key.lower()``), so keys are expected to be strings. Entries added
    through other ``dict`` entry points (constructor arguments, ``update``,
    ...) are stored exactly as given.
    """

    def __setitem__(self, key, value):
        super(CaseInsensitiveDict, self).__setitem__(key.lower(), value)

    def __getitem__(self, key):
        return super(CaseInsensitiveDict, self).__getitem__(key.lower())

    def __contains__(self, key):
        return super(CaseInsensitiveDict, self).__contains__(key.lower())

    def get(self, key, default=None):
        """Return the value stored for ``key`` (case-insensitively), else ``default``.

        Accepts the optional ``default`` argument so the signature stays
        compatible with ``dict.get``.
        """
        return super(CaseInsensitiveDict, self).get(key.lower(), default)

    def has_key(self, key):
        """Return True when ``key`` is present (case-insensitively).

        Implemented through ``__contains__`` rather than ``dict.has_key`` so
        it also works on interpreters where ``dict`` has no ``has_key``.
        """
        return key in self
# Use MSDN to lookup the class, property and counter type to determine the | |
# appropriate calculator. | |
# | |
# For example, "Win32_PerfRawData_PerfOS_Memory" is located at: | |
# https://msdn.microsoft.com/en-us/library/aa394314(v=vs.85).aspx | |
# and the "CacheBytes" property has a CounterType = 65792 which | |
# can be determined from: | |
# https://msdn.microsoft.com/en-us/library/aa389383(v=vs.85).aspx | |
# The CounterType 65792 (PERF_COUNTER_LARGE_RAWCOUNT) is defined here: | |
# https://technet.microsoft.com/en-us/library/cc780300(v=ws.10).aspx | |
# From: https://msdn.microsoft.com/en-us/library/aa389383(v=vs.85).aspx | |
# Registry filled by the ``calculator`` decorator: maps a counter type to the
# function that was decorated for it.
counter_type_calculators = {}


def calculator(counter_type):
    """Return a decorator that registers a function under ``counter_type``.

    The decorated function is stored in ``counter_type_calculators`` and
    returned unchanged, so it stays callable under its own name.
    """
    def register(func):
        counter_type_calculators[counter_type] = func
        return func

    return register
# https://technet.microsoft.com/en-us/library/cc757032(v=ws.10).aspx | |
@calculator(65536)
def calculate_perf_counter_rawcount(previous, current, item_index, property_name):
    """Return the property's value from the current sample, unmodified.

    ``previous`` is accepted so every calculator shares one signature; it is
    not read here.
    """
    latest_item = current[item_index]
    return latest_item[property_name]
# https://technet.microsoft.com/en-us/library/cc780300(v=ws.10).aspx | |
@calculator(65792)
def calculate_perf_counter_large_rawcount(previous, current, item_index, property_name):
    """Return the property's value from the current sample, unmodified.

    ``previous`` is accepted so every calculator shares one signature; it is
    not read here.
    """
    latest_item = current[item_index]
    return latest_item[property_name]
# https://technet.microsoft.com/en-us/library/cc728274(v=ws.10).aspx | |
@calculator(542180608)
def calculate_perf_100nsec_timer(previous, current, item_index, property_name):
    """Compute ``(N1 - N0) / (D1 - D0) * 100`` between two samples.

    ``N`` is the property's value and ``D`` is ``Timestamp_Sys100NS``; index 0
    comes from ``previous[item_index]`` and index 1 from
    ``current[item_index]``.

    Returns ``None`` when any of the four inputs is ``None`` or when both
    samples carry the same timestamp (the ratio is undefined), instead of
    raising ``TypeError`` / ``ZeroDivisionError``.
    """
    previous_item = previous[item_index]
    current_item = current[item_index]

    n0 = previous_item[property_name]
    n1 = current_item[property_name]
    d0 = previous_item["Timestamp_Sys100NS"]
    d1 = current_item["Timestamp_Sys100NS"]

    if n0 is None or n1 is None or d0 is None or d1 is None:
        return None

    elapsed = d1 - d0
    if elapsed == 0:
        # Both samples share a timestamp: no interval to divide by.
        return None

    return (n1 - n0) / elapsed * 100
# https://technet.microsoft.com/en-us/library/cc757486(v=ws.10).aspx | |
@calculator(272696576)
def calculate_perf_counter_bulk_count(previous, current, item_index, property_name):
    """Compute ``(N1 - N0) / ((D1 - D0) / F)`` between two samples.

    ``N`` is the property's value, ``D`` is ``Timestamp_Sys100NS`` and ``F``
    is the current item's ``Frequency_Sys100NS``; index 0 comes from
    ``previous[item_index]`` and index 1 from ``current[item_index]``.

    Returns ``None`` when any input is ``None``, when the frequency is zero,
    or when both samples carry the same timestamp, instead of raising
    ``TypeError`` / ``ZeroDivisionError``.
    """
    previous_item = previous[item_index]
    current_item = current[item_index]

    n0 = previous_item[property_name]
    n1 = current_item[property_name]
    d0 = previous_item["Timestamp_Sys100NS"]
    d1 = current_item["Timestamp_Sys100NS"]
    f = current_item["Frequency_Sys100NS"]

    if n0 is None or n1 is None or d0 is None or d1 is None:
        return None

    elapsed_ticks = d1 - d0
    if not f or elapsed_ticks == 0:
        # Missing/zero frequency or no elapsed time: the rate is undefined.
        return None

    return (n1 - n0) / (elapsed_ticks / f)
# https://technet.microsoft.com/en-us/library/cc740048(v=ws.10).aspx | |
@calculator(272696320)
def calculate_perf_counter_counter(previous, current, item_index, property_name):
    """Compute ``(N1 - N0) / ((D1 - D0) / F)`` between two samples.

    ``N`` is the property's value, ``D`` is ``Timestamp_Sys100NS`` and ``F``
    is the current item's ``Frequency_Sys100NS``; index 0 comes from
    ``previous[item_index]`` and index 1 from ``current[item_index]``.

    Returns ``None`` when any input is ``None``, when the frequency is zero,
    or when both samples carry the same timestamp, instead of raising
    ``TypeError`` / ``ZeroDivisionError``.
    """
    previous_item = previous[item_index]
    current_item = current[item_index]

    n0 = previous_item[property_name]
    n1 = current_item[property_name]
    d0 = previous_item["Timestamp_Sys100NS"]
    d1 = current_item["Timestamp_Sys100NS"]
    f = current_item["Frequency_Sys100NS"]

    if n0 is None or n1 is None or d0 is None or d1 is None:
        return None

    elapsed_ticks = d1 - d0
    if not f or elapsed_ticks == 0:
        # Missing/zero frequency or no elapsed time: the rate is undefined.
        return None

    return (n1 - n0) / (elapsed_ticks / f)
class WMISampler(object):
    """Query one WMI class for a fixed set of properties and expose the values.

    Each call to ``sample()`` runs the WQL query and keeps both the new
    result (``current_sample``) and the one before it (``previous_sample``).
    For classes whose name contains ``_PerfRawData_`` the ``CounterType``
    qualifier of each property is cached on the first successful query;
    ``item_property()`` then uses it to pick a function from
    ``counter_type_calculators`` and returns the calculated value instead of
    the raw one.
    """

    def __init__(self, logger, class_name, property_names, host="localhost", namespace="root\\cimv2", username="", password=""):
        """Store the query/connection settings; no WMI call is made here.

        ``property_names`` is copied, so the caller's list is left untouched
        even when the timestamp/frequency properties are appended below.
        """
        self.logger = logger
        self.class_name = class_name
        self.host = host
        self.namespace = namespace
        self.username = username
        self.password = password
        self.is_raw_perf_class = "_PERFRAWDATA_" in class_name.upper()

        # Work on a private copy: extending the caller's list in place would
        # leak the extra properties into every other user of that list.
        property_names = list(property_names)

        # Include required properties for making calculations with raw
        # performance counters:
        # https://msdn.microsoft.com/en-us/library/aa394299(v=vs.85).aspx
        if self.is_raw_perf_class:
            property_names.extend([
                "Timestamp_Sys100NS",
                "Frequency_Sys100NS",
                # IMPORTANT: To improve performance and since they're currently
                # not needed, do not include the other Timestamp/Frequency
                # properties:
                #   - Timestamp_PerfTime
                #   - Timestamp_Object
                #   - Frequency_PerfTime
                #   - Frequency_Object
            ])

        self.property_names = property_names
        self.current_sample = None
        self.previous_sample = None
        # None until the first query has been issued; afterwards a
        # CaseInsensitiveDict mapping property name -> CounterType.
        self.property_counter_types = None

    def sample(self):
        """Run the query and rotate ``current_sample`` into ``previous_sample``."""
        # Because raw performance counters usually require a difference of
        # previous/current values for the calculation, initial values are
        # queried.
        if self.is_raw_perf_class and not self.previous_sample:
            self.logger.debug(u"Querying for initial sample for raw performance counter.")
            self.current_sample = self._query(self.class_name, self.property_names)

        self.previous_sample = self.current_sample
        self.current_sample = self._query(self.class_name, self.property_names)

        self.logger.debug(u"Sample: %s", self.current_sample)

    def __len__(self):
        """Return the number of items in the current sample (0 before any sample)."""
        if self.current_sample is None:
            return 0
        return len(self.current_sample)

    def item_property(self, item_index, property_name):
        """Return one property of one sampled item.

        For raw performance classes with a cached ``CounterType`` the value is
        computed from the previous and current samples; otherwise the value
        stored in the current sample is returned as is.

        Returns ``None`` (after logging) when ``item_index`` does not exist in
        a sample, or when a calculation divides by zero.
        """
        counter_types = self.property_counter_types
        counter_type = counter_types.get(property_name) if counter_types else None

        try:
            if self.is_raw_perf_class and counter_type:
                return self._calculate_property_value(item_index, property_name, counter_type)
            # "or []" turns "no sample yet" into the IndexError path below.
            return (self.current_sample or [])[item_index][property_name]
        except IndexError:
            self.logger.info("Invalid item index: {class_name}[{item_index}]['{property_name}'].".format(
                class_name=self.class_name,
                item_index=item_index,
                property_name=property_name
            ), exc_info=True)
            return None
        except ZeroDivisionError:
            # A calculator divided by a zero interval or frequency.
            self.logger.debug(u"Unable to calculate %s[%s]['%s']: division by zero.",
                self.class_name,
                item_index,
                property_name
            )
            return None

    def _calculate_property_value(self, item_index, property_name, counter_type):
        """Apply the calculator registered for ``counter_type``.

        Falls back to the raw current value when no calculator is registered
        for that counter type.
        """
        calculate = counter_type_calculators.get(counter_type)
        if calculate is None:
            self.logger.debug(u"No calculator for CounterType %s (%s.%s); using the raw value.",
                counter_type,
                self.class_name,
                property_name
            )
            return self.current_sample[item_index][property_name]

        return calculate(self.previous_sample, self.current_sample, item_index, property_name)

    def _query(self, class_name, property_names):
        """Run ``select <property_names> from <class_name>`` and return a list of items.

        Returns an empty list when the query fails with a COM error.
        """
        wql = "select {property_names} from {class_name}".format(
            property_names=",".join(property_names),
            class_name=class_name
        )
        self.logger.debug(u"Querying WMI: %s", wql)

        # From: https://msdn.microsoft.com/en-us/library/aa393866(v=vs.85).aspx
        flag_return_immediately = 0x10  # Default flag.
        flag_forward_only = 0x20
        flag_use_amended_qualifiers = 0x20000

        query_flags = flag_return_immediately | flag_forward_only

        # For the first query, cache the qualifiers to determine each
        # property's "CounterType". Only raw performance classes use the
        # counter types, so only they pay for the qualifier lookup.
        is_first_query = self.property_counter_types is None
        includes_qualifiers = is_first_query and self.is_raw_perf_class
        if is_first_query:
            self.property_counter_types = CaseInsensitiveDict()
        if includes_qualifiers:
            query_flags |= flag_use_amended_qualifiers

        try:
            raw_results = self._get_connection().ExecQuery(wql, "WQL", query_flags)
            results = self._extract_results(raw_results, property_names, includes_qualifiers=includes_qualifiers)
        except pywintypes.com_error:
            self.logger.warning(u"Failed to execute WMI query (%s)", wql, exc_info=True)
            results = []
            if includes_qualifiers:
                # The qualifiers were not (fully) read; forget the cache so
                # the next query asks for them again.
                self.property_counter_types = None

        return results

    def _extract_results(self, raw_results, property_names, includes_qualifiers):
        """Convert raw WMI result objects into a list of CaseInsensitiveDict items.

        Every requested property is present in each item (``None`` when WMI
        did not return it). Values are converted to ``float`` when possible
        and kept unchanged otherwise. When ``includes_qualifiers`` is true,
        each property's ``CounterType`` qualifier is cached in
        ``self.property_counter_types``.
        """
        results = []

        for res in raw_results:
            # Ensure all properties are available. Use case-insensitivity
            # because some properties are returned with different cases.
            item = CaseInsensitiveDict()
            for prop_name in property_names:
                item[prop_name] = None

            for wmi_property in res.Properties_:
                # IMPORTANT: To improve performance, only access the Qualifiers
                # if the "CounterType" hasn't already been cached.
                should_get_qualifier_type = (
                    includes_qualifiers and
                    wmi_property.Name not in self.property_counter_types
                )

                if should_get_qualifier_type:
                    # Can't index into "Qualifiers_" for keys that don't exist
                    # without getting an exception.
                    qualifiers = {q.Name: q.Value for q in wmi_property.Qualifiers_}

                    # Some properties like "Name" and "Timestamp_Sys100NS" do
                    # not have a "CounterType" (since they're not a counter).
                    # Therefore, they're ignored.
                    if "CounterType" in qualifiers:
                        counter_type = qualifiers["CounterType"]
                        self.property_counter_types[wmi_property.Name] = counter_type

                        self.logger.debug(u"Caching property qualifier CounterType: %s.%s = %s",
                            self.class_name,
                            wmi_property.Name,
                            counter_type
                        )
                    else:
                        self.logger.debug(u"CounterType qualifier not found for %s.%s",
                            self.class_name,
                            wmi_property.Name
                        )

                try:
                    item[wmi_property.Name] = float(wmi_property.Value)
                except (TypeError, ValueError):
                    # ValueError: non-numeric string; TypeError: a value that
                    # float() does not accept at all, such as None.
                    item[wmi_property.Name] = wmi_property.Value

            results.append(item)

        return results

    def _get_connection(self):
        """Return a WMI connection for this host/namespace/username.

        Connections are cached in the module-level ``wmi_connections`` dict,
        so samplers with the same host, namespace and username share one.
        """
        connection_key = "{host}:{namespace}:{username}".format(
            host=self.host,
            namespace=self.namespace,
            username=self.username
        )

        if connection_key in wmi_connections:
            self.logger.debug(u"Using cached connection (host=%s, namespace=%s, username=%s).",
                self.host,
                self.namespace,
                self.username
            )
            return wmi_connections[connection_key]

        self.logger.debug("Connecting to WMI server (host=%s, namespace=%s, username=%s).",
            self.host,
            self.namespace,
            self.username
        )

        locator = Dispatch("WbemScripting.SWbemLocator")
        wmi_locators[connection_key] = locator

        connection = locator.ConnectServer(self.host, self.namespace, self.username, self.password)
        wmi_connections[connection_key] = connection

        return connection
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment