Created
April 16, 2012 10:46
-
-
Save SaveTheRbtz/2397652 to your computer and use it in GitHub Desktop.
Linux per-file IO statistics
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import logging as log | |
from fileinput import input | |
from collections import namedtuple, defaultdict | |
from pprint import pprint | |
# Log verbosely by default; when the interpreter sets ``__debug__`` to False
# only warnings and above are emitted.
log_level = log.DEBUG if __debug__ else log.WARNING
log.basicConfig(level=log_level)

# XXX: Backport @lru_cache
# pid -> {(map_start, map_end): pathname}
CACHED_ATOF = defaultdict(dict)
# '/proc/<pid>/maps' path -> open file object
CACHED_FILES = {}
class Stats(object):
    """Simple IO statistics class.

    Attributes:
        perfile: ``defaultdict(int)`` of accumulated totals per key.
        iops: number of IO operations counted.
        fault_iops: number of page-fault operations counted.
    """

    def __init__(self):
        self.perfile = defaultdict(int)
        self.iops = 0
        self.fault_iops = 0

    def clear(self):
        """Reset every counter to its initial, empty state."""
        self.perfile.clear()
        self.iops = 0
        self.fault_iops = 0

    def __str__(self):
        # __str__ must return a string; returning the attribute dict itself
        # makes ``str(stats)`` raise TypeError.
        return str(self.__dict__)
def address_in_range(address, map_start, map_end):
    """Tell whether ``address`` falls inside ``[map_start, map_end)``.

    Args:
        address: address as a hexadecimal string (a ``0x`` prefix is accepted).
        map_start: first address of the range, as an integer.
        map_end: first address *past* the range, as an integer.

    Returns:
        True if the address belongs to the range, False otherwise.

    Raises:
        ValueError: if ``address`` is not a valid hexadecimal string.
    """
    # The end address of a /proc/<pid>/maps entry is exclusive: it is the
    # start of whatever follows, so it must not match this range.
    return map_start <= int(address, 16) < map_end
def update_file_cache(pid, address):
    """Look up ``address`` in ``/proc/<pid>/maps`` and cache its backing file.

    Only mappings that contain ``address`` are stored, in
    ``CACHED_ATOF[pid]`` keyed by ``(map_start, map_end)``.  The maps file
    object is kept open in ``CACHED_FILES`` and re-read on later calls.
    The lookup is best-effort: failures are logged, never raised.

    Args:
        pid: process id as a string (it is concatenated into the path).
        address: address to look up, as a hexadecimal string.
    """
    # NOTE(review): address_to_file() only calls this the first time a pid is
    # seen, so a pid ends up with just the mapping of its first fault cached.
    # Confirm whether every mapping was meant to be stored instead.
    try:
        filename = '/proc/' + pid + '/maps'
        if filename not in CACHED_FILES:
            CACHED_FILES[filename] = open(filename)
        maps_file = CACHED_FILES[filename]
        # Rewind before reading so that a read which failed part-way on an
        # earlier call cannot leave the cached handle at a stale offset.
        maps_file.seek(0)
        lines = maps_file.readlines()
    except Exception:
        log.info("Can't get file for pid: [ {0} ] from address [ {1} ]".format(pid, address), exc_info=True)
        return

    for line in lines:
        # Columns: address perms offset dev inode [pathname].  Splitting at
        # most five times keeps a pathname containing spaces in one piece.
        fields = line.split(None, 5)
        if len(fields) == 5:
            # Anonymous mapping: there is no pathname to record.
            continue
        try:
            map_start, map_end = [int(part, 16) for part in fields[0].split('-')]
            if address_in_range(address, map_start, map_end):
                CACHED_ATOF[pid][map_start, map_end] = fields[5].rstrip('\n')
        except Exception:
            log.debug("Can't parse line: [ {0} ]".format(line))
def address_to_file(pid, address):
    """Translate ``address`` of process ``pid`` into a cached pathname.

    The maps cache is populated only the first time a pid is seen.  When no
    cached range contains the address, ``pid/address`` is returned if
    ``__debug__`` is set and ``'UNKNOWN'`` otherwise.
    """
    # XXX: Dirty hack for very loaded servers and static apps
    first_time_seen = pid not in CACHED_ATOF
    if first_time_seen:
        print('{0:=^80}'.format(' CACHE MISS [ {0} ]'.format(pid)))
        update_file_cache(pid, address)

    for (map_start, map_end), pathname in CACHED_ATOF[pid].items():
        if address_in_range(address, map_start, map_end):
            return pathname

    return (pid + '/' + address) if __debug__ else 'UNKNOWN'
def acc_io(IO, line, read, write):
    """Account one ``vfs_read`` / ``vfs_write`` record.

    Args:
        IO: record factory called as ``IO(type, pid, file, size)``.
        line: record text, ``type pid file size`` separated by whitespace
            (the stap script in this gist emits tabs).  The file name may
            itself contain whitespace.
        read: accumulator updated when ``type`` contains ``'read'``.
        write: accumulator updated otherwise.

    Raises:
        TypeError: if the line does not yield exactly four fields.
        ValueError: if the size field is not an integer.
    """
    # The first two fields and the last one never contain whitespace, so peel
    # them off from both ends; whatever remains in the middle is the file name.
    head = line.split(None, 2)
    fields = head[:2]
    if len(head) == 3:
        fields += head[2].rsplit(None, 1)
    io = IO(*fields)

    acc = read if 'read' in io.type else write
    acc.perfile[io.file] += int(io.size)
    acc.iops += 1
def acc_pfault(Pfault, line, read, write, page_size=4096):
    """Account one ``pfault_r`` / ``pfault_w`` record.

    Args:
        Pfault: record factory called as ``Pfault(type, pid, address)``.
        line: whitespace-separated ``type pid address`` record.
        read: accumulator updated when ``type`` contains ``'_r'``.
        write: accumulator updated otherwise.
        page_size: amount charged to the faulting file for each fault.
            Defaults to 4096, the value that used to be hard-coded.

    Raises:
        TypeError: if the line does not yield exactly three fields.
    """
    pfault = Pfault(*line.split())
    acc = read if '_r' in pfault.type else write
    acc.perfile[address_to_file(pfault.pid, pfault.address)] += page_size
    # A page fault counts both as a fault and as a regular IO operation.
    acc.fault_iops += 1
    acc.iops += 1
def sort_by_value(dict_, reverse=True):
    """Return the ``(key, value)`` pairs of ``dict_`` as a list ordered by value.

    The largest value comes first unless ``reverse`` is false.
    """
    pairs = list(dict_.items())
    pairs.sort(key=lambda pair: pair[1], reverse=reverse)
    return pairs
def print_stats(read, write):
    """Print a report for both accumulators, then reset them.

    The report holds a summary, the 20 largest entries of each accumulator
    and, when ``__debug__`` is set, the complete sorted tables.

    Args:
        read: accumulator for read operations.
        write: accumulator for write operations.
    """
    print("=== SUMMARY ===")
    print("Total IO: {0}".format(read.iops + write.iops))
    print("Pagefaults IO: {0}".format(read.fault_iops + write.fault_iops))
    print("Reads/Writes: {0}/{1}".format(read.iops, write.iops))
    # The per-file totals are accumulated in bytes (vfs return values and one
    # page per fault), so scale them to match the "Kbs" label.
    read_kbs = sum(read.perfile.values()) / 1024.0
    write_kbs = sum(write.perfile.values()) / 1024.0
    print("Read/Write Kbs: {0:.1f}/{1:.1f}".format(read_kbs, write_kbs))

    for name, stat in (('read', read), ('write', write)):
        print("=== STAT {0} ===".format(name))
        for k, v in sort_by_value(stat.perfile)[:20]:
            print("{0}:\t{1}".format(k, v))

    if __debug__:
        print("=== RAW ===")
        pprint(sort_by_value(read.perfile))
        pprint(sort_by_value(write.perfile))

    # Each report covers only the records received since the previous one.
    read.clear()
    write.clear()
def main():
    """Aggregate trace records from the input and print periodic reports.

    Lines starting with ``vfs_`` or ``pfault_`` are accumulated, a line
    starting with ``__PRINT__`` triggers a report, and a last report is
    printed once the input is exhausted.
    """
    IO = namedtuple('IO', 'type pid file size')
    Pfault = namedtuple('Pfault', 'type pid address')
    reads, writes = Stats(), Stats()

    for record in input():
        if record.startswith('vfs_'):
            acc_io(IO, record, reads, writes)
            continue
        if record.startswith('pfault_'):
            acc_pfault(Pfault, record, reads, writes)
            continue
        if record.startswith('__PRINT__'):
            print_stats(reads, writes)

    print_stats(reads, writes)


if __name__ == '__main__':
    main()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env stap | |
# | |
# Usage: `stap filestat.stp app_name | python -u aggregate_filestat.py` | |
# | |
# Name of the executable to trace, taken from the first script argument.
global PROCNAME = @1
# State saved when a page fault starts and consumed when it returns.
global fault_address, fault_access
# NOTE(review): assigned in `probe begin` below but never read in the
# visible part of this script -- confirm whether it is still needed.
global time_offset
probe begin { time_offset = gettimeofday_us() }
# Remember the address and access type of a page fault until it returns.
probe vm.pagefault {
  # Record faults of the traced executable only: entries of other processes
  # would never be consumed and would eventually fill up the arrays.
  if (execname() != PROCNAME) next
  # Key by thread id: threads of one process can fault concurrently and
  # would overwrite each other's entry if keyed by process id.
  t = tid()
  fault_address[t] = address
  fault_access[t] = write_access ? "w" : "r"
}
# Report major faults as "pfault_<r|w>\t<pid>\t<address>" records.
probe vm.pagefault.return {
  t = tid()
  if (!(t in fault_address)) next
  if (execname() == PROCNAME && vm_fault_contains(fault_type, VM_FAULT_MAJOR))
    printf("pfault_%s\t%d\t%p\n", fault_access[t], pid(), fault_address[t])
  # Always drop the saved state, whether or not the fault was reported.
  delete fault_address[t]
  delete fault_access[t]
}
# Emit one "<function>\t<pid>\t<path>\t<return value>" record for every
# vfs_read/vfs_write call of the traced executable that returned a positive
# value.
probe kernel.function("vfs_write").return,
      kernel.function("vfs_read").return {
  if (execname() != PROCNAME || $return <= 0) next
  printf("%s\t%d\t%s\t%d\n", probefunc(), pid(), d_path(&$file->f_path), $return)
}
# Once per second emit the marker line on which the aggregator prints a report.
probe timer.s(1) {
  println("__PRINT__")
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment