Last active
December 14, 2020 11:29
-
-
Save tueda/3e1b2bec8545c48737c7 to your computer and use it in GitHub Desktop.
FORM log profiler. #bin #form #python NOTE: migrating to https://github.com/tueda/formprof.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/bin/sh | |
| """:" . | |
| exec python "$0" "$@" | |
| """ | |
| from __future__ import print_function | |
| import argparse | |
| import copy | |
| import re | |
| import sys | |
| __doc__ = """\ | |
| FORM log profiler. | |
| Example | |
| ------- | |
| $ formprof.py myformprogram.log | |
| $ formprof.py -u myformprogram.log | |
| $ formprof.py -t myformprogram.log | |
| $ formprof.py -m myformprogram.log | |
| $ formprof.py -e myformprogram.log | |
| Python versions | |
| --------------- | |
| 2.7, 3.2, 3.3, 3.4, 3.5 | |
| """ | |
| if sys.version_info[0] >= 3: | |
| string_types = str, | |
| else: | |
| string_types = basestring, | |
| class Stat(object): | |
| """Statistics object.""" | |
| __slots__ = ( | |
| 'name', # module name (str) | |
| 'expr', # expression name (str) | |
| 'start', # starting time (float) | |
| 'end', # end time (float) | |
| 'elapsed', # elapsed time (float) | |
| 'count', # number of occurrence (int) | |
| 'generated_terms', # number of generated terms (int) | |
| 'terms_in_output', # number of terms in output (int) | |
| 'bytes_used', # number of bytes used in output (int) | |
| 'parent', # parent node (Stat) | |
| 'children', # child nodes (List[Stat]) | |
| ) | |
| def is_leaf(self): | |
| """Return True if the object is a leaf.""" | |
| # A node has one or more module statistics information, so `start` and | |
| # `end` are useless. | |
| return hasattr(self, 'start') | |
| def add_child(self, other): | |
| """Add a child item.""" | |
| if not isinstance(other, Stat): | |
| raise TypeError('the object is not "Stat"') | |
| if getattr(other, 'parent', None): | |
| raise ValueError('the object already has a parent.') | |
| n = getattr(self, 'parent', None) | |
| while n: | |
| if n == other: | |
| raise ValueError('tried to add an ancestor as a child') | |
| n = n.parent | |
| # NOTE: objects in a tree must have the attributes `parent`, | |
| # `children`, `elapsed` and `count`, and most likely have `name`. | |
| if not hasattr(self, 'children'): | |
| if self.is_leaf(): | |
| # Convert this object from a leaf to a node. | |
| s = copy.copy(self) | |
| self.children = [] | |
| self.children.append(s) | |
| s.parent = self | |
| s.children = [] | |
| for a in self.__slots__: | |
| if a not in ('name', 'elapsed', 'count', 'parent', | |
| 'children'): | |
| if hasattr(self, a): | |
| delattr(self, a) | |
| else: | |
| self.children = [] | |
| if not hasattr(self, 'parent'): | |
| self.parent = None | |
| self.children.append(other) | |
| other.parent = self | |
| if not hasattr(other, 'children'): | |
| other.children = [] | |
| if not hasattr(self, 'elapsed'): | |
| self.elapsed = 0.0 | |
| if not hasattr(self, 'count'): | |
| self.count = 0 | |
| if not hasattr(other, 'elapsed'): | |
| other.elapsed = 0.0 | |
| if not hasattr(other, 'count'): | |
| other.count = 0 | |
| # Add the elapsed time and count to this node and ancestors. | |
| n = self | |
| while n: | |
| n.elapsed += other.elapsed | |
| n.count += other.count | |
| n = n.parent | |
| def __str__(self): | |
| """Return the string representation.""" | |
| items = [] | |
| for a in self.__slots__: | |
| if hasattr(self, a): | |
| items.append('{0}={1}'.format(a, getattr(self, a))) | |
| return '[Stat{0}{1}]'.format( | |
| ' ' if items else '', | |
| ', '.join(items), | |
| ) | |
| def analyze_logfile(file): | |
| """Generator of Stat objects from a file.""" | |
| if isinstance(file, string_types): | |
| with open(file, 'r') as f: | |
| for s in analyze_logfile(f): | |
| yield s | |
| return | |
| SEARCH_TIME = 0 # noqa: N806 | |
| SKIP_TIME = 1 # noqa: N806 | |
| SEARCH_EXPR = 2 # noqa: N806 | |
| SEARCH_NAME = 3 # noqa: N806 | |
| state = SEARCH_TIME | |
| name = None | |
| expr = None | |
| time = 0.0 | |
| old_time = 0.0 | |
| generated_terms = 0 | |
| terms_in_output = 0 | |
| bytes_used = 0 | |
| for line in file: | |
| line = line.rstrip() | |
| if state == SEARCH_TIME: | |
| m = re.match('\s*(?:Thread|Process) \d+ reporting', line) | |
| if m: | |
| state = SKIP_TIME | |
| continue | |
| m = re.match('W?Time =\s*([0-9.]+) sec \s*' | |
| 'Generated terms =\s*(\d+)', line) | |
| if m: | |
| time = float(m.group(1)) | |
| generated_terms = int(m.group(2)) | |
| state = SEARCH_EXPR | |
| elif state == SKIP_TIME: | |
| m = re.match('W?Time =\s*[0-9.]+ sec \s*' | |
| 'Generated terms =\s*\d+', line) | |
| if m: | |
| state = SEARCH_TIME | |
| elif state == SEARCH_EXPR: | |
| m = re.search('[0-9]+ Terms (?:left|active)', line) | |
| if m: | |
| state = SEARCH_TIME | |
| m = re.match('\s*(\S+?)\s* Terms in output =\s*(\d+)', line) | |
| if m: | |
| expr = m.group(1) | |
| terms_in_output = int(m.group(2)) | |
| state = SEARCH_NAME | |
| elif state == SEARCH_NAME: | |
| m = re.match('(.+?)Bytes used \s*=\s*(\d+)', line) | |
| if m: | |
| name = m.group(1).strip() | |
| if not name: | |
| name = '<unnamed>' | |
| bytes_used = int(m.group(2)) | |
| state = SEARCH_TIME | |
| stat = Stat() | |
| stat.name = name | |
| stat.expr = expr | |
| stat.start = old_time | |
| stat.end = time | |
| stat.elapsed = time - old_time | |
| stat.count = 1 | |
| stat.generated_terms = generated_terms | |
| stat.terms_in_output = terms_in_output | |
| stat.bytes_used = bytes_used | |
| old_time = time | |
| yield stat | |
| def print_normal(stats, sort): | |
| """Print statistics in the normal mode.""" | |
| total_time = stats[-1].end | |
| # Sort. | |
| if sort: | |
| stats.sort(key=lambda s: -s.elapsed) | |
| # Stringification. | |
| stats = [( | |
| s.name, | |
| s.expr, | |
| '{0:.2f}'.format(s.elapsed), | |
| '{0:.2%}'.format(s.elapsed / total_time), | |
| '{0:.2f}'.format(s.start), | |
| '{0:.2f}'.format(s.end), | |
| str(s.generated_terms), | |
| str(s.terms_in_output), | |
| str(s.bytes_used), | |
| ) for s in stats] | |
| # Construct the format. | |
| columns = [ | |
| 'module ', | |
| 'expr ', | |
| 'time', | |
| ' ', | |
| 'start', | |
| 'end', | |
| 'genterms', | |
| 'outterms', | |
| 'bytes', | |
| ] | |
| column_widths = [ | |
| max(max(len(s[i]) for s in stats), len(columns[i])) | |
| for i in range(len(columns)) | |
| ] | |
| fmt = ( | |
| '{{0:<{0}}} {{1:<{1}}} {{2:>{2}}} {{3:>{3}}} {{4:>{4}}} ' | |
| '{{5:>{5}}} {{6:>{6}}} {{7:>{7}}} {{8:>{8}}}' | |
| ).format(*column_widths) | |
| # Print the result. | |
| print(fmt.format(*columns)) | |
| for s in stats: | |
| print(fmt.format(*s)) | |
| def print_module(stats, sort): | |
| """Print statistics combined for each module.""" | |
| total_time = stats[-1].end | |
| # Combine. | |
| new_stats = {} # str -> Stat | |
| for s in stats: | |
| if s.name not in new_stats: | |
| t = Stat() | |
| t.name = s.name | |
| t.count = 1 | |
| t.elapsed = s.elapsed | |
| new_stats[s.name] = t | |
| else: | |
| t = new_stats[s.name] | |
| t.count += 1 | |
| t.elapsed += s.elapsed | |
| stats = list(new_stats.values()) | |
| # Sort. | |
| if sort: | |
| stats.sort(key=lambda s: -s.elapsed) | |
| # Stringification. | |
| stats = [( | |
| s.name, | |
| str(s.count), | |
| '{0:.2f}'.format(s.elapsed), | |
| '{0:.2%}'.format(s.elapsed / total_time), | |
| ) for s in stats] | |
| # Construct the format. | |
| columns = [ | |
| 'module ', | |
| 'count', | |
| 'time', | |
| ' ', | |
| ] | |
| column_widths = [ | |
| max(max(len(s[i]) for s in stats), len(columns[i])) | |
| for i in range(len(columns)) | |
| ] | |
| fmt = ( | |
| '{{0:<{0}}} {{1:>{1}}} {{2:>{2}}} {{3:>{3}}}' | |
| ).format(*column_widths) | |
| # Print the result. | |
| print(fmt.format(*columns).rstrip()) | |
| for s in stats: | |
| print(fmt.format(*s)) | |
| def print_expr(stats, sort): | |
| """Print statistics combined for each expression.""" | |
| total_time = stats[-1].end | |
| # Combine. | |
| new_stats = {} # str -> Stat | |
| for s in stats: | |
| if s.expr not in new_stats: | |
| t = Stat() | |
| t.expr = s.expr | |
| t.count = 1 | |
| t.elapsed = s.elapsed | |
| new_stats[s.expr] = t | |
| else: | |
| t = new_stats[s.expr] | |
| t.count += 1 | |
| t.elapsed += s.elapsed | |
| stats = list(new_stats.values()) | |
| # Sort. | |
| if sort: | |
| stats.sort(key=lambda s: -s.elapsed) | |
| # Stringification. | |
| stats = [( | |
| s.expr, | |
| str(s.count), | |
| '{0:.2f}'.format(s.elapsed), | |
| '{0:.2%}'.format(s.elapsed / total_time), | |
| ) for s in stats] | |
| # Construct the format. | |
| columns = [ | |
| 'expr ', | |
| 'count', | |
| 'time', | |
| ' ', | |
| ] | |
| column_widths = [ | |
| max(max(len(s[i]) for s in stats), len(columns[i])) | |
| for i in range(len(columns)) | |
| ] | |
| fmt = ( | |
| '{{0:<{0}}} {{1:>{1}}} {{2:>{2}}} {{3:>{3}}}' | |
| ).format(*column_widths) | |
| # Print the result. | |
| print(fmt.format(*columns).rstrip()) | |
| for s in stats: | |
| print(fmt.format(*s)) | |
| def print_tree(stats, sort): | |
| """Print statistics combined for each module (tree-like).""" | |
| total_time = stats[-1].end | |
| # Construct the tree. Each node is determined from the module name, | |
| # separated by '-'. For example, | |
| # MyModule-1 | |
| # MyModule-2-a | |
| # MyModule-2-b | |
| # MyModule-3 | |
| # MyModule-3-a | |
| # are converted as | |
| # MyModule* | |
| # +- MyModule-1 | |
| # +- MyModule-2* | |
| # | +- MyModule-2-a | |
| # | +- MyModule-2-b | |
| # +- MyModule-3* | |
| # +- MyModule-3 | |
| # +- MyModule-3-a | |
| root = Stat() | |
| root.name = '*' | |
| nodes = {} # str -> Stat | |
| for s in stats: | |
| names = s.name.split('-') | |
| names = ['-'.join(names[:i + 1]) for i in range(len(names))] | |
| n = root | |
| for name in names: | |
| if name not in nodes: | |
| new_node = Stat() | |
| new_node.name = name + '*' | |
| nodes[name] = new_node | |
| n.add_child(new_node) | |
| n = nodes[name] | |
| n.add_child(s) | |
| # Sort. | |
| def sort_tree(node): | |
| node.children.sort(key=lambda s: -s.elapsed) | |
| for n in node.children: | |
| sort_tree(n) | |
| if sort: | |
| sort_tree(root) | |
| # Stringification. | |
| stats = [] | |
| def walk_tree(node, indent_str, last): | |
| if len(node.children) == 1: | |
| walk_tree(node.children[0], indent_str, last) | |
| return | |
| s = node | |
| ss = indent_str + ('+- ' if node != root else '') + s.name | |
| if node.is_leaf(): | |
| stats.append([ | |
| ss, | |
| s.expr, | |
| '', | |
| '{0:.2f}'.format(s.elapsed), | |
| '{0:.2%}'.format(s.elapsed / total_time), | |
| '{0:.2f}'.format(s.start), | |
| '{0:.2f}'.format(s.end), | |
| str(s.generated_terms), | |
| str(s.terms_in_output), | |
| str(s.bytes_used), | |
| ]) | |
| else: | |
| stats.append([ | |
| ss, | |
| '', | |
| str(s.count), | |
| '{0:.2f}'.format(s.elapsed), | |
| '{0:.2%}'.format(s.elapsed / total_time), | |
| '', | |
| '', | |
| '', | |
| '', | |
| '' | |
| ]) | |
| if node != root: | |
| if (last): | |
| indent_str += ' ' | |
| else: | |
| indent_str += '| ' | |
| for n in node.children: | |
| walk_tree(n, indent_str, n == node.children[-1]) | |
| walk_tree(root, '', True) | |
| # Construct the format. | |
| columns = [ | |
| 'module ', | |
| 'expr ', | |
| 'count', | |
| 'time', | |
| ' ', | |
| 'start', | |
| 'end', | |
| 'genterms', | |
| 'outterms', | |
| 'bytes', | |
| ] | |
| column_widths = [ | |
| max(max(len(s[i]) for s in stats), len(columns[i])) | |
| for i in range(len(columns)) | |
| ] | |
| fmt = ( | |
| '{{0:<{0}}} {{1:<{1}}} {{2:>{2}}} {{3:>{3}}} {{4:>{4}}} ' | |
| '{{5:>{5}}} {{6:>{6}}} {{7:>{7}}} {{8:>{8}}} {{9:>{9}}}' | |
| ).format(*column_widths) | |
| # Print the result. | |
| print(fmt.format(*columns)) | |
| for s in stats: | |
| print(fmt.format(*s).rstrip()) | |
| def main(): | |
| """Entry point.""" | |
| from signal import signal, SIGPIPE, SIG_DFL | |
| signal(SIGPIPE, SIG_DFL) | |
| # Parse the command line arguments. | |
| parser = argparse.ArgumentParser() | |
| parser.add_argument('logfile', | |
| type=str, | |
| metavar='LOGFILE', | |
| help='log file to be analyzed') | |
| parser.add_argument('-t', | |
| '--tree', | |
| action='store_const', | |
| const='tree', | |
| dest='mode', | |
| help='combine statistics for each module (tree-like)') | |
| parser.add_argument('-m', | |
| '--module', | |
| action='store_const', | |
| const='module', | |
| dest='mode', | |
| help='combine statistics for each module') | |
| parser.add_argument('-e', | |
| '--expr', | |
| action='store_const', | |
| const='expr', | |
| dest='mode', | |
| help='combine statistics for each expression') | |
| parser.add_argument('-s', | |
| '--sort', | |
| action='store_const', | |
| const=True, | |
| default=True, | |
| help='sort by time (default)') | |
| parser.add_argument('-u', | |
| '--unsort', | |
| action='store_const', | |
| const=False, | |
| dest='sort', | |
| help='do not sort') | |
| args = parser.parse_args() | |
| # Parse the log file. | |
| stats = list(analyze_logfile(args.logfile)) | |
| if not stats: | |
| print('empty log', file=sys.stderr) | |
| return | |
| # Print statistics. | |
| if args.mode == 'tree': | |
| print_tree(stats, args.sort) | |
| elif args.mode == 'module': | |
| print_module(stats, args.sort) | |
| elif args.mode == 'expr': | |
| print_expr(stats, args.sort) | |
| else: | |
| print_normal(stats, args.sort) | |
| if __name__ == '__main__': | |
| main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment