beugley · March 14, 2022 00:13 · tomdottom · Nov 29, 2016
diff --git a/sar_to_json.py b/sar_to_json.py
 #!/usr/bin/env python
 ###############################################################################
 ## sar_to_json.py
 ##
 ## Reformats output from sadf to json.  sadf must be invoked with the -D
 ## switch.  Timestamps are displayed in ISO 8601 format
 ## (YYYY-MM-DDThh:mm:ssZ).
 ## Example:    sadf -D -- -A | sar_to_json.py
 ###############################################################################

 import sys
 import datetime
 import json
 from collections import OrderedDict

 # Only expected patterns will be processed.  All others are ignored.
 # The 4th header column names the report.  For the names in pattern_3key
 # the first 3 columns (host, interval, timestamp) are the whole key.
 pattern_3key = ["tps", "pgpgin/s", "runq-sz", "kbmemfree", "frmpg/s",
                 "kbswpfree", "dentunusd", "proc/s", "pswpin/s", "call/s",
                 "scall/s", "totsck"]
 # For the names in pattern_4key the 4th column is an extra key field (such
 # as CPU, which has 1 data line per CPU).
 # "TTY" would also belong here, but TTY device activity is printed
 # incorrectly by my version of sadf, so it's excluded for now.
 pattern_4key = ["CPU", "DEV", "IFACE"]

 # Reference point for converting epoch seconds without
 # datetime.utcfromtimestamp(), which is deprecated since Python 3.12.
 _EPOCH = datetime.datetime(1970, 1, 1)


 def _iso_utc(epoch_seconds):
     """Format epoch seconds (int or decimal string) as YYYY-MM-DDThh:mm:ssZ.

     Raises ValueError when epoch_seconds is not an integer.
     """
     moment = _EPOCH + datetime.timedelta(seconds=int(epoch_seconds))
     return moment.isoformat() + 'Z'


 def _key_width(columns):
     """Return the number of key columns (3 or 4) for a header.

     Returns 0 when the header is too short or its 4th column is in neither
     pattern_3key nor pattern_4key; data lines under such a header are
     ignored.
     """
     if len(columns) < 4:
         return 0
     if columns[3] in pattern_3key:
         return 3
     if columns[3] in pattern_4key:
         return 4
     return 0


 def sar_to_records(lines):
     """Merge semicolon-separated sadf lines into one record per key.

     lines is any iterable of text lines.  A line starting with "#" is a
     header giving the column names for the data lines that follow it.
     Data lines seen before any recognised header, blank lines and lines
     with fewer than 3 fields are skipped.  Values beyond the last named
     column are dropped because there is no name to store them under.

     Returns an OrderedDict mapping "host|interval|timestamp" to an
     OrderedDict of column name -> value (all values stay strings).  Under
     a 4-key header the 4th column name maps to a child dictionary holding
     one dictionary per 4th-column value.

     Raises ValueError when the timestamp field is not an integer.
     """
     records = OrderedDict()
     columns = []
     key_cols = 0        # no header seen yet: ignore data lines
     for raw in lines:
         line = raw.strip()
         if not line:
             continue
         if line.startswith('#'):
             # Get column names from the header line.
             columns = line[1:].strip().split(';')
             key_cols = _key_width(columns)
             continue
         if not key_cols:
             continue

         # Get values from the data line.
         values = line.split(';')
         if len(values) < 3:
             continue
         values[2] = _iso_utc(values[2])
         key = '|'.join(values[:3])
         # Add the 3-column key if it doesn't already exist.
         if key not in records:
             records[key] = OrderedDict()
         record = records[key]

         named = list(zip(columns, values))
         if key_cols == 3:
             for name, value in named:
                 record[name] = value
             continue

         for name, value in named[:3]:
             record[name] = value
         if len(named) > 3:
             # This data line contains a 4th key field.  Create a child
             # dictionary to hold all sibling data lines.
             column_sub, key_sub = named[3]
             if column_sub not in record:
                 record[column_sub] = OrderedDict()
             siblings = record[column_sub]
             if len(named) > 4:
                 if key_sub not in siblings:
                     siblings[key_sub] = OrderedDict()
                 for name, value in named[4:]:
                     siblings[key_sub][name] = value
     return records


 def main():
     """Read sadf output from stdin and print one JSON object per key."""
     for record in sar_to_records(sys.stdin).values():
         print(json.dumps(record))


 if __name__ == "__main__":
     main()
	#!/usr/bin/env python
	###############################################################################
	## sar_to_json.py
	##
	## Reformats output from sadf to json. sadf must be invoked with the -D
	## switch. Timestamps are displayed in ISO 8601 format (YYYY-MM-DDThh:mm:ssZ).
	## Example: sadf -D -- -A | sar_to_json.py
	###############################################################################

	import sys
	import datetime
	import json
	from collections import OrderedDict

	# Only expected patterns will be processed. All others are ignored.
	# The 4th header column names the report. For the names in pattern_3key
	# the first 3 columns (host, interval, timestamp) are the whole key.
	pattern_3key = ["tps", "pgpgin/s", "runq-sz", "kbmemfree", "frmpg/s",
	                "kbswpfree", "dentunusd", "proc/s", "pswpin/s", "call/s",
	                "scall/s", "totsck"]
	# For the names in pattern_4key the 4th column is an extra key field (such
	# as CPU, which has 1 data line per CPU).
	# "TTY" would also belong here, but TTY device activity is printed
	# incorrectly by my version of sadf, so it's excluded for now.
	pattern_4key = ["CPU", "DEV", "IFACE"]

	# Reference point for converting epoch seconds without
	# datetime.utcfromtimestamp(), which is deprecated since Python 3.12.
	_EPOCH = datetime.datetime(1970, 1, 1)


	def _iso_utc(epoch_seconds):
	    """Format epoch seconds (int or decimal string) as YYYY-MM-DDThh:mm:ssZ.

	    Raises ValueError when epoch_seconds is not an integer.
	    """
	    moment = _EPOCH + datetime.timedelta(seconds=int(epoch_seconds))
	    return moment.isoformat() + 'Z'


	def _key_width(columns):
	    """Return the number of key columns (3 or 4) for a header.

	    Returns 0 when the header is too short or its 4th column is in neither
	    pattern_3key nor pattern_4key; data lines under such a header are
	    ignored.
	    """
	    if len(columns) < 4:
	        return 0
	    if columns[3] in pattern_3key:
	        return 3
	    if columns[3] in pattern_4key:
	        return 4
	    return 0


	def sar_to_records(lines):
	    """Merge semicolon-separated sadf lines into one record per key.

	    lines is any iterable of text lines. A line starting with "#" is a
	    header giving the column names for the data lines that follow it.
	    Data lines seen before any recognised header, blank lines and lines
	    with fewer than 3 fields are skipped. Values beyond the last named
	    column are dropped because there is no name to store them under.

	    Returns an OrderedDict mapping "host|interval|timestamp" to an
	    OrderedDict of column name -> value (all values stay strings). Under
	    a 4-key header the 4th column name maps to a child dictionary holding
	    one dictionary per 4th-column value.

	    Raises ValueError when the timestamp field is not an integer.
	    """
	    records = OrderedDict()
	    columns = []
	    key_cols = 0        # no header seen yet: ignore data lines
	    for raw in lines:
	        line = raw.strip()
	        if not line:
	            continue
	        if line.startswith('#'):
	            # Get column names from the header line.
	            columns = line[1:].strip().split(';')
	            key_cols = _key_width(columns)
	            continue
	        if not key_cols:
	            continue

	        # Get values from the data line.
	        values = line.split(';')
	        if len(values) < 3:
	            continue
	        values[2] = _iso_utc(values[2])
	        key = '|'.join(values[:3])
	        # Add the 3-column key if it doesn't already exist.
	        if key not in records:
	            records[key] = OrderedDict()
	        record = records[key]

	        named = list(zip(columns, values))
	        if key_cols == 3:
	            for name, value in named:
	                record[name] = value
	            continue

	        for name, value in named[:3]:
	            record[name] = value
	        if len(named) > 3:
	            # This data line contains a 4th key field. Create a child
	            # dictionary to hold all sibling data lines.
	            column_sub, key_sub = named[3]
	            if column_sub not in record:
	                record[column_sub] = OrderedDict()
	            siblings = record[column_sub]
	            if len(named) > 4:
	                if key_sub not in siblings:
	                    siblings[key_sub] = OrderedDict()
	                for name, value in named[4:]:
	                    siblings[key_sub][name] = value
	    return records


	def main():
	    """Read sadf output from stdin and print one JSON object per key."""
	    for record in sar_to_records(sys.stdin).values():
	        print(json.dumps(record))


	if __name__ == "__main__":
	    main()
No results found