alanbchristie · March 4, 2019 10:18 · alanbchristie · Mar 4, 2019
diff --git a/hekcheck.py b/hekcheck.py
 #!/usr/bin/env python

 # hekcheck.py
 #
 # A simple Python 2.7 module to run basic validation checks
 # on an exported Heketi database file.
 #
 # >     This is a work in progress - driven by trying to fix a damaged
 #       deployment. It does not check everything yet but checks what I
 #       believed to be important things like whether the nodes, volumes,
 #       devices and blocks agree with each other.
 #
 # >     It's a simple piece of dictionary parsing code (designed by
 #       reverse-engineering the JSON file structure) but it found the problems
 #       in my DB file i.e. volume and device bricks with no corresponding
 #       entry in the brick list.
 #
 # The input file (a JSON file) is expected to be an export
 # obtained with something like: -
 #
 #   heketi db export --dbfile /var/lib/heketi/heketi.db --jsonfile db.json
 #
 # Run this utility with the exported file: -
 #
 #   ./hekcheck.py db.json
 #
 # Alan Christie
 # August 2018

 import json
 import os
 import sys

 USAGE = 'Usage: hekcheck.py <jsonfile>'

 # Diagnostic switches - set either to True for more detailed output.
 verbose = False
 show_brick_sizes = False


 def _bail(*messages):
    """Print each message, then the usage text, and exit with status 1."""
    for message in messages:
        print(message)
    print(USAGE)
    sys.exit(1)


 # Go...

 # Exactly one command-line argument (the exported JSON file) is required.
 if len(sys.argv) != 2:
    _bail()

 h_file = sys.argv[1]
 # The named path must exist before any attempt is made to read it.
 if not os.path.exists(h_file):
    _bail('No such file')


 def ascii_encode_dict(data):
    """Return a copy of *data* whose text keys and values are native strings.

    Intended as the ``object_hook`` for ``json.load``, which calls it once
    for every JSON object decoded.

    On Python 2, ``unicode`` keys and values are encoded to ASCII byte
    strings (a ``UnicodeEncodeError`` is raised for non-ASCII text).
    On Python 3 there is no ``unicode`` type and decoded text is already
    the native ``str``, so the items are copied unchanged.

    Only the immediate keys and values are converted; anything else
    (numbers, lists, nested dictionaries) is passed through as-is.
    """
    try:
        text_type = unicode
    except NameError:
        # Python 3: nothing to convert.
        return dict(data)

    def to_native(item):
        return item.encode('ascii') if isinstance(item, text_type) else item

    return dict((to_native(key), to_native(value))
                for key, value in data.items())


 def warning(msg):
    """Print *msg* prefixed with 'WARNING: ' and count it.

    The module-level ``num_warnings`` counter is incremented after the
    message has been printed.
    """
    global num_warnings
    text = 'WARNING: {}'.format(msg)
    print(text)
    num_warnings = num_warnings + 1


 def error(msg):
    """Print *msg* prefixed with 'ERROR: ' and count it.

    The module-level ``num_errors`` counter is incremented after the
    message has been printed.
    """
    global num_errors
    text = 'ERROR: {}'.format(msg)
    print(text)
    num_errors = num_errors + 1


 # Running totals, incremented by warning() and error() respectively.
 num_errors = 0
 num_warnings = 0
 # Parse the export; every decoded JSON object goes through
 # ascii_encode_dict().
 with open(h_file) as json_file:
    data = json.loads(json_file.read(), object_hook=ascii_encode_dict)

 #
 # Digest "clusterentries"
 # Each cluster entry lists the identities of its nodes and volumes.
 #
 cluster_ids = data['clusterentries'].keys()
 # Node and volume identities are kept twice: grouped by cluster
 # (the dictionaries) and flattened into one 'grand' list of each.
 cluster_node_ids = {}
 cluster_volume_ids = {}
 node_ids = []
 volume_ids = []
 for cluster_id in cluster_ids:
    if cluster_id not in cluster_volume_ids:
        cluster_volume_ids[cluster_id] = []
        cluster_node_ids[cluster_id] = []
    cluster_nodes = data['clusterentries'][cluster_id]['Info']['nodes']
    for member_id in cluster_nodes:
        cluster_node_ids[cluster_id].append(member_id)
        node_ids.append(member_id)
    cluster_volumes = data['clusterentries'][cluster_id]['Info']['volumes']
    for member_id in cluster_volumes:
        cluster_volume_ids[cluster_id].append(member_id)
        volume_ids.append(member_id)
 # Summary...
 # One count line per collection, followed (when verbose) by its members.
 for heading, members in (('# Clusters = {}', cluster_ids),
                          ('# Nodes = {}', node_ids),
                          ('# Volumes = {}', volume_ids)):
    print(heading.format(len(members)))
    if verbose:
        for member_id in members:
            print('  {}'.format(member_id))

 #
 # Digest "volumeentries"
 # Each volume entry names the cluster it belongs to and lists its bricks.
 #
 volumeentries = data['volumeentries'].keys()
 volume_bricks = {}
 volume_brick_ids = []
 for volumeentry in volumeentries:
    volume_bricks[volumeentry] = []
    # At least one cluster must list this volume
    if not any(volumeentry in cluster_volume_ids[known_cluster_id]
               for known_cluster_id in cluster_ids):
        error('volume not known to a cluster {}'.format(volumeentry))
    # The cluster the volume itself refers to must exist
    volume_cluster_id = data['volumeentries'][volumeentry]['Info']['cluster']
    if volume_cluster_id not in cluster_ids:
        error('volume {} cluster {} is not a cluster'.
              format(volumeentry, volume_cluster_id))
    # Collect the volume's bricks, per volume and in the overall list.
    # A brick already seen (in this or another volume) is an error but
    # is still recorded.
    for brick_id in data['volumeentries'][volumeentry]['Bricks']:
        if brick_id in volume_brick_ids:
            error('duplicate Brick ID {} '.format(brick_id))
        volume_bricks[volumeentry].append(brick_id)
        volume_brick_ids.append(brick_id)
 # Summary...
 print('# Volume bricks = {}'.format(len(volume_brick_ids)))
 if verbose:
    for brick_id in volume_brick_ids:
        print('  {}'.format(brick_id))

 #
 # Digest "deviceentries"
 # Each device entry names the node it is on and lists the bricks on it.
 #
 deviceentries = data['deviceentries'].keys()
 device_ids = []
 device_bricks = {}
 device_brick_ids = []
 for deviceentry in deviceentries:
    if deviceentry in device_ids:
        error('Duplicate device {}'.format(deviceentry))
    device_ids.append(deviceentry)
    device_bricks[deviceentry] = []
    # The device's node must be one listed by a cluster
    deviceentry_node_id = data['deviceentries'][deviceentry]['NodeId']
    if deviceentry_node_id not in node_ids:
        error('Device {} node {} not known'.
              format(deviceentry, deviceentry_node_id))
    # Collect the device's bricks, per device and in the overall list.
    # A brick already seen on any device is an error but is still recorded.
    for brick_id in data['deviceentries'][deviceentry]['Bricks']:
        if brick_id in device_brick_ids:
            error('Device {} Brick {} already known'.
                  format(deviceentry, brick_id))
        device_brick_ids.append(brick_id)
        device_bricks[deviceentry].append(brick_id)
 # Summary...
 print('# Devices = {}'.format(len(device_ids)))
 if verbose:
    for device_id in device_ids:
        print('  {}'.format(device_id))
 print('# Device bricks = {}'.format(len(device_brick_ids)))

 #
 # Digest "brickentries"
 # The bricks identify the device, node and volume they're on
 #
 brickentries = data['brickentries'].keys()
 brickentry_ids = []
 # Extremes of the positive brick sizes seen and the bricks that own them.
 # All four remain None when no brick has a positive size.
 smallest_brick_size_g = None
 smallest_brick = None
 largest_brick_size_g = None
 largest_brick = None
 for brickentry in brickentries:
    brickentry_id_str = brickentry
    if brickentry_id_str in brickentry_ids:
        error('Brick {} is not unique'.
              format(brickentry_id_str))
    # Every brick should be listed by some volume and by some device
    if brickentry_id_str not in volume_brick_ids:
        error('Brick {} is not known to a volume'.
              format(brickentry_id_str))
    if brickentry_id_str not in device_brick_ids:
        error('Brick {} is not known to a device'.
              format(brickentry_id_str))
    brickentry_ids.append(brickentry_id_str)
    brickentry_info = data['brickentries'][brickentry]['Info']
    brickentry_node_id = brickentry_info['node']
    brickentry_volume_id = brickentry_info['volume']
    brickentry_device_id = brickentry_info['device']
    brickentry_path = brickentry_info['path']
    # The raw size is kept so it can be quoted in the 'odd size' error.
    brickentry_size = brickentry_info['size']
    # Floor division keeps the Python 2 integer result on any interpreter.
    # NOTE(review): the divisor is 1,000,000 yet the summary labels the
    # value GiB - confirm the unit of 'size' in the export.
    brickentry_size_g = brickentry_size // 1000000
    brickentry_pending_id = data['brickentries'][brickentry]['Pending']['Id']
    # The node, volume and device the brick refers to must all be known
    if brickentry_node_id not in node_ids:
        error('Brick {} node {} not known'.
              format(brickentry, brickentry_node_id))
    if brickentry_volume_id not in volume_ids:
        error('Brick {} volume {} not known'.
              format(brickentry, brickentry_volume_id))
    if brickentry_device_id not in device_ids:
        error('Brick {} device {} not known'.
              format(brickentry, brickentry_device_id))
    if not brickentry_path:
        error('Brick {} path is blank'.format(brickentry))
    if brickentry_size_g <= 0:
        error('Brick {} has odd size {}'.format(brickentry, brickentry_size))
    else:
        if smallest_brick_size_g is None or brickentry_size_g < smallest_brick_size_g:
            smallest_brick_size_g = brickentry_size_g
            smallest_brick = brickentry_id_str
        if largest_brick_size_g is None or brickentry_size_g > largest_brick_size_g:
            largest_brick_size_g = brickentry_size_g
            largest_brick = brickentry_id_str
    if brickentry_pending_id:
        warning('Brick {} is pending on ID {}'.
                format(brickentry, brickentry_pending_id))
 # Summary...
 print('# Bricks = {}'.format(len(brickentry_ids)))
 # Sizes are only reported when at least one positively-sized brick was
 # seen; None cannot be formatted with '{:,}'.
 if show_brick_sizes and smallest_brick_size_g is not None:
    print('# Smallest brick size = {:,} GiB ({})'.format(smallest_brick_size_g,
                                                         smallest_brick))
    print('# Largest brick size = {:,} GiB ({})'.format(largest_brick_size_g,
                                                        largest_brick))
 if verbose:
    for brickentry_id in brickentry_ids:
        print('  {}'.format(brickentry_id))

 #
 # Digest "pendingoperations"
 # Any entry at all in this section is worth a warning.
 #
 pendingoperations = data['pendingoperations'].keys()
 num_pending = len(pendingoperations)
 if num_pending:
    warning('There are pending operations ({})'.format(num_pending))

 # We've looked at each major section so let's do some
 # cross-referential tests...
 #
 # Set form of the brickentries IDs, for fast membership tests below.
 known_brick_ids = set(brickentry_ids)

 # Do the IDs listed in brickentries
 # match the bricks listed against the volumes?
 # i.e. is each brick in the volume list in the brickentries list?
 if len(brickentry_ids) != len(volume_brick_ids):
    warning('Number of brickentries ({})'
            ' differs from the number of volume bricks ({})'.
            format(len(brickentry_ids), len(volume_brick_ids)))
 # Checked unconditionally: equal counts do not imply equal contents.
 for brick_id in volume_brick_ids:
    if brick_id not in known_brick_ids:
        # Which volume is this brick in? (first match, else None)
        lost_volume_id = next(
            (volume_id for volume_id in volume_bricks
             if brick_id in volume_bricks[volume_id]), None)
        error('Volume {} brick {} not in brickentries'.
              format(lost_volume_id, brick_id))

 # Do the IDs listed in brickentries
 # match the bricks listed against the devices?
 # i.e. is each brick in the device list in the brickentries list?
 if len(brickentry_ids) != len(device_brick_ids):
    warning('Number of brickentries ({})'
            ' differs from the number of device bricks ({})'.
            format(len(brickentry_ids), len(device_brick_ids)))
 # Checked unconditionally, for the same reason as above.
 for brick_id in device_brick_ids:
    if brick_id not in known_brick_ids:
        # Which device is this brick in? (first match, else None)
        lost_device_id = next(
            (device_id for device_id in device_bricks
             if brick_id in device_bricks[device_id]), None)
        error('Device {} brick {} not in brickentries'.
              format(lost_device_id, brick_id))

 # OK?
 # A single warning or error is enough to report issues.
 all_clear = not (num_warnings or num_errors)
 print('Done [Looks Good]' if all_clear else 'Done [There were issues]')
	#!/usr/bin/env python

	# hekcheck.py
	#
	# A simple Python 2.7 module to run basic validation checks
	# on an exported Heketi database file.
	#
	# > This is a work in progress - driven by trying to fix a damaged
	# deployment. It does not check everything yet but checks what I
	# believed to be important things like whether the nodes, volumes,
	# devices and blocks agree with each other.
	#
	# > It's a simple piece of dictionary parsing code (designed by
	# reverse-engineering the JSON file structure) but it found the problems
	# in my DB file i.e. volume and device bricks with no corresponding
	# entry in the brick list.
	#
	# The input file (a JSON file) is expected to be an export
	# obtained with something like: -
	#
	# heketi db export --dbfile /var/lib/heketi/heketi.db --jsonfile db.json
	#
	# Run this utility with the exported file: -
	#
	# ./hekcheck.py db.json
	#
	# Alan Christie
	# August 2018

	import json
	import os
	import sys

	USAGE = 'Usage: hekcheck.py <jsonfile>'

	# Diagnostic switches - set either to True for more detailed output.
	verbose = False
	show_brick_sizes = False

	# Go...

	# Exactly one command-line argument (the exported JSON file) is required.
	if len(sys.argv) != 2:
	    print(USAGE)
	    sys.exit(1)

	h_file = sys.argv[1]
	# The named path must exist before any attempt is made to read it.
	if not os.path.exists(h_file):
	    print('No such file')
	    print(USAGE)
	    sys.exit(1)


	def ascii_encode_dict(data):
	    """Return a copy of *data* whose text keys and values are native strings.

	    Intended as the ``object_hook`` for ``json.load``, which calls it once
	    for every JSON object decoded.

	    On Python 2, ``unicode`` keys and values are encoded to ASCII byte
	    strings (a ``UnicodeEncodeError`` is raised for non-ASCII text).
	    On Python 3 there is no ``unicode`` type and decoded text is already
	    the native ``str``, so the items are copied unchanged.

	    Only the immediate keys and values are converted; anything else
	    (numbers, lists, nested dictionaries) is passed through as-is.
	    """
	    try:
	        text_type = unicode
	    except NameError:
	        # Python 3: nothing to convert.
	        return dict(data)

	    def to_native(item):
	        return item.encode('ascii') if isinstance(item, text_type) else item

	    return dict((to_native(key), to_native(value))
	                for key, value in data.items())


	def warning(msg):
	    """Print *msg* prefixed with 'WARNING: ' and count it.

	    The module-level ``num_warnings`` counter is incremented after the
	    message has been printed.
	    """
	    global num_warnings
	    print('WARNING: {}'.format(msg))
	    num_warnings += 1


	def error(msg):
	    """Print *msg* prefixed with 'ERROR: ' and count it.

	    The module-level ``num_errors`` counter is incremented after the
	    message has been printed.
	    """
	    global num_errors
	    print('ERROR: {}'.format(msg))
	    num_errors += 1


	# Running totals, incremented by warning() and error() respectively.
	num_warnings = 0
	num_errors = 0
	# Parse the export; every decoded JSON object goes through
	# ascii_encode_dict().
	with open(h_file) as f:
	    data = json.load(f, object_hook=ascii_encode_dict)

	#
	# Digest "clusterentries"
	# Each cluster entry lists the identities of its nodes and volumes.
	#
	cluster_ids = data['clusterentries'].keys()
	# Node and volume identities are kept twice: grouped by cluster
	# (the dictionaries) and flattened into one 'grand' list of each.
	cluster_node_ids = {}
	node_ids = []
	cluster_volume_ids = {}
	volume_ids = []
	for cluster_id in cluster_ids:
	    if cluster_id not in cluster_volume_ids:
	        cluster_volume_ids[cluster_id] = []
	        cluster_node_ids[cluster_id] = []
	    for node_id in data['clusterentries'][cluster_id]['Info']['nodes']:
	        cluster_node_ids[cluster_id].append(node_id)
	        node_ids.append(node_id)
	    for volume_id in data['clusterentries'][cluster_id]['Info']['volumes']:
	        cluster_volume_ids[cluster_id].append(volume_id)
	        volume_ids.append(volume_id)
	# Summary...
	print('# Clusters = {}'.format(len(cluster_ids)))
	if verbose:
	    for cluster_id in cluster_ids:
	        print(' {}'.format(cluster_id))
	print('# Nodes = {}'.format(len(node_ids)))
	if verbose:
	    for node_id in node_ids:
	        print(' {}'.format(node_id))
	print('# Volumes = {}'.format(len(volume_ids)))
	if verbose:
	    for volume_id in volume_ids:
	        print(' {}'.format(volume_id))

	#
	# Digest "volumeentries"
	# Each volume entry names the cluster it belongs to and lists its bricks.
	#
	volumeentries = data['volumeentries'].keys()
	volume_bricks = {}
	volume_brick_ids = []
	for volumeentry in volumeentries:
	    volume_bricks[volumeentry] = []
	    # At least one cluster must list this volume
	    found = False
	    for cluster_id in cluster_ids:
	        if volumeentry in cluster_volume_ids[cluster_id]:
	            found = True
	    if not found:
	        error('volume not known to a cluster {}'.format(volumeentry))
	    # The cluster the volume itself refers to must exist
	    volume_cluster_id = data['volumeentries'][volumeentry]['Info'][
	        'cluster']
	    if volume_cluster_id not in cluster_ids:
	        error('volume {} cluster {} is not a cluster'.
	              format(volumeentry, volume_cluster_id))
	    # Collect the volume's bricks, per volume and in the overall list.
	    # A brick already seen (in this or another volume) is an error but
	    # is still recorded.
	    for brick_id in data['volumeentries'][volumeentry]['Bricks']:
	        if brick_id in volume_brick_ids:
	            error('duplicate Brick ID {} '.format(brick_id))
	        volume_bricks[volumeentry].append(brick_id)
	        volume_brick_ids.append(brick_id)
	# Summary...
	print('# Volume bricks = {}'.format(len(volume_brick_ids)))
	if verbose:
	    for brick_id in volume_brick_ids:
	        print(' {}'.format(brick_id))

	#
	# Digest "deviceentries"
	# Each device entry names the node it is on and lists the bricks on it.
	#
	deviceentries = data['deviceentries'].keys()
	device_ids = []
	device_bricks = {}
	device_brick_ids = []
	for deviceentry in deviceentries:
	    device_id_str = deviceentry
	    if device_id_str in device_ids:
	        error('Duplicate device {}'.format(device_id_str))
	    device_ids.append(device_id_str)
	    device_bricks[device_id_str] = []
	    # The device's node must be one listed by a cluster
	    deviceentry_node_id = data['deviceentries'][deviceentry]['NodeId']
	    if deviceentry_node_id not in node_ids:
	        error('Device {} node {} not known'.
	              format(deviceentry, deviceentry_node_id))
	    # Collect the device's bricks, per device and in the overall list.
	    # A brick already seen on any device is an error but is still recorded.
	    for brick_id in data['deviceentries'][deviceentry]['Bricks']:
	        if brick_id in device_brick_ids:
	            error('Device {} Brick {} already known'.
	                  format(device_id_str, brick_id))
	        device_brick_ids.append(brick_id)
	        device_bricks[device_id_str].append(brick_id)
	# Summary...
	print('# Devices = {}'.format(len(device_ids)))
	if verbose:
	    for device_id in device_ids:
	        print(' {}'.format(device_id))
	print('# Device bricks = {}'.format(len(device_brick_ids)))

	#
	# Digest "brickentries"
	# The bricks identify the device, node and volume they're on
	#
	brickentries = data['brickentries'].keys()
	brickentry_ids = []
	# Extremes of the positive brick sizes seen and the bricks that own them.
	# All four remain None when no brick has a positive size.
	smallest_brick_size_g = None
	smallest_brick = None
	largest_brick_size_g = None
	largest_brick = None
	for brickentry in brickentries:
	    brickentry_id_str = brickentry
	    if brickentry_id_str in brickentry_ids:
	        error('Brick {} is not unique'.
	              format(brickentry_id_str))
	    # Every brick should be listed by some volume and by some device
	    if brickentry_id_str not in volume_brick_ids:
	        error('Brick {} is not known to a volume'.
	              format(brickentry_id_str))
	    if brickentry_id_str not in device_brick_ids:
	        error('Brick {} is not known to a device'.
	              format(brickentry_id_str))
	    brickentry_ids.append(brickentry_id_str)
	    brickentry_info = data['brickentries'][brickentry]['Info']
	    brickentry_node_id = brickentry_info['node']
	    brickentry_volume_id = brickentry_info['volume']
	    brickentry_device_id = brickentry_info['device']
	    brickentry_path = brickentry_info['path']
	    # The raw size is kept so it can be quoted in the 'odd size' error.
	    brickentry_size = brickentry_info['size']
	    # Floor division keeps the Python 2 integer result on any interpreter.
	    # NOTE(review): the divisor is 1,000,000 yet the summary labels the
	    # value GiB - confirm the unit of 'size' in the export.
	    brickentry_size_g = brickentry_size // 1000000
	    brickentry_pending_id = data['brickentries'][brickentry]['Pending']['Id']
	    # The node, volume and device the brick refers to must all be known
	    if brickentry_node_id not in node_ids:
	        error('Brick {} node {} not known'.
	              format(brickentry, brickentry_node_id))
	    if brickentry_volume_id not in volume_ids:
	        error('Brick {} volume {} not known'.
	              format(brickentry, brickentry_volume_id))
	    if brickentry_device_id not in device_ids:
	        error('Brick {} device {} not known'.
	              format(brickentry, brickentry_device_id))
	    if not brickentry_path:
	        error('Brick {} path is blank'.format(brickentry))
	    if brickentry_size_g <= 0:
	        error('Brick {} has odd size {}'.format(brickentry, brickentry_size))
	    else:
	        if smallest_brick_size_g is None or brickentry_size_g < smallest_brick_size_g:
	            smallest_brick_size_g = brickentry_size_g
	            smallest_brick = brickentry_id_str
	        if largest_brick_size_g is None or brickentry_size_g > largest_brick_size_g:
	            largest_brick_size_g = brickentry_size_g
	            largest_brick = brickentry_id_str
	    if brickentry_pending_id:
	        warning('Brick {} is pending on ID {}'.
	                format(brickentry, brickentry_pending_id))
	# Summary...
	print('# Bricks = {}'.format(len(brickentry_ids)))
	# Sizes are only reported when at least one positively-sized brick was
	# seen; None cannot be formatted with '{:,}'.
	if show_brick_sizes and smallest_brick_size_g is not None:
	    print('# Smallest brick size = {:,} GiB ({})'.format(smallest_brick_size_g,
	                                                         smallest_brick))
	    print('# Largest brick size = {:,} GiB ({})'.format(largest_brick_size_g,
	                                                        largest_brick))
	if verbose:
	    for brickentry_id in brickentry_ids:
	        print(' {}'.format(brickentry_id))

	#
	# Digest "pendingoperations"
	# Any entry at all in this section is worth a warning.
	#
	pendingoperations = data['pendingoperations'].keys()
	if len(pendingoperations):
	    warning('There are pending operations ({})'.
	            format(len(pendingoperations)))

	# We've looked at each major section so let's do some
	# cross-referential tests...
	#
	# Set form of the brickentries IDs, for fast membership tests below.
	known_brick_ids = set(brickentry_ids)

	# Do the IDs listed in brickentries
	# match the bricks listed against the volumes?
	# i.e. is each brick in the volume list in the brickentries list?
	if len(brickentry_ids) != len(volume_brick_ids):
	    warning('Number of brickentries ({})'
	            ' differs from the number of volume bricks ({})'.
	            format(len(brickentry_ids), len(volume_brick_ids)))
	# Checked unconditionally: equal counts do not imply equal contents.
	for brick_id in volume_brick_ids:
	    if brick_id not in known_brick_ids:
	        # Which volume is this brick in? (first match, else None)
	        lost_volume_id = next(
	            (volume_id for volume_id in volume_bricks
	             if brick_id in volume_bricks[volume_id]), None)
	        error('Volume {} brick {} not in brickentries'.
	              format(lost_volume_id, brick_id))

	# Do the IDs listed in brickentries
	# match the bricks listed against the devices?
	# i.e. is each brick in the device list in the brickentries list?
	if len(brickentry_ids) != len(device_brick_ids):
	    warning('Number of brickentries ({})'
	            ' differs from the number of device bricks ({})'.
	            format(len(brickentry_ids), len(device_brick_ids)))
	# Checked unconditionally, for the same reason as above.
	for brick_id in device_brick_ids:
	    if brick_id not in known_brick_ids:
	        # Which device is this brick in? (first match, else None)
	        lost_device_id = next(
	            (device_id for device_id in device_bricks
	             if brick_id in device_bricks[device_id]), None)
	        error('Device {} brick {} not in brickentries'.
	              format(lost_device_id, brick_id))

	# OK?
	# A single warning or error is enough to report issues.
	if num_warnings or num_errors:
	    print('Done [There were issues]')
	else:
	    print('Done [Looks Good]')