Last active
March 4, 2019 10:18
-
-
Save alanbchristie/9a364070c3c619499df971e6e09d8c65 to your computer and use it in GitHub Desktop.
A simple Python 2.7 module to run basic validation checks on an exported Heketi database file
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
# hekcheck.py
#
# A simple Python 2.7 module to run basic validation checks
# on an exported Heketi database file.
#
# > This is a work in progress - driven by trying to fix a damaged
#   deployment. It does not check everything yet but checks what I
#   believed to be important things like whether the nodes, volumes,
#   devices and blocks agree with each other.
#
# > It's a simple piece of dictionary parsing code (designed by
#   reverse-engineering the JSON file structure) but it found the problems
#   in my DB file i.e. volume and device bricks with no corresponding
#   entry in the brick list.
#
# The input file (a JSON file) is expected to be an export
# obtained with something like: -
#
#     heketi db export --dbfile /var/lib/heketi/heketi.db --jsonfile db.json
#
# Run this utility with the exported file: -
#
#     ./hekcheck.py db.json
#
# Alan Christie
# August 2018
import json | |
import os | |
import sys | |
USAGE = 'Usage: hekcheck.py <jsonfile>'

# To see lots of stuff...
verbose = False
show_brick_sizes = False

# Go...
# Exactly one command-line argument is expected: the exported JSON file.
if len(sys.argv) != 2:
    print(USAGE)
    sys.exit(1)
h_file = sys.argv[1]
# isfile() rather than exists(): a directory "exists" but cannot be opened
# as a database export, so reject it here with the usage message instead
# of failing later with a traceback from open().
if not os.path.isfile(h_file):
    print('No such file')
    print(USAGE)
    sys.exit(1)
def ascii_encode_dict(data):
    """Return a copy of *data* with unicode keys and values ASCII-encoded.

    Used as the ``object_hook`` when loading the exported JSON so that,
    on Python 2, top-level keys and string values of every decoded object
    become plain ``str`` rather than ``unicode``. Values that are not
    unicode strings (numbers, lists, nested dicts, None) are passed
    through untouched.

    On interpreters without a ``unicode`` type the strings are already the
    native text type, so the dictionary is copied unchanged instead of
    failing with a NameError.

    Raises UnicodeEncodeError (Python 2 only) if a string contains
    non-ASCII characters.
    """
    try:
        text_type = unicode  # noqa: F821 - only defined on Python 2
    except NameError:
        # No separate unicode type: nothing to encode.
        return dict(data)

    def ascii_encode(item):
        return item.encode('ascii') if isinstance(item, text_type) else item

    return {ascii_encode(key): ascii_encode(value)
            for key, value in data.items()}
def warning(msg):
    """Print *msg* with a ``WARNING:`` prefix and count it.

    The module-level ``num_warnings`` counter is incremented by one
    after the message has been printed.
    """
    global num_warnings
    line = 'WARNING: {}'.format(msg)
    print(line)
    num_warnings = num_warnings + 1
def error(msg):
    """Print *msg* with an ``ERROR:`` prefix and count it.

    The module-level ``num_errors`` counter is incremented by one
    after the message has been printed.
    """
    global num_errors
    line = 'ERROR: {}'.format(msg)
    print(line)
    num_errors = num_errors + 1
# Issue counters; warning() and error() increment these.
num_errors = 0
num_warnings = 0

# Load the exported database. Every decoded JSON object is passed
# through ascii_encode_dict() on its way in.
with open(h_file) as db_file:
    data = json.load(db_file, object_hook=ascii_encode_dict)
#
# Digest "clusterentries"
# This section contains a list of node and volume identities in each cluster.
#
cluster_table = data['clusterentries']
cluster_ids = cluster_table.keys()

# Node and volume identities, both indexed by cluster
# and flattened into a 'grand' list of each.
cluster_node_ids = {}
cluster_volume_ids = {}
node_ids = []
volume_ids = []

for cluster_id in cluster_ids:
    cluster_info = cluster_table[cluster_id]['Info']
    nodes_in_cluster = list(cluster_info['nodes'])
    volumes_in_cluster = list(cluster_info['volumes'])
    cluster_node_ids[cluster_id] = nodes_in_cluster
    cluster_volume_ids[cluster_id] = volumes_in_cluster
    node_ids.extend(nodes_in_cluster)
    volume_ids.extend(volumes_in_cluster)

# Summary...
# One count line per category, followed (when verbose) by each identity.
for summary_heading, summary_ids in (('# Clusters = {}', cluster_ids),
                                     ('# Nodes = {}', node_ids),
                                     ('# Volumes = {}', volume_ids)):
    print(summary_heading.format(len(summary_ids)))
    if verbose:
        for summary_id in summary_ids:
            print(' {}'.format(summary_id))
#
# Digest "volumeentries"
# Each volume links back to the cluster and lists the bricks in it
# and the device it's on.
#
volume_table = data['volumeentries']
volumeentries = volume_table.keys()
volume_bricks = {}
volume_brick_ids = []

for volumeentry in volumeentries:
    volume_record = volume_table[volumeentry]
    bricks_of_volume = []
    volume_bricks[volumeentry] = bricks_of_volume

    # The volume should be listed by at least one cluster
    listed_by_cluster = any(volumeentry in cluster_volume_ids[known_cluster]
                            for known_cluster in cluster_ids)
    if not listed_by_cluster:
        error('volume not known to a cluster {}'.format(volumeentry))

    # Check volume is in a known cluster
    volume_cluster_id = volume_record['Info']['cluster']
    if volume_cluster_id not in cluster_ids:
        error('volume {} cluster {} is not a cluster'.
              format(volumeentry, volume_cluster_id))

    # Collect volume Bricks, flagging any brick already seen
    for brick_id in volume_record['Bricks']:
        if brick_id in volume_brick_ids:
            error('duplicate Brick ID {} '.format(brick_id))
        bricks_of_volume.append(brick_id)
        volume_brick_ids.append(brick_id)

# Summary...
print('# Volume bricks = {}'.format(len(volume_brick_ids)))
if verbose:
    for listed_brick in volume_brick_ids:
        print(' {}'.format(listed_brick))
#
# Digest "deviceentries"
# The device entry identifies the node it's on and the bricks that are on it
#
device_table = data['deviceentries']
deviceentries = device_table.keys()
device_ids = []
device_bricks = {}
device_brick_ids = []

for deviceentry in deviceentries:
    device_record = device_table[deviceentry]

    if deviceentry in device_ids:
        error('Duplicate device {}'.format(deviceentry))
    device_ids.append(deviceentry)
    bricks_on_device = []
    device_bricks[deviceentry] = bricks_on_device

    # The device's node must be one listed by a cluster
    deviceentry_node_id = device_record['NodeId']
    if deviceentry_node_id not in node_ids:
        error('Device {} node {} not known'.
              format(deviceentry, deviceentry_node_id))

    # Collect device bricks, flagging any brick already seen
    for brick_id in device_record['Bricks']:
        if brick_id in device_brick_ids:
            error('Device {} Brick {} already known'.
                  format(deviceentry, brick_id))
        device_brick_ids.append(brick_id)
        bricks_on_device.append(brick_id)

# Summary...
print('# Devices = {}'.format(len(device_ids)))
if verbose:
    for listed_device in device_ids:
        print(' {}'.format(listed_device))
print('# Device bricks = {}'.format(len(device_brick_ids)))
#
# Digest "brickentries"
# The bricks identify the device, node and volume they're on
#
brickentries = data['brickentries'].keys()
brickentry_ids = []
# Extremes seen so far; all four stay None until a brick with a
# valid (positive) size has been found.
smallest_brick_size_g = None
smallest_brick = None
largest_brick_size_g = None
largest_brick = None

# Hashed copies of the identity lists collected by the earlier sections,
# built once so each membership test in the loop is a constant-time lookup.
known_node_ids = set(node_ids)
known_volume_ids = set(volume_ids)
known_device_ids = set(device_ids)
known_volume_brick_ids = set(volume_brick_ids)
known_device_brick_ids = set(device_brick_ids)
seen_brick_ids = set()

for brickentry in brickentries:
    brickentry_id_str = brickentry
    if brickentry_id_str in seen_brick_ids:
        error('Brick {} is not unique'.
              format(brickentry_id_str))
    if brickentry_id_str not in known_volume_brick_ids:
        error('Brick {} is not known to a volume'.
              format(brickentry_id_str))
    if brickentry_id_str not in known_device_brick_ids:
        error('Brick {} is not known to a device'.
              format(brickentry_id_str))
    seen_brick_ids.add(brickentry_id_str)
    brickentry_ids.append(brickentry_id_str)

    brick_record = data['brickentries'][brickentry]
    brick_info = brick_record['Info']
    brickentry_node_id = brick_info['node']
    brickentry_volume_id = brick_info['volume']
    brickentry_device_id = brick_info['device']
    brickentry_path = brick_info['path']
    brickentry_size = brick_info['size']
    # Floor division keeps the Python 2 integer result on any interpreter.
    # NOTE(review): the summary labels this value GiB while the divisor is
    # 1,000,000 - confirm the unit of Info.size against Heketi.
    brickentry_size_g = brickentry_size // 1000000
    brickentry_pending_id = brick_record['Pending']['Id']

    if brickentry_node_id not in known_node_ids:
        error('Brick {} node {} not known'.
              format(brickentry, brickentry_node_id))
    if brickentry_volume_id not in known_volume_ids:
        error('Brick {} volume {} not known'.
              format(brickentry, brickentry_volume_id))
    # Check the brick's device against the known devices (this test
    # previously repeated the volume check, so it could never report
    # an unknown device).
    if brickentry_device_id not in known_device_ids:
        error('Brick {} device {} not known'.
              format(brickentry, brickentry_device_id))
    if not brickentry_path:
        error('Brick {} path is blank'.format(brickentry))

    if brickentry_size_g <= 0:
        # Report the raw size from the record, not the scaled value.
        error('Brick {} has odd size {}'.format(brickentry, brickentry_size))
    else:
        if (smallest_brick_size_g is None
                or brickentry_size_g < smallest_brick_size_g):
            smallest_brick_size_g = brickentry_size_g
            smallest_brick = brickentry_id_str
        if (largest_brick_size_g is None
                or brickentry_size_g > largest_brick_size_g):
            largest_brick_size_g = brickentry_size_g
            largest_brick = brickentry_id_str

    if brickentry_pending_id:
        warning('Brick {} is pending on ID {}'.
                format(brickentry, brickentry_pending_id))

# Summary...
print('# Bricks = {}'.format(len(brickentry_ids)))
# Only report the extremes when at least one valid size was seen;
# formatting None with '{:,}' would raise.
if show_brick_sizes and smallest_brick_size_g is not None:
    print('# Smallest brick size = {:,} GiB ({})'.format(smallest_brick_size_g,
                                                         smallest_brick))
    print('# Largest brick size = {:,} GiB ({})'.format(largest_brick_size_g,
                                                        largest_brick))
if verbose:
    for brickentry_id in brickentry_ids:
        print(' {}'.format(brickentry_id))
#
# Digest "pendingoperations"
#
# Any entry at all in this section is worth a warning.
pendingoperations = data['pendingoperations'].keys()
pending_total = len(pendingoperations)
if pending_total:
    warning('There are pending operations ({})'.
            format(pending_total))
# We've looked at each major section so let's do some
# cross-referential tests...
#
# Do the IDs listed in brickentries
# match the bricks listed against the volumes?
# i.e. is each brick in the volume list in the brickentries list?
brickentry_total = len(brickentry_ids)
volume_brick_total = len(volume_brick_ids)
if brickentry_total != volume_brick_total:
    warning('Number of brickentries ({})'
            ' differs from the number of volume bricks ({})'.
            format(brickentry_total, volume_brick_total))

for brick_id in volume_brick_ids:
    if brick_id in brickentry_ids:
        continue
    # Which volume is this brick in?
    # (the first volume whose brick list mentions it, else None)
    lost_volume_id = next((owner_id for owner_id in volume_bricks
                           if brick_id in volume_bricks[owner_id]),
                          None)
    error('Volume {} brick {} not in brickentries'.
          format(lost_volume_id, brick_id))
# Do the IDs listed in brickentries
# match the bricks listed against the devices?
# i.e. is each brick in the device list in the brickentries list?
brickentry_total = len(brickentry_ids)
device_brick_total = len(device_brick_ids)
if brickentry_total != device_brick_total:
    warning('Number of brickentries ({})'
            ' differs from the number of device bricks ({})'.
            format(brickentry_total, device_brick_total))

for brick_id in device_brick_ids:
    if brick_id in brickentry_ids:
        continue
    # Which device is this brick in?
    # (the first device whose brick list mentions it, else None)
    lost_device_id = next((owner_id for owner_id in device_bricks
                           if brick_id in device_bricks[owner_id]),
                          None)
    error('Device {} brick {} not in brickentries'.
          format(lost_device_id, brick_id))
# OK?
# Any warning or error at all means the database needs a closer look.
had_issues = bool(num_warnings or num_errors)
print('Done [There were issues]' if had_issues else 'Done [Looks Good]')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
...it might also be worth familiarising yourself with the troubleshooting guide at https://github.com/heketi/heketi/blob/master/docs/troubleshooting.md