Created
May 5, 2020 15:34
-
-
Save linuxkidd/019d54ecbdb4bc0d53b37fd1125ec5b1 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python | |
import argparse
import glob
import json
import math
import re
import signal
import sys
from pprint import pprint
from StringIO import StringIO
from subprocess import Popen, PIPE
# ---------------------------------------------------------------------------
# Command-line handling and loading of the list-inconsistent-obj data.
# ---------------------------------------------------------------------------
parser = argparse.ArgumentParser(description='Custom script inconsistent PG cleanup')
parser.add_argument(
    "--id",
    dest="pgid",
    required=True,
    help="Provide the PG id for this list-inconsistent-obj output.",
)
args = parser.parse_args()
pgid = args.pgid

# When True the script only prints the shell commands for each object
# instead of running them. Flip to True by hand for a dry run.
debug = False

if not sys.stdin.isatty():
    # stdin is a pipe or a file: treat it as previously captured JSON output.
    print("Loading list-inconsistent-obj from stdin")
    obj = json.load(sys.stdin)
else:
    # Interactive terminal: query the cluster for this PG ourselves.
    print("Fetching list-inconsistent-obj for PG {}".format(pgid))
    cmd = ["rados", "list-inconsistent-obj", pgid]
    cmd_pipe = Popen(cmd, stdout=PIPE, stderr=PIPE)
    cmdout, cmderr = cmd_pipe.communicate()
    if cmd_pipe.returncode != 0:
        print("Command: {}\nFailed: {}\nExit Code: {}".format(cmd, cmderr, cmd_pipe.returncode))
        # sys.exit() rather than the interactive-only exit() helper that the
        # site module injects (and which is absent under `python -S`).
        sys.exit(1)
    # Parse the captured output directly; no file-like wrapper is needed.
    obj = json.loads(cmdout)

# One or more consecutive colons; used to split a string-form
# selected_object_info into its fields.
collonregex = re.compile("::*")
# Error names starting with "omap_" are treated as unsafe to repair here.
unsaferegex = re.compile("omap_")

# NOTE(review): these two counters are not referenced anywhere in the visible
# part of the file; kept in case other code relies on them.
hadincons = 0
printrepair = 0
def signal_handler(signum, frame):
    """Report the interruption and terminate the script with exit status 1.

    Installed as the SIGINT handler right below, so it runs when the user
    presses Ctrl+C.

    Args:
        signum: Number of the signal being handled (unused).
        frame: Stack frame that was executing when the signal arrived (unused).

    Raises:
        SystemExit: Always, with exit code 1.
    """
    # The first parameter is deliberately not called ``signal`` so it does not
    # hide the ``signal`` module inside this function.
    print('You pressed Ctrl+C! Exiting...')
    print('')
    sys.exit(1)


signal.signal(signal.SIGINT, signal_handler)
def parse_shards(shards):
    """Choose the OSD whose copy of an inconsistent object should be trusted.

    Every shard attribute except ``osd`` and ``primary`` is grouped by value,
    recording which OSDs report each value. The selection then runs in this
    order:

    1. OSDs that report an error which not every shard reports are
       blacklisted.
    2. If some attribute value is shared by all shards but one (and by at
       least two shards), the first non-blacklisted OSD of that agreeing
       group is returned.
    3. Otherwise, if nothing was blacklisted, the first shard's OSD is
       returned.
    4. Otherwise the first non-blacklisted shard's OSD is returned.

    Args:
        shards: List of per-shard dicts. Each must contain an ``osd`` key; an
            ``errors`` key, when present, is iterated as a list of error
            names.

    Returns:
        The ``osd`` value of the selected shard.

    Raises:
        SystemExit: With code 2 if any shard error matches ``unsaferegex``;
            with code 1 if ``shards`` is empty or every shard is blacklisted.
    """
    shardcount = len(shards)
    if shardcount == 0:
        # Nothing to choose from; fail the same way as "no usable OSD".
        print("Failed to find good osd. Please manually review this PG.")
        sys.exit(1)

    # master[attribute][value] -> list of OSDs reporting that value
    # (for "errors": master["errors"][error_name] -> list of OSDs).
    master = {}
    for shard in shards:
        currentosd = shard['osd']
        for key, val in shard.items():
            if key in ("osd", "primary"):
                continue
            bucket = master.setdefault(key, {})
            if key == "errors":
                for error in val:
                    if unsaferegex.match(error) is not None:
                        print("Unsafe to repair omap mismatch with this script. Exiting to preserve data.")
                        sys.exit(2)
                    bucket.setdefault(error, []).append(currentosd)
                continue
            try:
                hash(val)
            except TypeError:
                # Lists/dicts cannot be dict keys; compare them through a
                # canonical JSON rendering instead of crashing.
                val = json.dumps(val, sort_keys=True)
            bucket.setdefault(val, []).append(currentosd)

    # Build the blacklist up front so the matching pass below does not depend
    # on the order in which the dict yields the "errors" key.
    blacklistosd = set()
    for osds in master.get("errors", {}).values():
        if len(osds) < shardcount:
            blacklistosd.update(osds)

    for key, value in master.items():
        if key == "errors":
            continue
        for param, osds in value.items():
            # Only an "all but one" group counts, and a single shard cannot
            # outvote another single shard (two-shard case).
            if len(osds) != shardcount - 1 or len(osds) < 2:
                continue
            # Look at every member of the agreeing group, including the last.
            for osd in osds:
                if osd in blacklistosd:
                    continue
                print("Matched OSD data:")
                print("Key,Val,OSD")
                print("{},{},{}".format(key, param, osd))
                return osd

    if not blacklistosd:
        print("All OSDs errored, no difference among them, Using first OSD: osd.{}".format(shards[0]['osd']))
        return shards[0]['osd']

    # Fall back to the first shard that was not blacklisted (all shards are
    # candidates, including the last one).
    for shard in shards:
        if shard['osd'] not in blacklistosd:
            print("Using non-errored OSD: osd.{}".format(shard['osd']))
            return shard['osd']

    print("Failed to find good osd. Please manually review this PG.")
    sys.exit(1)
def _run_command(cmd):
    """Run *cmd* (an argv list) without a shell and capture its output.

    Returns:
        The command's stdout on success, or None when it exits non-zero. In
        the failure case the command, its stderr and its exit code are
        printed.
    """
    cmd_pipe = Popen(cmd, stdout=PIPE, stderr=PIPE)
    cmdout, cmderr = cmd_pipe.communicate()
    if cmd_pipe.returncode != 0:
        print("Command: {}\nFailed: {}\nExit Code: {}".format(cmd, cmderr, cmd_pipe.returncode))
        return None
    return cmdout


# Nothing listed as inconsistent: just ask Ceph to repair the PG and stop.
if not obj['inconsistents']:
    print("No inconsistent objects in PG {}. Issuing repair.".format(pgid))
    if _run_command(["ceph", "pg", "repair", pgid]) is None:
        sys.exit(1)
    sys.exit(0)

# Map pool number -> pool name; `rados put` needs the pool name.
cmdout = _run_command(["ceph", "osd", "lspools", "--format=json"])
if cmdout is None:
    sys.exit(1)
pools = {pool['poolnum']: pool['poolname'] for pool in json.loads(cmdout)}

needrepair = 0
for incons in obj['inconsistents']:
    # Refuse to touch anything with an omap-related error.
    for error in incons['errors']:
        if unsaferegex.match(error) is not None:
            print("Unsafe to repair omap mismatch with this script. Exiting to preserve data.")
            sys.exit(2)

    objname = incons['object']['name']
    snap = incons['object']['snap']

    # selected_object_info is either a dict or a colon-separated string.
    # Both yield a file-name glob of the form <oid>__<snap>_*_<pool>.
    selected = incons['selected_object_info']
    if isinstance(selected, dict):
        poolid = selected['oid']['pool']
        fname = "{}__{}_*_{}".format(selected['oid']['oid'], snap, poolid)
    else:
        fnamedata = collonregex.split(selected)
        poolid = fnamedata[0]
        fname = "{}__{}_*_{}".format(fnamedata[2], snap, poolid)

    print("Object: {}".format(objname))
    print("Pool ID: {}".format(poolid))
    print("Filename: {}".format(fname))
    goodosd = parse_shards(incons['shards'])
    poolname = pools[int(poolid)]

    if debug:
        # Dry run: print the equivalent shell commands and move on.
        print("NODENAME=$(ceph osd find {} | awk '/\"host\"/ {{gsub(/[\",]/,\"\",$NF); print $NF}}')".format(goodosd))
        print("ssh ${{NODENAME}} 'find /var/lib/ceph/osd/ceph-{0:d}/current/{1:s}_head/ -name {2:s} -exec cp -v {{}} /tmp/ \\;'".format(goodosd, pgid, fname))
        print("scp ${{NODENAME}}:/tmp/{0:s} /tmp".format(fname))
        print("rados put -p {0:s} {1:s} /tmp/{2:s}\n".format(poolname, objname, fname))
        continue

    ## Identify OSD host to find/copy object from
    print("Finding osd {}".format(goodosd))
    cmdout = _run_command(["ceph", "osd", "find", str(goodosd)])
    if cmdout is None:
        continue
    osdhost = json.loads(cmdout)["crush_location"]["host"]

    # Stage the object's file in /tmp on the OSD host. The glob in fname is
    # interpreted remotely by find(1).
    print("Finding object {} on OSD {} from host {}".format(objname, goodosd, osdhost))
    find_cmd = "find /var/lib/ceph/osd/ceph-{}/current/{}_head/ -name {} -exec cp -v {{}} /tmp/ \\;".format(goodosd, pgid, fname)
    if _run_command(["ssh", osdhost, find_cmd]) is None:
        continue

    # Pull the staged file to the local /tmp.
    print("Copying {} from {}".format(fname, goodosd))
    if _run_command(["scp", "{}:/tmp/{}".format(osdhost, fname), "/tmp/"]) is None:
        continue

    # fname still contains a literal '*'. The commands here run without a
    # shell, so nothing would expand it for `rados put`; resolve it to the
    # real local file first.
    # NOTE(review): glob metacharacters inside the object name itself are not
    # escaped here.
    localfiles = glob.glob("/tmp/{}".format(fname))
    if len(localfiles) != 1:
        print("Expected exactly one local file matching /tmp/{} but found {}. Skipping object {}.".format(fname, len(localfiles), objname))
        continue

    print("Performing rados put of object {} from file {} into pool {}".format(objname, fname, poolname))
    if _run_command(["rados", "-p", poolname, "put", objname, localfiles[0]]) is None:
        continue

    needrepair = 1
    print("PG {} object {} found, copied, put".format(pgid, objname))

# Only trigger a PG repair if at least one object was actually rewritten.
if needrepair == 1:
    print("Issuing repair for pg {} ".format(pgid))
    if _run_command(["ceph", "pg", "repair", pgid]) is None:
        sys.exit(1)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment