Last active
January 2, 2016 01:29
-
-
Save tjake/8230602 to your computer and use it in GitHub Desktop.
Script to slowly repair the primary vnode ranges for a node. If it fails, you can restart from the last successful range. It waits between ranges to avoid a buildup of compactions.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import os, sys, re, subprocess, time | |
def usage():
    """Print the command-line synopsis to stdout and exit with status 1.

    Raises:
        SystemExit: Always, with exit code 1.
    """
    print("Usage:")
    print(" repair hostname keyspace [offset]\n")
    # sys.exit rather than the bare exit() helper: exit() is installed by the
    # site module for interactive sessions and is missing under "python -S".
    sys.exit(1)
def get_hostid(host):
    """Return the host ID that ``nodetool info`` reports for *host*.

    Args:
        host: Value passed to nodetool's ``--host`` option.

    Returns:
        The first run of hex digits and dashes found after an ``id :`` label
        (matched case-insensitively) in the command's standard output.

    Raises:
        RuntimeError: If no output line contains a host ID.
    """
    proc = subprocess.Popen(
        ["nodetool", "--host", host, "info"],
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        # Text-mode pipes so the str regex below also works on Python 3.
        universal_newlines=True,
    )
    # communicate() drains both pipes and reaps the child, so the process
    # cannot block on a full stderr pipe or be left behind as a zombie.
    out, _ = proc.communicate()
    for line in out.splitlines():
        match = re.search(r"id\s+:\s+([a-f0-9\-]+)", line, re.I)
        if match:
            return match.group(1)
    raise RuntimeError("Missing hostid")
def get_ip(hostid, host=None):
    """Return the IPv4 address listed next to *hostid* in ``nodetool status``.

    Args:
        hostid: Host ID string to look for in the status output.
        host: Value passed to nodetool's ``--host`` option. When omitted, the
            module-level ``host`` variable is used, which is what this
            function relied on before the parameter existed.

    Returns:
        The first dotted-quad address on the first output line that contains
        *hostid*.

    Raises:
        NameError: If *host* is omitted and no module-level ``host`` exists.
        RuntimeError: If no line contains both *hostid* and an address.
    """
    if host is None:
        try:
            host = globals()["host"]
        except KeyError:
            raise NameError("global name 'host' is not defined")
    proc = subprocess.Popen(
        ["nodetool", "--host", host, "status"],
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        # Text-mode pipes so str operations below also work on Python 3.
        universal_newlines=True,
    )
    # communicate() drains both pipes and reaps the child process.
    out, _ = proc.communicate()
    for line in out.splitlines():
        if hostid not in line:
            continue
        # Every dot is escaped so only a literal dotted quad matches.
        match = re.search(r"(\d+\.\d+\.\d+\.\d+)", line)
        if match:
            return match.group(1)
    raise RuntimeError("Missing ip for hostid")
'''Find the primary token ranges for specified ip''' | |
#!/usr/bin/python -u | |
from datetime import datetime | |
import os, sys, re, subprocess, time | |
def usage():
    """Print the command-line synopsis to stdout and exit with status 1.

    Raises:
        SystemExit: Always, with exit code 1.
    """
    print("Usage:")
    print(" repair hostname keyspace [offset]\n")
    # sys.exit rather than the bare exit() helper: exit() is installed by the
    # site module for interactive sessions and is missing under "python -S".
    sys.exit(1)
def get_hostid(host):
    """Return the host ID that ``nodetool info`` reports for *host*.

    Args:
        host: Value passed to nodetool's ``--host`` option.

    Returns:
        The first run of hex digits and dashes found after an ``id :`` label
        (matched case-insensitively) in the command's standard output.

    Raises:
        RuntimeError: If no output line contains a host ID.
    """
    proc = subprocess.Popen(
        ["nodetool", "--host", host, "info"],
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        # Text-mode pipes so the str regex below also works on Python 3.
        universal_newlines=True,
    )
    # communicate() drains both pipes and reaps the child, so the process
    # cannot block on a full stderr pipe or be left behind as a zombie.
    out, _ = proc.communicate()
    for line in out.splitlines():
        match = re.search(r"id\s+:\s+([a-f0-9\-]+)", line, re.I)
        if match:
            return match.group(1)
    raise RuntimeError("Missing hostid")
def get_ip(hostid, host=None):
    """Return the IPv4 address listed next to *hostid* in ``nodetool status``.

    Args:
        hostid: Host ID string to look for in the status output.
        host: Value passed to nodetool's ``--host`` option. When omitted, the
            module-level ``host`` variable is used, which is what this
            function relied on before the parameter existed.

    Returns:
        The first dotted-quad address on the first output line that contains
        *hostid*.

    Raises:
        NameError: If *host* is omitted and no module-level ``host`` exists.
        RuntimeError: If no line contains both *hostid* and an address.
    """
    if host is None:
        try:
            host = globals()["host"]
        except KeyError:
            raise NameError("global name 'host' is not defined")
    proc = subprocess.Popen(
        ["nodetool", "--host", host, "status"],
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        # Text-mode pipes so str operations below also work on Python 3.
        universal_newlines=True,
    )
    # communicate() drains both pipes and reaps the child process.
    out, _ = proc.communicate()
    for line in out.splitlines():
        if hostid not in line:
            continue
        # Every dot is escaped so only a literal dotted quad matches.
        match = re.search(r"(\d+\.\d+\.\d+\.\d+)", line)
        if match:
            return match.group(1)
    raise RuntimeError("Missing ip for hostid")
def get_tokens(ip, keyspace, host=None):
    """Collect the token ranges whose endpoint list starts with *ip*.

    Runs ``nodetool describering <keyspace>`` and keeps every output line in
    which an ``endpoints:[`` list begins with exactly *ip*.

    Args:
        ip: Address that must be the first entry of the endpoint list.
        keyspace: Keyspace name passed to ``describering``.
        host: Value passed to nodetool's ``--host`` option. When omitted, the
            module-level ``host`` variable is used, which is what this
            function relied on before the parameter existed.

    Returns:
        A list of ``[start_token, end_token]`` pairs (strings), in output
        order.

    Raises:
        NameError: If *host* is omitted and no module-level ``host`` exists.
        RuntimeError: If no matching range is found; the message carries
            whatever nodetool wrote to stderr.
    """
    if host is None:
        try:
            host = globals()["host"]
        except KeyError:
            raise NameError("global name 'host' is not defined")
    proc = subprocess.Popen(
        ["nodetool", "--host", host, "describering", keyspace],
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        # Text-mode pipes so str operations below also work on Python 3.
        universal_newlines=True,
    )
    # communicate() drains both pipes and reaps the child process.
    out, err = proc.communicate()

    # The look-ahead rejects longer addresses that merely share the prefix
    # (e.g. 10.0.0.1 must not match a line owned by 10.0.0.10).
    owner = re.compile(r"endpoints:\[" + re.escape(ip) + r"(?!\d)")
    span = re.compile(r"start_token:([\-\d]+),\s+end_token:([\-\d]+)")

    tokens = []
    for line in out.splitlines():
        if not owner.search(line):
            continue
        found = span.search(line)
        if found:
            tokens.append([found.group(1), found.group(2)])
    if not tokens:
        # stdout is fully consumed by now, so stderr is the useful diagnostic.
        raise RuntimeError("No primary tokens for ip " + ip + ": " + err.strip())
    return tokens
def get_pending_compactions(host):
    """Return the pending compaction count reported for *host*.

    Runs ``nodetool --host <host> compactionstats`` and reads the first output
    line that starts with ``pending tasks:``.

    Args:
        host: Value passed to nodetool's ``--host`` option.

    Returns:
        The count as a string of digits, exactly as printed by nodetool.

    Raises:
        RuntimeError: If no ``pending tasks: <n>`` line is found; the message
            carries whatever nodetool wrote to stderr.
    """
    # No keyspace argument is passed: the function has no such parameter, and
    # compactionstats is documented as taking none.
    proc = subprocess.Popen(
        ["nodetool", "--host", host, "compactionstats"],
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        # Text-mode pipes so the str regex below also works on Python 3.
        universal_newlines=True,
    )
    # communicate() drains both pipes and reaps the child process.
    out, err = proc.communicate()
    for line in out.splitlines():
        found = re.match(r"pending tasks:\s*(\d+)", line)
        if found:
            return found.group(1)
    # Fail loudly instead of returning None, which callers feed to int().
    raise RuntimeError("Could not read pending compactions: " + err.strip())
def repair(host, keyspace, offset):
    """Repair the node's token ranges one at a time, starting at *offset*.

    The ranges come from ``get_tokens`` for the address resolved through
    ``get_hostid`` and ``get_ip``. Each range is repaired with
    ``nodetool repair -pr -st <start> -et <end> -local``; after every range
    the function polls ``get_pending_compactions`` every 10 seconds until the
    count is 10 or lower.

    Args:
        host: Value passed to nodetool's ``--host`` option.
        keyspace: Keyspace whose ranges are looked up.
        offset: Index of the first range to repair; the index is printed with
            each range so a failed run can be resumed from it.

    Raises:
        RuntimeError: If a repair command exits with a non-zero status; the
            message carries the command's stderr output.
    """
    ranges = get_tokens(get_ip(get_hostid(host)), keyspace)
    for index, (start_token, end_token) in enumerate(ranges[offset:], offset):
        proc = subprocess.Popen(
            ["nodetool", "--host", host, "repair", "-pr",
             "-st", start_token, "-et", end_token, "-local"],
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            # Text-mode pipes so stderr can be concatenated on Python 3 too.
            universal_newlines=True,
        )
        print("repairing range " + str(index) + " " + str(datetime.now()))
        # communicate() instead of wait(): with both outputs piped, wait()
        # can deadlock once the child fills a pipe buffer nobody is reading.
        _, err = proc.communicate()
        if proc.returncode != 0:
            raise RuntimeError("Error encountered: " + err.strip())

        # Throttle: let the compaction backlog drain before the next range.
        while True:
            pending = int(get_pending_compactions(host))
            if pending <= 10:
                break
            print("waiting for compactions to drop: " + str(pending))
            time.sleep(10)
# Script entry point: repair hostname keyspace [offset]
# Guarded so that importing this file does not start a repair.
if __name__ == "__main__":
    if len(sys.argv) < 3:
        usage()
    # host and keyspace stay module-level names on purpose: other functions
    # in this file may read them as globals.
    host = sys.argv[1]
    keyspace = sys.argv[2]
    offset = 0
    if len(sys.argv) == 4:
        try:
            offset = int(sys.argv[3])
        except ValueError:
            # A non-numeric offset is a usage error, not a traceback.
            usage()
    repair(host, keyspace, offset)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment