#!/usr/bin/env python
import grp
import json
import logging
import optparse
import os
import pwd
import re
import subprocess
import sys

logging.basicConfig()
logger = logging.getLogger('helper')
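
# Usage sketch (illustrative only; the script name, volume and paths below are
# placeholders, not taken from a real deployment):
#
#   ./helper.py --info /some/file -v        # attrs, data file and .glusterfs link file stats
#   ./helper.py --gfid /some/file           # print the trusted.gfid of a path
#   ./helper.py --fix-copies --safe /data2/gluster/.glusterfs/eb/af/<gfid>   # dry-run ------T cleanup
#   ./helper.py --set-gf-attr --brick /data2/gluster \
#       --key trusted.some.attr --value 0x1 /some/file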


def execute(cmd, **kwargs):
    """Run a command, log the result and return (returncode, stdout, stderr)."""
    kwargs = kwargs.copy()
    if 'stdout' not in kwargs:
        kwargs['stdout'] = subprocess.PIPE
    if 'stderr' not in kwargs:
        kwargs['stderr'] = subprocess.PIPE
    p = subprocess.Popen(cmd, **kwargs)
    out, err = p.communicate()
    rc = p.returncode
    if rc == 0:
        logger.debug("running: %s", ' '.join(cmd))
        logger.debug("return code: %s", rc)
        logger.debug("stdout: %s", out)
        logger.debug("stderr: %s", err)
    else:
        logger.error("Failed to run: %s", ' '.join(cmd))
        logger.error("return code: %s", rc)
        logger.error("stdout: %s", out)
        logger.error("stderr: %s", err)
        raise Exception('Failed to run command')
    return rc, out, err


def current_node():
    """Return the first hostname found in 'gluster peer status' output."""
    _, out, _ = execute(['gluster', 'peer', 'status'])
    for line in out.split('\n'):
        m = re.match(r'^Hostname:\s*(.+)', line)
        if m:
            val = m.group(1)
            logger.debug("current node: %s", val)
            return val
    raise Exception('can not identify node >>%s<<' % out)


def get_bricks(volume):
    """List brick paths of the volume whose host matches current_node()."""
    node = current_node()
    _, out, _ = execute(['gluster', 'volume', 'info', volume])
    res = []
    for line in out.split('\n'):
        m = re.match(r'^Brick\d+: %s:(.+)' % node, line)
        if m:
            res.append(m.group(1))
    if not res:
        raise Exception("Failed to read bricks on volume: %s" % volume)
    logger.debug("bricks: %s", res)
    return res


def get_file_attr(brick, path):
    """Dump the extended attributes (hex-encoded) of path as stored on the brick."""
    _, out, _ = execute([
        'getfattr',
        '-d',
        '-m',
        '.',
        '-e',
        'hex',
        '%s%s' % (brick, path)
    ])
    return '\n'.join(filter(None, out.split('\n')))
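
# Illustrative getfattr output for a brick file (attribute names and values are
# examples only and vary by volume configuration); the trusted.gfid line is what
# get_brick_gfid() below parses:
#
#   # file: data2/gluster/some/file
#   trusted.gfid=0xebaf76d724fb41a68f788904a537f7db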


def get_brick_gfid(brick, path):
    """Return the trusted.gfid (hex, no 0x prefix) of path on the brick, if any."""
    brick_path = get_brick_data_path(brick, path)
    if not os.path.exists(brick_path):
        logger.debug("brick %s misses %s", brick, path)
        return
    out = get_file_attr(brick, path)
    for line in out.split('\n'):
        m = re.match(r'^trusted\.gfid=0x(.+)', line)
        if m:
            return m.group(1)
    logger.debug("no gfid in attribute output")


def get_gfid(volume, path):
    for brick in get_bricks(volume):
        val = get_brick_gfid(brick, path)
        if val:
            return val
    logger.warning('no brick contains file on path %s', path)


def get_brick_data_path(brick, path):
    """Absolute path of the data file for a volume path on the given brick."""
    return '%s%s' % (brick, path)


def get_brick_attrs_path(brick, gfid):
    """Path of the .glusterfs link file for a gfid (hex string without dashes)."""
    attrs_path = '%s/%s/%s-%s-%s-%s-%s' % (
        gfid[0:2],
        gfid[2:4],
        gfid[0:8],
        gfid[8:12],
        gfid[12:16],
        gfid[16:20],
        gfid[20:32],
    )
    return '%s/.glusterfs/%s' % (brick, attrs_path)
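
# For example (reusing the gfid from the comment in clean_t_copies below), the
# hex gfid ebaf76d724fb41a68f788904a537f7db on brick /data2/gluster maps to:
#
#   /data2/gluster/.glusterfs/eb/af/ebaf76d7-24fb-41a6-8f78-8904a537f7db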


class FileStats(dict):
    def __str__(self):
        res = []
        for k, v in sorted(self.items()):
            if k != 'path':
                res.append(" %s: %s" % (k, v))
        return '\n'.join(res)


def get_file_stats(path):
    """Collect stat/ownership/checksum details for a path into a FileStats dict."""
    exists = os.path.exists(path)
    res = FileStats()
    res.update({
        "path": path,
    })
    if not exists:
        res.update({
            "exists": exists
        })
        return res
    stats = os.stat(path)
    if os.path.isdir(path):
        res.update({
            'isdir': True
        })
        _, ls_out, _ = execute(['ls', '-ld', path])
    else:
        _, ls_out, _ = execute(['ls', '-la', path])
        res.update({
            "md5sum": execute(['md5sum', path])[1],
        })
    # fall back to raw numeric ids when uid/gid are unknown on this host
    try:
        username = pwd.getpwuid(stats.st_uid).pw_name
    except KeyError:
        username = stats.st_uid
    try:
        groupname = grp.getgrgid(stats.st_gid).gr_name
    except KeyError:
        groupname = stats.st_gid
    res.update({
        "size": stats.st_size,
        "ctime": stats.st_ctime,
        "atime": stats.st_atime,
        "mtime": stats.st_mtime,
        "mode": oct(stats.st_mode),
        "user": username,
        "group": groupname,
        "ls-la": ls_out.rstrip(),
        "stat": execute(['stat', path])[1],
    })
    return res


class BrickFileInfo(object):
    def __init__(self, attributes, data_file, attributes_file):
        self.attributes = attributes
        self.data_file = data_file
        self.attributes_file = attributes_file

    def __str__(self):
        res = []
        if self.attributes:
            res += ["attributes:", str(self.attributes)]
        if self.data_file:
            res += ["data file: %s" % self.data_file['path'],
                    str(self.data_file)]
        if self.attributes_file:
            res += ["attributes file: %s" % self.attributes_file['path'],
                    str(self.attributes_file)]
        return '\n'.join(res)


class VolumeFileInfo(object):
    def __init__(self, volume, path):
        self.volume = volume
        self.path = path
        self.brick_files = []

    def __str__(self):
        res = [
            "** Info on %s on '%s'" % (self.path, self.volume)
        ]
        for obj in self.brick_files:
            res.append("*" * 79)
            res.append(str(obj))
        return '\n'.join(res)

    def add_brick(self, item):
        self.brick_files.append(item)


def get_info(volume, path, show_attrs, show_data_file, show_attr_file):
    """Gather per-brick info (attrs, data file, .glusterfs link file) for a path."""
    res = VolumeFileInfo(volume, path)
    for brick in get_bricks(volume):
        gfid = get_brick_gfid(brick, path)
        if not gfid:
            continue
        data_path = get_brick_data_path(brick, path)
        attrs_path = get_brick_attrs_path(brick, gfid)
        res.add_brick(BrickFileInfo(
            get_file_attr(brick, path) if show_attrs else None,
            get_file_stats(data_path) if show_data_file else None,
            get_file_stats(attrs_path) if show_attr_file else None,
        ))
    return res


def set_gf_attr(volume, brick, path, key, value):
    """Set an extended attribute on the brick copy of path and dump the result."""
    file_path = get_brick_data_path(brick, path)
    if not os.path.exists(file_path):
        logger.warning("File is missing %s", file_path)
        exit(1)
    cmd = [
        'setfattr',
        '-n',
        key,
        '-v',
        value,
        file_path
    ]
    print cmd
    execute(cmd)
    print get_file_attr(brick, path)


_gfid_cache = {}
_filecache = os.path.join(os.path.dirname(__file__), '.filecache')
# warm the gfid -> filename cache from a json file next to this script, if any
if os.path.exists(_filecache):
    with open(_filecache) as f:
        try:
            _gfid_cache = json.loads(f.read())
        except ValueError:
            print >> sys.stderr, "failed to parse", _filecache


def gfid_to_filename_from_cache(gfid):
    return _gfid_cache.get(gfid)


def gfid_to_filename_set_cache(gfid, filename):
    _gfid_cache[gfid] = filename
    with open(_filecache, 'w') as f:
        f.write(json.dumps(_gfid_cache, indent=2))
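
# The .filecache file is plain JSON mapping gfid hex strings to resolved data
# file paths, e.g. (the value below is a placeholder):
#
#   {
#     "ebaf76d724fb41a68f788904a537f7db": "/data2/gluster/some/file"
#   }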


def clean_t_copies(volume, gffile, safe):
    # gffile is a .glusterfs link-file path on one brick, e.g.
    # /data2/gluster/.glusterfs/eb/af/ebaf76d7-24fb-41a6-8f78-8904a537f7db
    parts = gffile.split('/')
    good_dot_files = set()
    bad_dot_files = set()
    good_data_files = set()
    bad_data_files = set()
    bricks = get_bricks(volume)
    for brick in bricks:
        dot_file = os.path.join(brick, '/'.join(parts[3:]))
        dot_file_stats = get_file_stats(dot_file)
        # print 'dot_file', dot_file, dot_file_stats
        if 'mode' not in dot_file_stats:
            continue
        m = dot_file_stats['mode']
        # mode 0101000: regular file with only the sticky bit set, i.e. a
        # zero-permission "------T" copy
        if m == '0101000':
            bad_dot_files.add(dot_file)
        else:
            good_dot_files.add(dot_file)
    particular_brick = '/'.join(parts[:3])
    gfid = parts[-1]
    particular_data_file = gfid_to_filename_from_cache(gfid)
    if not particular_data_file:
        print 'looking for file by gfid', particular_brick, gfid
        _, out, _ = execute([
            './gfid-resolver.sh',
            particular_brick,
            gfid
        ])
        particular_data_file = None
        for line in out.split('\n'):
            if 'File' not in line:
                continue
            particular_data_file = line.split('\t')[-1].strip()
            break
        if particular_data_file:
            gfid_to_filename_set_cache(gfid, particular_data_file)
        else:
            print >> sys.stderr, "can't find file", out
            exit(1)
    for brick in bricks:
        data_file = os.path.join(
            brick,
            '/'.join(particular_data_file.split('/')[3:])
        )
        data_file_stats = get_file_stats(data_file)
        # print 'data_file', data_file, data_file_stats
        if 'mode' not in data_file_stats:
            continue
        m = data_file_stats['mode']
        if m == '0101000':
            bad_data_files.add(data_file)
        else:
            good_data_files.add(data_file)
    warnings = []
    if not len(bad_dot_files):
        warnings.append("there are no bad dot files")
    if not len(bad_data_files):
        warnings.append("there are no bad data files")
    if len(bad_dot_files) != len(bad_data_files):
        warnings.append(
            "the numbers of bad dot and data files differ: %s vs %s"
            % (len(bad_dot_files), len(bad_data_files))
        )
    if not len(good_dot_files):
        warnings.append("there are no good dot files")
    if not len(good_data_files):
        warnings.append("there are no good data files")
    if len(good_dot_files) != len(good_data_files):
        warnings.append(
            "the numbers of good dot and data files differ: %s vs %s"
            % (len(good_dot_files), len(good_data_files))
        )
    if len(good_data_files) > 1:
        warnings.append("there is more than one good copy")
    if warnings:
        print >> sys.stderr, 'doing nothing because', "\n".join(warnings)
        print 'good_dot_files', good_dot_files
        print 'good_data_files', good_data_files
        print 'bad_dot_files', bad_dot_files
        print 'bad_data_files', bad_data_files
        exit(1)
    statements = []
    if (good_dot_files
            and good_data_files
            and bad_dot_files
            and bad_data_files):
        for name in bad_dot_files:
            if not safe:
                os.unlink(name)
            statements.append("removing %s" % name)
        for name in bad_data_files:
            if not safe:
                os.unlink(name)
            statements.append("removing %s" % name)
        # re-add the surviving copy via /tmp through the /shared mount point
        good_file_path = list(good_data_files)[0]
        original_name = os.path.basename(good_file_path)
        original_folder = os.path.dirname(good_file_path)
        cmd = 'mv %s /tmp/%s' % (good_file_path, original_name)
        statements.append(cmd)
        if not safe:
            os.system(cmd)
        shared_path = os.path.join(
            '/shared',
            '/'.join(original_folder.split('/')[3:]),
            original_name
        )
        cmd = 'mv /tmp/%s %s' % (original_name, shared_path)
        statements.append(cmd)
        if not safe:
            os.system(cmd)
    print 'ok'
    print '\n'.join(statements)
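
# clean_t_copies expects the .glusterfs link-file path of the affected gfid on
# one brick as its argument (see the example path in the comment above); with
# --safe it only prints the unlink/mv statements instead of executing them.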


def main():
    parser = optparse.OptionParser()
    parser.add_option("--volume", dest="volume", default="shared",
                      help="which volume to use")
    parser.add_option("--fix-copies", dest="clean_t_copies",
                      action="store_true", default=False,
                      help="clean ------T copies")
    parser.add_option("--safe", dest="safe",
                      action="store_true", default=False,
                      help="clean ------T copies dry run")
    parser.add_option("--info", dest="get_info",
                      action="store_true", default=False,
                      help="get info about specified path")
    parser.add_option("-A", "--show-attrs", dest="show_attrs",
                      action="store_true", default=False,
                      help="show extended attributes")
    parser.add_option("-d", "--show-data-file", dest="show_data_file",
                      action="store_true", default=False,
                      help="show data file stats")
    parser.add_option("-a", "--show-attr-file", dest="show_attr_file",
                      action="store_true", default=False,
                      help="show .glusterfs attribute file stats")
    parser.add_option("--gfid", dest="get_gfid",
                      action="store_true", default=False,
                      help="get GFID on specified path")
    parser.add_option("--set-gf-attr", dest="set_gf_attr",
                      action="store_true", default=False,
                      help="set an extended attribute on specified path")
    parser.add_option("--key", dest="key", default=None,
                      help="key to set attribute on")
    parser.add_option("--value", dest="value", default=None,
                      help="value to set the attribute to")
    parser.add_option("--brick", dest="brick", default=None,
                      help="brick on which path should be looked up")
    parser.add_option("-v", dest="verbose",
                      action="store_true", default=False,
                      help="print status messages to stdout")
    options, args = parser.parse_args()
    if options.verbose:
        logger.setLevel(logging.DEBUG)
    else:
        logger.setLevel(logging.INFO)
    if options.clean_t_copies:
        clean_t_copies(options.volume, args[0], options.safe)
    elif options.get_info:
        show_attrs = options.show_attrs
        show_data_file = options.show_data_file
        show_attr_file = options.show_attr_file
        # default to showing everything when no -A/-d/-a flag is given
        if not show_attrs and not show_data_file and not show_attr_file:
            show_attrs = True
            show_data_file = True
            show_attr_file = True
        for path in args:
            print get_info(
                options.volume,
                path,
                show_attrs,
                show_data_file,
                show_attr_file
            )
    elif options.get_gfid:
        for path in args:
            print get_gfid(options.volume, path)
    elif options.set_gf_attr:
        if not options.key or not options.value or not options.brick:
            parser.error("--key, --value and --brick should be set")
        if len(args) != 1:
            parser.error("only one path should be supplied")
        set_gf_attr(
            options.volume,
            options.brick,
            args[0],
            options.key,
            options.value
        )
    else:
        parser.print_usage()
        exit(1)


if __name__ == '__main__':
    main()