Created
October 26, 2022 06:00
-
-
Save ronen-fr/bc7661bc91d75c3c70563082d1bbca7f to your computer and use it in GitHub Desktop.
~/teu_collect_oct.py -v --logs --parse rfriedma-2022-10-22_18:19:41-rados:thrash-rf-tous1-ci-2110-distro-default-smithi 7077467
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python3 | |
import argparse
import datetime
import fnmatch
import os
import pathlib
import re
import shlex
import subprocess
import sys
import tempfile
from subprocess import Popen
# Root directory under which one sub-directory per test number is populated.
homedir = pathlib.Path('/home/rfriedma')

# Set by locate_logs(): True once at least one OSD log file has been found.
log_exist = False

# File-name / path patterns.
# Literal dots are escaped so that '.' no longer matches an arbitrary
# character (e.g. 'osdX3Ylog' used to be accepted as an OSD log name).

# OSD log file name -> OSD number ('osdn').
pat_osd_num = re.compile(r'(ceph-)*osd\.(?P<osdn>[0-9]+)\.log(\.gz)*')

# Compressed core dump path -> host directory name ('rmt') and the file name
# without its '.gz' suffix ('unzped_name').
core_path_parts = re.compile(r'.*/(?P<rmt>((gibba)|(smithi))[0-9]+)/coredump/(?P<unzped_name>.*core)\.gz')

# Crash directory path -> host directory name ('rmt') and the crash instance
# ('instance'); an optional 'posted/' level is skipped.
crash_path_parts = re.compile(r'.*/(?P<rmt>((gibba)|(smithi))[0-9]+)/crash/(posted/)*(?P<instance>.*)')

# Cluster log path. Note that here 'rmt' captures only the numeric part of
# the host directory name, not the 'gibba'/'smithi' prefix.
clogs_path_parts = re.compile(r'.*/((gibba)|(smithi))(?P<rmt>[0-9]+)/log/(?P<unzped_name>ceph\.log)\.gz')

# OSD number (as a string) -> path of the source log; filled in by cp_logs().
logs = {}

# Number of trailing lines of each OSD log kept by cp_logs().
num_osd_lines = 800000
def locate_logs(p, n):
    """Look for OSD log files below the run directory *p*.

    Three glob patterns are tried, in this order, and their results are
    concatenated:
      - remote/*/log/ceph-osd.*.log.gz
      - remote/*/log/osd.*.log
      - remote/ubu*/log/*/ceph-osd.*.log.gz

    The module-level flag ``log_exist`` is updated to tell whether anything
    was found. *n* is accepted but not used.

    Returns a two-element list: ``[found_any, list_of_paths]``.
    """
    global log_exist
    patterns = (
        'remote/*/log/ceph-osd.*.log.gz',
        'remote/*/log/osd.*.log',
        'remote/ubu*/log/*/ceph-osd.*.log.gz',
    )
    found = []
    for pattern in patterns:
        found.extend(p.glob(pattern))

    log_exist = bool(found)
    if not log_exist:
        print ('No logs!!!')
    return [ log_exist, found ]
def cp_cluster_logs(p, n):
    """Unzip every cluster log (ceph.log.gz) of the run into homedir/<n>/.

    For each ``remote/*/log/ceph.log.gz`` under *p* the file is piped through
    ``zcat`` and a gawk filter, and the result is written to
    ``homedir / n / 'c<host number>_ceph.log'``.

    Paths that do not match ``clogs_path_parts`` (the glob accepts any host
    directory, the pattern only gibba/smithi ones) are reported and skipped
    instead of raising AttributeError.
    """
    # The gawk program is kept exactly as it was. It appears to treat the
    # first field of each line as an epoch timestamp ("@"$1), run it through
    # `date --rfc-3339=ns -d`, trim the last 12 characters of the result and
    # substitute that for the first field - TODO confirm against real logs.
    stamp_cmd = "gawk -e ' { all=$_; x=\"@\"$1; cmd=\"\'\" date --rfc-3339=ns -d \"\'\"x\"\'\" \"\'\"; cmd | getline z; close(cmd); z1=substr(z,0,length(z)-12); sub(\".*\",z1,$1); print $_ } ' "

    for cfn in p.glob('remote/*/log/ceph.log.gz'):
        clog_parts = clogs_path_parts.search(str(cfn))
        print('cluster log: ', cfn)
        if clog_parts is None:
            print('unexpected cluster log path - skipped: ', cfn)
            continue
        print('rmt: ', clog_parts.group('rmt'), clog_parts.group('unzped_name'))
        target_wo = homedir / n / ( 'c' + clog_parts.group('rmt') + '_ceph.log' )
        print('target: ', target_wo)

        # 'with' guarantees the output file is closed even if a spawn fails.
        with open(target_wo, 'w') as target_fd:
            zct = subprocess.Popen([ 'zcat', cfn ], stdout=subprocess.PIPE)
            tstamp = subprocess.Popen(stamp_cmd, shell=True, stdin=zct.stdout, stdout=target_fd)
            # Drop our copy of the pipe so zcat gets SIGPIPE if gawk exits early.
            zct.stdout.close()
            tstamp.communicate()
            # Reap zcat as well, so that it does not linger as a zombie.
            zct.wait()
def help_cpcmd():
    """Print two reminder command lines: an scp and an ssh/chmod.

    The lines are printed verbatim; the '[email protected]' text is
    reproduced exactly as it appears in this copy of the source.
    """
    reminders = (
        'scp ~/src/teu_collect [email protected]:/home/rfriedma/teu_collect',
        'ssh [email protected] chmod +x /home/rfriedma/teu_collect',
    )
    for line in reminders:
        print(line)
def list_main(is_verbose, p, n):
    """Report what the run directory *p* contains and return its OSD logs.

    Calls locate_logs() (most 'dead' jobs have no logs at all), optionally
    prints its two results when *is_verbose* is set, then lists every file
    below ``p/remote`` whose name matches ``core.*`` using ``find -ls``.

    Returns the list of OSD log paths found by locate_logs().
    """
    found_any, osd_logs = locate_logs(p, n)
    if is_verbose:
        print(found_any)
        print(osd_logs)

    # search for core files; a failing 'find' is tolerated
    print('cores:')
    find_cmd = ["find", p/"remote", "-iname", "core.*", "-ls"]
    subprocess.run(find_cmd, check=False)

    return osd_logs
def cp_logs(p, n, lgs, must_not_bg):
    """Write a filtered tail of every OSD log in *lgs* to homedir/<n>/<osd#>.

    Each log is run through the equivalent of:

        zcat|cat LOG | grep -a -n -v bluefs | grep -a -v bluest
                     | tail -<num_osd_lines> | fold -s -w 500 > homedir/n/<osd#>

    Parameters:
      p            run directory (not used here).
      n            test number; names the sub-directory of homedir.
      lgs          iterable of pathlib paths of OSD logs.
      must_not_bg  when true, wait for each pipeline to finish before going
                   on; when false the pipelines are left running.

    Log names from which no OSD number can be extracted are reported and
    skipped. Returns the module-level ``logs`` dict (OSD number -> source
    path), updated with the processed entries.
    """
    print('cp_logs')
    print(lgs)
    for lpath in lgs:
        found = pat_osd_num.search(lpath.name)
        if found is None:
            print('no OSD number in ', lpath.name, ' - skipped')
            continue
        osdnum = found.group('osdn')
        logs[osdnum] = lpath
        target = homedir / n / osdnum
        print(lpath.name, ' -> ' , osdnum, '\t -> ', target)

        reader = 'zcat' if lpath.suffix == '.gz' else 'cat'
        # The children inherit the descriptor, so the parent can close its
        # own copy right after spawning - also in background mode.
        with open(target, 'w') as target_fd:
            zct = subprocess.Popen([ reader, lpath ], stdout=subprocess.PIPE)
            grp1 = subprocess.Popen([ 'grep', '-a', '-n', '-v', 'bluefs' ], stdin=zct.stdout, stdout=subprocess.PIPE)
            grp2 = subprocess.Popen([ 'grep', '-a', '-v', 'bluest' ], stdin=grp1.stdout, stdout=subprocess.PIPE)
            tl1 = subprocess.Popen([ 'tail', str(-num_osd_lines) ], stdin=grp2.stdout, stdout=subprocess.PIPE)
            fld = subprocess.Popen([ 'fold', '-s', '-w', '500' ], stdin=tl1.stdout, stdout=target_fd)

        # Close the parent's copies of all intermediate pipe ends, so that an
        # upstream stage receives SIGPIPE if its consumer exits early.
        upstream = (zct, grp1, grp2, tl1)
        for proc in upstream:
            proc.stdout.close()

        if must_not_bg:
            fld.communicate()
            # Reap the rest of the pipeline too.
            for proc in upstream:
                proc.wait()
    return logs
def cp_cores(is_verbose, p, n) :
    """Copy the core dumps of the run into homedir/<n>/ and identify them.

    Both compressed (``*.*.core.gz``) and uncompressed (``*.*.core``) files
    below ``remote/*/coredump`` are handled. Each one ends up as
    ``homedir / n / '<host>__<core name>'``; compressed cores are copied and
    then gunzipped, uncompressed ones are copied as they are. ``file`` is
    then run on the result.

    The host and file names come from ``core_path_parts`` when it matches;
    otherwise (uncompressed core, or a host directory the pattern does not
    know) they are taken from the path itself, instead of failing on a
    missing match.

    All external commands are best-effort (``check=False``).
    """
    print('copying possible core files')
    al = list(p.glob('remote/*/coredump/*.*.core.gz')) + list(p.glob('remote/*/coredump/*.*.core'))
    if not al:
        return

    print('found cores')
    target = homedir / n
    for cfn in al:
        is_gz = cfn.suffix == '.gz'
        # The pattern only describes compressed cores.
        core_parts = core_path_parts.search(str(cfn)) if is_gz else None
        if core_parts is not None:
            rmt = core_parts.group('rmt')
            unzped_name = core_parts.group('unzped_name')
        else:
            # .../remote/<host>/coredump/<file>
            rmt = cfn.parent.parent.name
            unzped_name = cfn.name[:-len('.gz')] if is_gz else cfn.name

        if is_verbose:
            print('cfn: ', cfn)
            print('core_parts: ', core_parts.group(0) if core_parts is not None else str(cfn))
            print('unzp: ', unzped_name)

        target_wo = target / ( rmt + '__' + unzped_name )
        print('target: ', target_wo)
        if is_gz:
            target_wgz = target / ( rmt + '__' + unzped_name + '.gz' )
            subprocess.run(['cp', '-f', cfn, target_wgz], check=False)
            # -f: overwrite the result of a previous run rather than prompt
            # or fail, in line with the 'cp -f' above.
            subprocess.run(['gunzip', '-f', target_wgz], check=False)
        else:
            subprocess.run(['cp', '-f', cfn, target_wo], check=False)
        subprocess.run(['file', target_wo], check=False)
def cp_crashes(p,n) :
    """Copy the 'log' and 'meta' files of every crash report of the run.

    For each ``remote/*/crash`` directory under *p* (or its ``posted``
    sub-directory when one exists), every entry whose name does not start
    with '.' is treated as one crash instance: a directory of the same name
    is created under ``homedir / n`` and the instance's ``log`` and ``meta``
    files are copied into it.

    Commands are run from argument lists (no shell), so paths containing
    spaces or shell metacharacters are handled correctly. Copying stays
    best-effort: a missing file only produces the error message of ``cp``.
    """
    print('copying possible crash')
    target = homedir / n
    for crash_dir in p.glob('remote/*/crash'):
        posted = crash_dir / 'posted'
        if posted.exists():
            crash_dir = posted
        with os.scandir(crash_dir) as it:
            for insta in it:
                if insta.name.startswith('.'):
                    continue
                print('Crash insta: ', insta.name)
                dest = target / insta.name
                # -p: do not complain when the directory is already there
                # (re-run over an existing target directory).
                subprocess.run(['mkdir', '-p', dest], check=False)
                for item in ('log', 'meta'):
                    subprocess.run(['cp', os.path.join(insta.path, item), dest], check=False)
def parse_log(d, n, oslog) :
    """Extract three digests from the (already unzipped) OSD log *oslog*.

      /tmp/Q_<n>    lines containing 'signal', prefixed with line numbers
      <oslog>_scr   lines matching scrubber|sched-queue|sched_scrub
      /tmp/T_<n>    the last 40 lines

    *d* is accepted but not used. The commands are run from argument lists
    with the redirection done here, so the log path never passes through a
    shell; ``grep -E`` replaces the obsolescent ``egrep``. A command that
    finds nothing just leaves an empty output file.
    """
    print ('parsing ', oslog, ' into /tmp/T_*, /tmp/Q_*, *_scr')
    src = str(oslog)
    jobs = (
        (['grep', '-a', '-n', 'signal', src], '/tmp/Q_' + str(n)),
        (['grep', '-E', '-a', '--', 'scrubber|sched-queue|sched_scrub', src], src + '_scr'),
        (['tail', '-n', '40', src], '/tmp/T_' + str(n)),
    )
    for cmd, out_name in jobs:
        with open(out_name, 'wb') as out:
            subprocess.run(cmd, stdout=out, check=False)
def parse_logs(p, n, max_osds=10) :
    """Run parse_log() on the OSD log copies found in homedir/<n>/.

    The copies are the files named after the OSD number ('0', '1', ...).
    Only OSD numbers below *max_osds* are considered; the default of 10
    keeps the previous hard-wired range 0..9. Numbers without a file are
    silently skipped. *p* is accepted but not used.
    """
    target = homedir / n
    for osn in range(max_osds):
        oslog = target / str(osn)
        if oslog.is_file():
            parse_log(target, osn, oslog)
def extra_symlinks(p, n) :
    """Create homedir/<n>/_ as a symlink to the run directory *p*.

    If the link name is already taken, nothing is changed and a reminder is
    printed. A dangling symlink counts as taken too: ``exists()`` follows
    links and reports False for it, which used to let ``os.symlink`` fail
    with FileExistsError.
    """
    target = homedir / n / '_'
    if target.is_symlink() or target.exists():
        print (f'should unlink {target}')
        return
    print (f'symlinking from {p} to {target}')
    os.symlink(p, target)
def main():
    """Collect logs, cores and crash reports of one test into homedir/<tnum>.

    Command line: ``[options] path tnum``; the run directory is
    ``/a/<path>/<tnum>``. Exits with status 1 when the target directory
    already exists and --logs was given without --force, 0 otherwise.
    """
    parser = argparse.ArgumentParser(description='collect cores and logs')
    parser.add_argument('-v', '--verbose', action='store_true')
    parser.add_argument('--parse', action='store_true', help='extract info from the osd logs')
    parser.add_argument('--logs', action='store_true', help='unzip log tails')
    parser.add_argument('--no-cores', action='store_true')
    parser.add_argument('-f', '--force', action='store_true', help='OK to use existing target dir')
    parser.add_argument('-s', '--symlink', action='store_true', help='create symlinks to some conf files')
    parser.add_argument('path', help='rfriedma/...')
    parser.add_argument('tnum', help='test number')
    args = parser.parse_args()

    fpath = pathlib.Path('/a') / args.path / args.tnum
    print('fpath: ', fpath)
    # NOTE(review): the original indentation is not available; it is assumed
    # that the two prints and help_cpcmd() are the verbose-only part and that
    # the separator line is always printed - confirm.
    if args.verbose:
        print('Path: ', args.path)
        print('#: ', args.tnum)
        help_cpcmd()
    print('\n-----------------------------\n\n')

    target_dir = homedir / args.tnum
    if target_dir.exists() :
        print('Target dir ', target_dir, 'already_exists!!!!!!!!')
        # Re-using the directory is refused only when logs would be
        # rewritten without an explicit --force.
        if args.logs and not args.force :
            sys.exit(1)
    else :
        os.mkdir(target_dir)

    all_logs = list_main(args.verbose, fpath, args.tnum)

    teu_log = fpath / 'teuthology.log'
    subprocess.run(['cp', teu_log, target_dir], check=False)
    # Numbered, folded tail of teuthology.log; both paths are quoted because
    # the pipeline goes through the shell.
    subprocess.run('cat -n ' + shlex.quote(str(teu_log)) + ' | tail -500000 |fold -s -w 500 > ' + shlex.quote(str(target_dir / 'teu')), shell=True, check=False)

    if not args.no_cores :
        cp_crashes(fpath, args.tnum)
        cp_cores(args.verbose, fpath, args.tnum)

    # The logs are parsed whenever --parse or --logs is given, so the copy
    # must be complete before parsing starts in both cases. Passing only
    # args.parse let '--logs' alone parse files still being written.
    parse_after = args.parse or args.logs
    if log_exist and args.logs :
        # collecting the logs can be background-run - but only if
        # not parsing them afterwards
        cp_logs(fpath, args.tnum, all_logs, parse_after)
    if parse_after :
        parse_logs(fpath, args.tnum)

    # NOTE(review): assumed to run unconditionally - confirm against the
    # original indentation.
    cp_cluster_logs(fpath, args.tnum)

    # if args.symlink :
    # (the -s/--symlink option is currently not consulted: the link is
    # always attempted)
    extra_symlinks(fpath, args.tnum)
    sys.exit(0)


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment