@dixsonhuie
Last active November 7, 2024 20:06
python examples
import os
import sys
import csv
import re
import logging
import argparse
import datetime

col_li = ['filename', 'line_number', 'host', 'pid', 'comp', 'id', 'time', 'ms', 'category', 'level', 'logger', 'message']
dirlist = [r'E:\log']
start_date = None
end_date = None
# date format used to convert command line arguments into a datetime object
# example: 2021-09-14
filter_date_fmt = '%Y-%m-%d'
# adding hours, minutes and seconds
filter_datetime_fmt = filter_date_fmt + ' %H:%M:%S'
home_dir = os.path.expanduser('~')
filename_prefix = 'app_log_summary'
output_filename = ''
show_fullpath = False
# list of extension patterns (regexes) to visit
extlist = [r'\.\d+', r'\.log', r'\.out', r'\.stdouterr', r'\.err']
# regex representing the entire date time portion of a line in a log file
# example: 2021-09-14 16:22:04,124
datefmt = r'(\d\d\d\d-\d\d-\d\d \d\d:\d\d:\d\d),(\d\d\d)'
# search for the following strings that may indicate an error
error_li = ['warning', 'severe', 'exception', 'error', 'failure', 'Long GC collection']
# for setting log level
level_li = ['SEVERE', 'WARNING', 'INFO', 'CONFIG', 'FINE', 'FINER', 'FINEST']
host_li = []

def gettimestamp():
    today = datetime.date.today()
    return today.strftime("%Y%b%d")

# check if string matches any of the hostnames
def get_hostname(s):
    for host in host_li:
        pattern = '.*({0}).*'.format(host)
        m = re.match(pattern, s)
        if m:
            return m.group(1)
    return ''

# check if filename contains pid and component information
# only works if the filename format has not changed
def get_pid(s):
    comp = ''
    id = ''
    host = ''
    pid = ''
    patternstr = r'.*(gsc|manager|gsm|lus)_(\d+)-([\w\.]+)-(\d+).*'
    m = re.match(patternstr, s)
    if m:
        comp = m.group(1)
        id = m.group(2)
        host = m.group(3)
        pid = m.group(4)
    else:
        # other processes: gsa, GSWebUI, ui, service
        patternstr = r'.*(gsa|GSWebUI|ui|service)-([\w\.]+)-(\d+).*'
        m = re.match(patternstr, s)
        if m:
            comp = m.group(1)
            host = m.group(2)
            pid = m.group(3)
    return (comp, id, host, pid)
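# Illustrative parses (hypothetical filenames, not from real logs):
#   get_pid('gsc_1-myhost-12345.log')  ->  ('gsc', '1', 'myhost', '12345')
#   get_pid('gsa-myhost-9876.log')     ->  ('gsa', '', 'myhost', '9876')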
def process_file(fullpath):
    line_number = 0
    with open(fullpath, encoding="latin-1") as f:
        sDate = ''
        dtDate = None
        millis = ''
        for line in f:
            found = False
            line_number += 1
            # skip lines beginning with white space
            if re.match(r'\s', line):
                continue
            # save the timestamp for lines with no timestamp
            patternstr = r'.*{}.*'.format(datefmt)
            m = re.match(patternstr, line)
            if m:
                sDate = m.group(1)
                dtDate = datetime.datetime.strptime(sDate, filter_datetime_fmt)
                millis = m.group(2)
            # filter out log lines by date
            if start_date is not None and dtDate is not None and dtDate < start_date:
                continue
            if end_date is not None and dtDate is not None and dtDate > end_date:
                continue
            for error_pattern in error_li:
                if re.search(error_pattern, line, re.IGNORECASE):
                    found = True
                    break
            logging.debug("log date as string: %s, log date: %s", sDate, '' if dtDate is None else dtDate.strftime(filter_date_fmt))
            if found:
                # truncate the line
                line = line[:300]
                line = line.rstrip()
                logging.debug("Line: %s", line)
                process_line(line, fullpath, line_number, sDate, millis)

def process_line(s, fullpath, line_number, date, millis):
    # example: 2017-01-05 14:11:21,821 LUS INFO [com.sun.jini.reggie] - Exception
    # example: 2016-12-31 17:38:57,334 pmds.deployment-1.8.9-pu.18 [2] WARNING [com.gigaspaces.core.common] - Primary space is unavailable
    patternstr = r'{}{}'.format(datefmt, r' ([\w \-\.]*)(\[\d\] )?([\w]*)? \[([\w\-\.]*)\] - (.*)$')
    m = re.match(patternstr, s)
    #m = re.match(r'(\d\d\d\d-\d\d-\d\d \d\d:\d\d:\d\d),(\d\d\d) ([\w \-\.]*)(\[\d\] )?([\w]*)? \[([\w\-\.]*)\] - (.*)$', s)
    # groups: date, millis, category, optional '[n]' (matched 0 or 1 times), level, logger, message
    if m:
        # 1 date
        # 2 millis
        # 3 category
        # 4 optional, '[2]' in the comment above
        # 5 level
        # 6 logger
        # 7 message
        category = ''
        level = ''
        if m.group(4) is None:
            category = m.group(3)
            # extract level information
            # e.g., LUS INFO
            for i in level_li:
                index = category.find(i)
                if index >= 0:
                    level = category[index:]
                    category = category[0:index]
                    break
        else:
            category = m.group(3) + m.group(4)
            level = m.group(5)
        # this group also grabs the space that may come after this optional string; need to strip it out
        category = category.strip()
        level = level.strip()
        mywriter.writerow([fileinfo['path'], line_number, fileinfo['host'], fileinfo['pid'], fileinfo['comp'], fileinfo['id'], m.group(1), m.group(2), category, level, m.group(6), m.group(7)])
    else:
        # sometimes clients just provide output of the gs-agent process
        # [gsc][1/10120] 2017-10-11 10:52:37,557 CommonClassLoader WARNING [net.jini.discovery.LookupLocatorDiscovery] - java.net.SocketTimeoutException: connect timed out - using unicast locator 10.10.10.117:4174 - delay next lookup by 1,000 ms
        patternstr = r'{}{}{}'.format(r'\[(\w*)\]\[(\d*)/(\d*)\]\s*', datefmt, r' ([\w \-\.]*)(\[\d\] )?([\w]*)? \[([\w\-\.]*)\] - (.*)$')
        m = re.match(patternstr, s)
        #m = re.match(r'\[(\w*)\]\[(\d*)/(\d*)\]\s*(\d\d\d\d-\d\d-\d\d \d\d:\d\d:\d\d),(\d\d\d) ([\w \-\.]*)(\[\d\] )?([\w]*)? \[([\w\-\.]*)\] - (.*)$', s)
        # groups: proc, id, pid - the rest is a repeat of the regex used above
        if m:
            # 1 component
            # 2 id
            # 3 pid
            # 4 date
            # 5 millis
            # 6 category
            # 7 optional
            # 8 level
            # 9 logger
            # 10 message
            category = ''
            level = ''
            if m.group(7) is None:
                category = m.group(6)
                # extract level information
                for i in level_li:
                    index = category.find(i)
                    if index >= 0:
                        level = category[index:]
                        category = category[0:index]
                        break
                category = category.strip()
                if category.upper() == m.group(1).upper():
                    category = ''
            else:
                category = m.group(6) + m.group(7)
                level = m.group(8)
            mywriter.writerow([fileinfo['path'], line_number, fileinfo['host'], m.group(3), m.group(1), m.group(2), m.group(4), m.group(5), category, level, m.group(9), m.group(10)])
        else:
            # [manager][1/13986] Caused by: com.gigaspaces.security.AuthenticationException: Authentication request is invalid - you are not logged in.
            # log message pattern missing timestamp
            patternstr = r'{}{}'.format(r'\[(\w*)\]\[(\d*)/(\d*)\]\s*', r'(.*)$')
            # groups: comp, id, pid, message
            m = re.match(patternstr, s)
            if m:
                # 1 component
                # 2 id
                # 3 pid
                # 4 message
                mywriter.writerow([fileinfo['path'], line_number, fileinfo['host'], m.group(3), m.group(1), m.group(2), date, millis, '', '', '', m.group(4)])
            else:
                mywriter.writerow([fileinfo['path'], line_number, fileinfo['host'], fileinfo['pid'], fileinfo['comp'], fileinfo['id'], date, millis, '', '', '', s])

def process_args():
    global dirlist, start_date, end_date, filename_prefix, output_filename, host_li, show_fullpath
    parser = argparse.ArgumentParser(add_help=False)
    parser.add_argument("--start_dir", help="the root directory to begin processing.")
    parser.add_argument("--output_dir", help="where the output file should be written to.")
    parser.add_argument("--start_date", help="the date to begin processing errors. Log lines with dates before the start date will be filtered out. Example format: 2021-09-21")
    parser.add_argument("--end_date", help="the date to end processing errors. Log lines with dates after the end date will be filtered out. Example format: 2021-09-21")
    parser.add_argument("--log_level", choices=['CRITICAL', 'ERROR', 'WARNING', 'INFO', 'DEBUG'], help="logging level. Default is WARNING.")
    parser.add_argument("--hosts", help="list of hosts, separated by commas.")
    parser.add_argument("--filename_prefix", help="Output filename prefix.")
    parser.add_argument("--show_fullpath", help="Output the full path. Default is false.")
    parser.add_argument('-h', '--help', action='help', default=argparse.SUPPRESS, help="This program parses a set of XAP log files formatted with standard XAP out-of-the-box settings.")
    args = parser.parse_args()
    if args.filename_prefix:
        filename_prefix = args.filename_prefix
    if args.start_dir:
        dirlist = [args.start_dir]
    if args.start_date:
        start_date = datetime.datetime.strptime(args.start_date, filter_date_fmt)
    if args.end_date:
        end_date = datetime.datetime.strptime(args.end_date, filter_date_fmt)
    if args.output_dir:
        output_filename = args.output_dir + os.path.sep + filename_prefix + "-" + gettimestamp() + ".csv"
    else:
        output_filename = home_dir + os.path.sep + filename_prefix + '-' + gettimestamp() + ".csv"
    if args.show_fullpath:
        show_fullpath = args.show_fullpath.lower() in ('true', 't')
    if args.log_level:
        if args.log_level == 'CRITICAL':
            logging.basicConfig(level=logging.CRITICAL)
        elif args.log_level == 'ERROR':
            logging.basicConfig(level=logging.ERROR)
        elif args.log_level == 'INFO':
            logging.basicConfig(level=logging.INFO)
        elif args.log_level == 'DEBUG':
            logging.basicConfig(level=logging.DEBUG)
        else:
            logging.basicConfig(level=logging.WARNING)
    else:
        # set logging level. WARNING is the default level
        logging.basicConfig(level=logging.WARNING)
    if args.hosts:
        host_li = args.hosts.split(',')

def myvisitor(extlist, dirname, names):
    global fileinfo
    logging.debug("Current directory: %s", dirname)
    for f in names:
        (b, ext) = os.path.splitext(f)
        logging.debug("Filename base: %s Ext: %s", b, ext)
        for x in extlist:
            m = re.match(x, ext)
            if m:
                fullpath = os.path.join(dirname, f)
                logging.debug("Fullpath: %s", fullpath)
                try:
                    hostname = get_hostname(f)
                    fileinfo = {'host': hostname}
                    process_file(fullpath)
                except OSError as err:
                    print("OS error: {0}".format(err))
                break

def myvisitor_2(fullpath, start_dir, filename):
    global fileinfo
    try:
        relative_path = "{}{}".format('.', fullpath.replace(start_dir, '', 1))
        hostname = get_hostname(relative_path)
        if not show_fullpath:
            path = relative_path
        else:
            path = fullpath
        (comp, id, host, pid) = get_pid(filename)
        if hostname == '':
            hostname = host
        fileinfo = {'host': hostname, 'path': path, 'comp': comp, 'id': id, 'pid': pid}
        process_file(fullpath)
    except OSError as err:
        print("OS error: {0}".format(err))

def main():
    if sys.version_info < (3, 0, 0):
        print("Please use Python 3 or later")
        sys.exit(-1)
    global mywriter
    process_args()
    # write output to csv file
    with open(output_filename, 'w', newline='') as csvfile:
        mywriter = csv.writer(csvfile)
        mywriter.writerow(col_li)
        for i in dirlist:
            logging.debug("Processing: %s", i)
            for root, dirs, files in os.walk(i):
                for name in files:
                    logging.debug(os.path.join(root, name))
                    (b, ext) = os.path.splitext(name)
                    for x in extlist:
                        m = re.match(x, ext)
                        if m:
                            fullpath = os.path.join(root, name)
                            myvisitor_2(fullpath, i, name)
                for name in dirs:
                    logging.debug(os.path.join(root, name))
            #os.path.walk(i, myvisitor, extlist)

main()
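# Usage sketch (hypothetical script name and paths; by default the summary is
# written to the home directory as app_log_summary-<date>.csv):
#   python xap_log_summary.py --start_dir /data/logs --start_date 2021-09-01 \
#       --end_date 2021-09-30 --hosts host1,host2 --log_level INFO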
import argparse
import logging
import os
import sys

start_dir = os.path.expanduser('~')
show_relpath = False
filter_li = []

class file_suffix_filter:
    # heap dump files
    # extlist = ['.hprof']
    def __init__(self, li):
        self.extlist = li
    def hasFileMatch(self):
        return True
    def hasDirectoryMatch(self):
        return False
    def isFileMatch(self, path, filename):
        (base, ext) = os.path.splitext(filename)
        return ext in self.extlist

class named_dir_filter:
    def __init__(self, named_dir_li):
        self.dirname_li = named_dir_li
    def hasFileMatch(self):
        return False
    def hasDirectoryMatch(self):
        return True
    def isDirectoryMatch(self, dirname):
        logging.debug("dirname is: %s", dirname)
        return dirname in self.dirname_li

class large_file_filter:
    def __init__(self, f_size):
        self.file_size = f_size
    def hasFileMatch(self):
        return True
    def hasDirectoryMatch(self):
        return False
    def isFileMatch(self, path, filename):
        fname = os.path.join(path, filename)
        if not os.path.islink(fname):
            return os.path.getsize(fname) > self.file_size
        return False

# recursively visit directory and its children
def process():
    for root, dirs, files in os.walk(start_dir):
        rel_dir = os.path.relpath(root, start_dir)
        for name in files:
            for filter in filter_li:
                if filter.hasFileMatch() and filter.isFileMatch(root, name):
                    if show_relpath:
                        filename = os.path.join('.', rel_dir, name)
                    else:
                        filename = os.path.join(root, name)
                    print(filename)
        for dir in dirs:
            for filter in filter_li:
                if filter.hasDirectoryMatch() and filter.isDirectoryMatch(dir):
                    if show_relpath:
                        filename = os.path.join('.', rel_dir, dir)
                    else:
                        filename = os.path.join(root, dir)
                    print(filename)

def process_args():
    global start_dir, show_relpath, filter_li
    is_file_suffix_filter = True
    file_suffix = ['.hprof']
    is_named_dir_filter = True
    is_large_file_filter = True
    large_file_filter_size = 1_000_000_000
    named_dir = ['logs']
    parser = argparse.ArgumentParser(add_help=False)
    parser.add_argument("--start_dir", help="The root directory to begin processing. Default is the user's home directory.")
    parser.add_argument("--show_relpath", help="Output the relative path, otherwise show the full path. Default is False.")
    parser.add_argument("--log_level", choices=['CRITICAL', 'ERROR', 'WARNING', 'INFO', 'DEBUG'], help="logging level. Default is WARNING.")
    parser.add_argument("--file_suffix_filter", choices=['true', 'false'], help="Filter in files that match a suffix. Default is true.")
    parser.add_argument("--file_suffix", help="A list of file suffixes to be used with --file_suffix_filter, separated by commas. Default suffixes: '.hprof'.")
    parser.add_argument("--named_dir_filter", choices=['true', 'false'], help="Filter in directories based on a name. Default is true.")
    parser.add_argument("--named_dir", help="A list of directories used with --named_dir_filter, separated by commas. Default directories: 'logs'. Other suggestions: target,work,deploy")
    parser.add_argument("--large_file_filter", choices=['true', 'false'], help="Filter in files larger than a default size of {}. Default is true.".format(large_file_filter_size))
    parser.add_argument("--large_file_filter_size", help="Large file filter size.")
    parser.add_argument('-h', '--help', action='help', default=argparse.SUPPRESS, help="This program will recurse a directory and look for files to be cleaned up.")
    # process arguments
    args = parser.parse_args()
    if args.start_dir:
        start_dir = args.start_dir
    if args.show_relpath:
        if args.show_relpath.lower() in ('true', 't'):
            show_relpath = True
    if args.file_suffix_filter:
        is_file_suffix_filter = args.file_suffix_filter.lower() in ('true', 't')
    if args.file_suffix:
        file_suffix = args.file_suffix.split(',')
    if args.named_dir_filter:
        is_named_dir_filter = args.named_dir_filter.lower() in ('true', 't')
    if args.named_dir:
        named_dir = args.named_dir.split(',')
    if args.large_file_filter:
        is_large_file_filter = args.large_file_filter.lower() in ('true', 't')
    if args.large_file_filter_size:
        large_file_filter_size = int(args.large_file_filter_size)
    # set values based on arguments
    if is_file_suffix_filter:
        filter_li.append(file_suffix_filter(file_suffix))
    if is_named_dir_filter:
        filter_li.append(named_dir_filter(named_dir))
    if is_large_file_filter:
        filter_li.append(large_file_filter(large_file_filter_size))
    if args.log_level:
        if args.log_level == 'CRITICAL':
            logging.basicConfig(level=logging.CRITICAL)
        elif args.log_level == 'ERROR':
            logging.basicConfig(level=logging.ERROR)
        elif args.log_level == 'INFO':
            logging.basicConfig(level=logging.INFO)
        elif args.log_level == 'DEBUG':
            logging.basicConfig(level=logging.DEBUG)
        else:
            logging.basicConfig(level=logging.WARNING)
    else:
        # set logging level. WARNING is the default level
        logging.basicConfig(level=logging.WARNING)

def main():
    if sys.version_info < (3, 0, 0):
        print("Please use Python 3 or later")
        sys.exit(-1)
    process_args()
    process()

main()
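# Usage sketch (hypothetical script name): list heap dumps, 'logs' directories,
# and files over 1 GB under a tree, printing paths relative to the start dir:
#   python find_cleanup_candidates.py --start_dir /opt/app --show_relpath true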
import argparse
import csv
from datetime import datetime
import logging
import sys

file = r'C:\Users\Dixson\tmp.csv'
before_dt = None
after_dt = None
col_no = 1

def process(fin):
    # filtered rows are written to tmp.csv in the current directory
    with open('tmp.csv', 'w', newline='') as outfile:
        mywriter = csv.writer(outfile)
        with open(fin, newline='') as infile:
            reader = csv.reader(infile)
            for row in reader:
                value = row[col_no]
                dt = convert_dt(value)
                logging.debug("Value: {}, date: {} on column {}".format(value, dt, col_no))
                if dt is None:
                    # rows without a parsable date are passed through
                    mywriter.writerow(row)
                    continue
                if before_dt is None or dt < before_dt:
                    if after_dt is None or dt > after_dt:
                        mywriter.writerow(row)

# example date: 2017-01-05 14:11:21
def convert_dt(s):
    try:
        return datetime.strptime(s, '%Y-%m-%d %H:%M:%S')
    except ValueError:
        return None

def process_args():
    global file, before_dt, after_dt, col_no
    parser = argparse.ArgumentParser(add_help=False)
    parser.add_argument("-f", "--file", help="the input file. If not provided, /dev/stdin is used.")
    parser.add_argument("--before", help='include dates before the provided date. E.g., --before "2017-01-05 14:11:21"')
    parser.add_argument("--after", help="include dates after the provided date.")
    parser.add_argument("--columnNumber", help="the column number that has the date field, beginning at 0.")
    parser.add_argument("--log_level", choices=['CRITICAL', 'ERROR', 'WARNING', 'INFO', 'DEBUG'], help="logging level")
    parser.add_argument('-h', '--help', action='help', default=argparse.SUPPRESS, help="This program parses a csv file using the date filter criteria.")
    args = parser.parse_args()
    if args.file:
        file = args.file
    else:
        # won't work on Windows
        file = '/dev/stdin'
    if args.before:
        before_dt = convert_dt(args.before)
    if args.after:
        after_dt = convert_dt(args.after)
    if args.columnNumber:
        col_no = int(args.columnNumber)
    if args.log_level:
        if args.log_level == 'CRITICAL':
            logging.basicConfig(level=logging.CRITICAL)
        elif args.log_level == 'ERROR':
            logging.basicConfig(level=logging.ERROR)
        elif args.log_level == 'INFO':
            logging.basicConfig(level=logging.INFO)
        elif args.log_level == 'DEBUG':
            logging.basicConfig(level=logging.DEBUG)
        else:
            logging.basicConfig(level=logging.WARNING)
    else:
        # set logging level. WARNING is the default level
        logging.basicConfig(level=logging.WARNING)

def main():
    if sys.version_info < (3, 0, 0):
        print("Please use Python 3 or later")
        sys.exit(-1)
    process_args()
    process(file)

main()
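# Usage sketch (hypothetical file names): keep rows whose column-1 date falls
# inside a window; the result is written to ./tmp.csv:
#   python csv_date_filter.py -f input.csv --after "2017-01-01 00:00:00" \
#       --before "2017-02-01 00:00:00" --columnNumber 1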
import argparse
import csv
import datetime
import logging
import os
import re
import sys

'''
This program parses a gc log file for stop-the-world phases and keywords, and writes a summary to a csv file.
It assumes:
PrintGCDateStamps has been enabled
PrintAdaptiveSizePolicy has been enabled
young gc types - G1 Evacuation Pause, G1 Humongous Allocation, Metadata GC Threshold
mixed types - G1 Evacuation Pause
full gc types - Allocation Failure, System.gc()
'''

# list of columns
col_li = ['file name', 'line no.', 'host', 'pid', 'date time', 'process time', 'gc type/keyword', 'time', 'comment']
# list of special extensions to visit
# versioned logs will be in the format .1, .2, etc. This is checked elsewhere.
extlist = ['.current']
#extlist = ['.log', '.current', '.1', '.2']
# special patterns to search for
search_li = ['to-space', 'humongous']
#search_li = ['to-space', 'humongous', r'System.gc\(\)']
datefmt = r'(\d\d\d\d-\d\d-\d\dT\d\d:\d\d:\d\d\.\d\d\d[\+\-]\d\d\d\d): (\d+\.\d+): '
# group 1: date time, group 2: timestamp (seconds since JVM start)

def gethomedir():
    return os.path.expanduser('~')

def gettimestamp():
    today = datetime.date.today()
    return today.strftime("%Y%b%d")

# globals
# list of directories to visit
dirlist = [r'E:\log']
output_filename = '{}{}gc-summary-{}.csv'.format(gethomedir(), os.path.sep, gettimestamp())
show_relative_path = False
enable_humongous = False
# a list of possible host names
host_li = []

# log files were collected and put in directories by hostname, separated by '.'
def get_hostname(dirpath):
    for h in host_li:
        if dirpath.find(h) > -1:
            return h
    return ''

# get the pid from the log file name
# use -Xloggc:/path/to/file/gc.%p.log, where %p tells the JVM to substitute the pid
def get_pid(filename):
    li = filename.split('pid')
    if len(li) == 1:
        return li[0]
    else:
        (pid, rest) = li[1].split('.', 1)
        logging.debug("pid: %s", pid)
        return pid

# not used, this is a deprecated version of the visitor
#def myvisitor(extlist, dirname, names):
#    global fileinfo
#    logging.debug("Current directory: %s", dirname)
#    for f in names:
#        (p, ext) = os.path.splitext(f)
#        logging.debug("%s %s", f, ext)
#        if ext in extlist:
#            fullpath = os.path.join(dirname, f)
#            logging.debug(fullpath)
#            try:
#                hostname = get_hostname(dirname)
#                pid = get_pid(f)
#                fileinfo = {'filename': f, 'host': hostname, 'pid': pid}
#                process_file(fullpath)
#            except OSError as err:
#                print("OS error: {0}".format(err))

def myvisitor_2(fullpath, f):
    global fileinfo
    try:
        hostname = get_hostname(f)
        pid = get_pid(f)
        fileinfo = {'filename': f, 'host': hostname, 'pid': pid}
        process_file(fullpath)
    except OSError as err:
        print("OS error: {0}".format(err))

def process_file(fullpath):
    # this section processes log messages that occupy a single line
    linenum = 0
    f = open(fullpath, 'r')
    date_time = ''
    process_time = ''
    # process line by line to get basic information
    for line in f:
        linenum += 1
        m = re.match(r'^' + datefmt, line)
        if m:
            # save current timestamp
            date_time = m.group(1)
            process_time = m.group(2)
        # check for keywords of interest
        process_search_pattern(line, linenum, date_time, process_time)
        if line.startswith('Java HotSpot(TM)') or line.startswith('Memory:') or line.startswith('CommandLine flags:'):
            process_jvminfo(line, linenum)
        elif not line.startswith(' '):
            # check for stw pauses that appear on one line
            process_remark_cleanup(line, linenum)
    # this section processes log messages that span multiple lines
    # read file object to string. When -XX:+PrintAdaptiveSizePolicy is used,
    # gc phases need a multi-line regex to handle
    # check for stw pauses that span multiple lines
    f.seek(0)
    text = f.read()
    f.close()
    # we are interested in activity that causes a stop-the-world pause and the duration of the gc
    # https://blogs.oracle.com/poonam/entry/understanding_g1_gc_logs
    # https://www.oracle.com/technetwork/articles/java/g1gc-1984535.html
    # process multi-line gc phases
    process_young_mixed(text)
    process_full(text)

###############################################################################
# methods that process multi-line messages
###############################################################################
def process_young_mixed(s):
    '''
    young generation and mixed collections share similar formats.
    These gc log statements show up on multiple lines.
    Example:
    2017-09-01T16:12:51.133+0000: 134.345: [GC pause (Metadata GC Threshold) (young) (initial-mark)
    Desired survivor size 48234496 bytes, new threshold 15 (max 15)
    134.346: [G1Ergonomics (CSet Construction) start choosing CSet, _pending_cards: 0, predicted base time: 10.00 ms, remaining time: 990.00 ms, target pause time: 1000.00 ms]
    134.346: [G1Ergonomics (CSet Construction) add young regions to CSet, eden: 63 regions, survivors: 0 regions, predicted young region time: 4209.46 ms]
    134.346: [G1Ergonomics (CSet Construction) finish choosing CSet, eden: 63 regions, survivors: 0 regions, old: 0 regions, predicted pause time: 4219.46 ms, target pause time: 1000.00 ms]
    , 0.0325663 secs]
    '''
    date_time = ''
    process_time = 0.0
    young_mixed_type = ''  # young or mixed
    secondary_type = ''  # e.g., G1 Evacuation Pause, G1 Humongous Allocation, Metadata GC Threshold
    initial_mark = ''  # tertiary type, associated with G1 Humongous Allocation and Metadata GC Threshold
    gc_time = 0.0
    patternstr = datefmt + r'\[GC pause \(([ \w]*)\) \((young|mixed)\)( \([\w-]+\))?.+?, (\d+\.\d+) secs\]$'
    # groups: date time, timestamp, secondary type, young/mixed, optional initial-mark, elapsed time
    pattern = re.compile(patternstr, re.MULTILINE | re.DOTALL)
    for m in pattern.finditer(s):
        date_time = m.group(1)
        process_time = m.group(2)
        young_mixed_type = m.group(4)
        secondary_type = m.group(3)
        if m.group(5) is None:
            initial_mark = ''
        else:
            tmp = m.group(5)
            tmp = tmp.strip('() ')
            initial_mark = ' ' + tmp
        gc_time = m.group(6)
        mywriter.writerow([fileinfo['filename'], '', fileinfo['host'], fileinfo['pid'], date_time, process_time, 'GC pause - ' + young_mixed_type + ' ' + secondary_type + initial_mark, gc_time, ''])

# note: not called from process_file; process_young_mixed also covers mixed collections
def process_mixed(s):
    '''
    2017-09-01T17:53:24.732+0000: 6167.945: [GC pause (G1 Evacuation Pause) (mixed)
    Desired survivor size 48234496 bytes, new threshold 1 (max 15)
    - age 1: 303167832 bytes, 303167832 total
    6167.945: [G1Ergonomics (CSet Construction) start choosing CSet, _pending_cards: 8728, predicted base time: 24.66 ms, remaining time: 975.34 ms, target pause time: 1000.00 ms]
    6167.945: [G1Ergonomics (CSet Construction) add young regions to CSet, eden: 105 regions, survivors: 74 regions, predicted young region time: 305.85 ms]
    6167.945: [G1Ergonomics (CSet Construction) finish adding old regions to CSet, reason: reclaimable percentage not over threshold, old: 19 regions, max: 359 regions, reclaimable: 751186712 bytes (5.00 %), threshold: 5.00 %]
    6167.945: [G1Ergonomics (CSet Construction) finish choosing CSet, eden: 105 regions, survivors: 74 regions, old: 19 regions, predicted pause time: 362.13 ms, target pause time: 1000.00 ms]
    6168.115: [G1Ergonomics (Mixed GCs) do not continue mixed GCs, reason: reclaimable percentage not over threshold, candidate old regions: 335 regions, reclaimable: 751186712 bytes (5.00 %), threshold: 5.00 %]
    , 0.1695338 secs]
    '''
    process_time = 0.0
    date_time = ''
    mixed_type = ''
    gc_time = 0.0
    # output similar to GC pause (young)
    patternstr = datefmt + r'\[GC pause \(([ \w]*)\) \(mixed\)( \([\w-]+\))?.+?, (\d+\.\d+) secs\]$'
    # group 3: mixed_type
    pattern = re.compile(patternstr, re.MULTILINE | re.DOTALL)
    for m in pattern.finditer(s):
        date_time = m.group(1)
        process_time = m.group(2)
        mixed_type = m.group(3)
        gc_time = m.group(5)
        mywriter.writerow([fileinfo['filename'], '', fileinfo['host'], fileinfo['pid'], date_time, process_time, 'Mixed generation collection - ' + mixed_type, gc_time, ''])

def process_full(s):
    '''
    Full GC statements are also output to multiple lines.
    2018-07-30T11:39:47.643-0400: 174.007: [Full GC (Heap Inspection Initiated GC) 2018-07-30T11:39:47.643-0400: 174.007: [Class Histogram (before full gc):
    2018-07-25T11:59:08.922+0000: 1098967.077: [Full GC (System.gc()) 2018-07-25T11:59:08.927+0000: 1098967.081: [Class Histogram (before full gc):
    2018-07-21T12:11:41.060+0000: 387110.898: [Full GC (Allocation Failure) 2018-07-21T12:11:41.060+0000: 387110.898: [Class Histogram (before full gc):
    ...
    ..., real=6.79 secs]
    '''
    date_time = ''
    process_time = 0.0
    gc_time = 0.0
    gcfmt = r'\[Full GC \(([ \w\.\(\)]*)\) .+?, real=(\d+\.\d+) secs\]\s*$'
    # group 3: full gc type
    patternstr = datefmt + gcfmt
    pattern = re.compile(patternstr, re.MULTILINE | re.DOTALL)
    for m in pattern.finditer(s):
        date_time = m.group(1)
        process_time = m.group(2)
        full_gc_type = m.group(3)
        gc_time = m.group(4)
        mywriter.writerow([fileinfo['filename'], '', fileinfo['host'], fileinfo['pid'], date_time, process_time, 'Full GC - ' + full_gc_type, gc_time, ''])
###############################################################################
# end methods that process multi-line messages
###############################################################################

###############################################################################
# methods that process a single line
###############################################################################
def process_jvminfo(s, linenum):
    s = s.strip()
    mywriter.writerow([fileinfo['filename'], linenum, fileinfo['host'], fileinfo['pid'], '', '', 'jvm info', '', s])

def process_remark_cleanup(s, linenum):
    '''
    These gc log statements show up on a single line.
    Example:
    2017-09-01T16:12:51.175+0000: 134.388: [GC remark 2017-09-01T16:12:51.175+0000: 134.388: [Finalize Marking, 0.0058528 secs] 2017-09-01T16:12:51.181+0000: 134.394: [GC ref-proc, 0.0001349 secs] 2017-09-01T16:12:51.181+0000: 134.394: [Unloading, 0.0032643 secs], 0.0100601 secs]
    44973.856: [GC cleanup 22G->22G(30G), 0.0100070 secs]
    [Times: user=0.08 sys=0.00, real=0.01 secs]
    '''
    gc_type = ''
    date_time = ''
    process_time = 0.0
    gc_time = 0.0
    m = re.match(r'^' + datefmt + r'\[GC remark .*, (\d+\.\d+) secs\]$', s)
    if m:
        gc_type = 'GC remark'
        date_time = m.group(1)
        process_time = m.group(2)
        gc_time = m.group(3)
    else:
        m = re.match(r'^' + datefmt + r'\[GC cleanup .+, (\d+\.\d+) secs\]$', s)
        if m:
            gc_type = 'GC cleanup'
            date_time = m.group(1)
            process_time = m.group(2)
            gc_time = m.group(3)
    if gc_type != '':
        # pad the row with an empty comment column to match col_li
        mywriter.writerow([fileinfo['filename'], linenum, fileinfo['host'], fileinfo['pid'], date_time, process_time, gc_type, gc_time, ''])

def process_search_pattern(s, linenum, date_time, process_time):
    '''
    Look for search strings of interest. If found, write to csv.
    '''
    patternstr = r'({})'.format('|'.join(search_li))
    m = re.search(patternstr, s, re.IGNORECASE)
    if m:
        search_pattern = m.group(1).lower()
        if search_pattern == 'humongous' and not enable_humongous:
            return
        else:
            s = s.strip()
            mywriter.writerow([fileinfo['filename'], linenum, fileinfo['host'], fileinfo['pid'], date_time, process_time, search_pattern, '', s])
###############################################################################
# end methods that process a single line
###############################################################################

def process_args():
    global dirlist, output_filename, enable_humongous, show_relative_path, host_li
    parser = argparse.ArgumentParser(add_help=False)
    parser.add_argument("--start_dir", help="the root directory to begin processing.")
    parser.add_argument("--output_dir", help="where the output file should be written to. By default the output file will be located in the user's home directory.")
    parser.add_argument("--log_level", choices=['CRITICAL', 'ERROR', 'WARNING', 'INFO', 'DEBUG'], help="logging level. Default is WARNING.")
    parser.add_argument("--enable_humongous", help='True enables inclusion of any log messages that have to do with humongous allocation. Default is False.')
    parser.add_argument("--show_relative_path", help="show relative path in filename column. true or false. Default is false.")
    parser.add_argument("--hosts", help="list of hosts, separated by commas.")
    parser.add_argument('-h', '--help', action='help', default=argparse.SUPPRESS, help=r'This program parses a gc log file and provides a summary in csv format. The following JVM options should be used to generate the log file: -Xloggc:/path/to/file/gc_%%p.log -XX:+PrintCommandLineFlags -XX:+PrintGC -XX:+PrintGCCause -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintGCDateStamps -XX:+PrintAdaptiveSizePolicy -XX:+PrintTenuringDistribution -XX:+PrintReferenceGC')
    args = parser.parse_args()
    if args.start_dir:
        dirlist = [args.start_dir]
    if args.output_dir:
        output_filename = args.output_dir + os.path.sep + "gc_log_summary-" + gettimestamp() + ".csv"
    if args.enable_humongous:
        if args.enable_humongous.lower() in ('true', 't'):
            enable_humongous = True
    if args.log_level:
        if args.log_level == 'CRITICAL':
            logging.basicConfig(level=logging.CRITICAL)
        elif args.log_level == 'ERROR':
            logging.basicConfig(level=logging.ERROR)
        elif args.log_level == 'INFO':
            logging.basicConfig(level=logging.INFO)
        elif args.log_level == 'DEBUG':
            logging.basicConfig(level=logging.DEBUG)
        else:
            logging.basicConfig(level=logging.WARNING)
    else:
        # set logging level. WARNING is the default level
        logging.basicConfig(level=logging.WARNING)
    if args.show_relative_path and args.show_relative_path.lower() == 'true':
        show_relative_path = True
    if args.hosts:
        host_li = args.hosts.split(',')

def main():
    global mywriter
    if sys.version_info < (3, 0, 0):
        print("Please use Python 3 or later")
        sys.exit(-1)
    process_args()
    # write output to csv file
    with open(output_filename, 'w', newline='') as csvfile:
        mywriter = csv.writer(csvfile)
        # write column headings
        mywriter.writerow(col_li)
        for dir in dirlist:
            logging.debug(dir)
            for root, dirs, files in os.walk(dir):
                for name in files:
                    logging.debug(os.path.join(root, name))
                    (b, extension) = os.path.splitext(name)
                    ext = extension.lstrip('.')
                    if extension in extlist or ext.isdigit():
                        fullpath = os.path.join(root, name)
                        if show_relative_path:
                            # add one for the path separator
                            index = len(dir) + 1
                            fname = fullpath[index:]
                            myvisitor_2(fullpath, fname)
                        else:
                            myvisitor_2(fullpath, name)
                for name in dirs:
                    logging.debug(os.path.join(root, name))
            #os.path.walk(dir, myvisitor, extlist)

main()
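# Usage sketch (hypothetical script name and paths): summarize G1 pauses from
# date-stamped gc logs collected under one directory tree:
#   python g1_gc_summary.py --start_dir /data/gclogs --enable_humongous true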
import argparse
import csv
import datetime
import logging
import os
import re
import sys

'''
This program parses a gc log file for stop-the-world phases and keywords, and writes a summary to a csv file.
young gc types - G1 Evacuation Pause, G1 Humongous Allocation, Metadata GC Threshold
mixed types - G1 Evacuation Pause
full gc types - Allocation Failure, System.gc()
'''

# list of columns
col_li = ['file name', 'line no.', 'host', 'pid', 'date time', 'process time', 'gc type/keyword', 'time', 'comment']
# list of extensions to visit
extlist = ['.current', '.0', '.1', '.2', '.3', '.4', '.5']
#extlist = ['.log', '.current', '.1', '.2']
# special patterns to search for
search_li = ['to-space', 'humongous']
#search_li = ['to-space', 'humongous', r'System.gc\(\)']
datefmt = r'^(\d+\.\d+): '
# group 1: timestamp (seconds since JVM start)
#datefmt = r'^(\d\d\d\d-\d\d-\d\dT\d\d:\d\d:\d\d\.\d\d\d[\+\-]\d\d\d\d): (\d+\.\d+): '
# the alternative above also captures the date time stamp

def gethomedir():
    return os.path.expanduser('~')

def gettimestamp():
    today = datetime.date.today()
    return today.strftime("%Y%b%d")

# globals
# list of directories to visit
dirlist = [r'/tmp']
output_filename = '{}{}gc-summary-{}.csv'.format(gethomedir(), os.path.sep, gettimestamp())
show_relative_path = False
host_li = []

# log files were collected and put in directories by hostname, separated by '.'
def get_hostname(dirpath):
    for h in host_li:
        if dirpath.find(h) > -1:
            return h
    return ''

# use -Xloggc:/path/to/file/gc.%p.log, where %p tells the JVM to substitute the pid
def get_pid(filename):
    li = filename.split('pid')
    if len(li) == 1:
        return li[0]
    else:
        (pid, rest) = li[1].split('.', 1)
        logging.debug("pid: %s", pid)
        return pid

def myvisitor(extlist, dirname, names):
    global fileinfo
    logging.debug("Current directory: %s", dirname)
    for f in names:
        (p, ext) = os.path.splitext(f)
        logging.debug("%s %s", f, ext)
        if ext in extlist:
            fullpath = os.path.join(dirname, f)
            logging.debug(fullpath)
            try:
                hostname = get_hostname(dirname)
                pid = get_pid(f)
                fileinfo = {'filename': f, 'host': hostname, 'pid': pid}
                process_file(fullpath)
            except OSError as err:
                print("OS error: {0}".format(err))

def myvisitor_2(fullpath, f):
    global fileinfo
    try:
        hostname = get_hostname(f)
        pid = get_pid(f)
        fileinfo = {'filename': f, 'host': hostname, 'pid': pid}
        process_file(fullpath)
    except OSError as err:
        print("OS error: {0}".format(err))

def process_file(fullpath):
    linenum = 0
    f = open(fullpath, 'r')
    date_time = ''
    process_time = ''
    # process line by line to get basic information
    for line in f:
        linenum += 1
        m = re.match(datefmt, line)
        if m:
            # save current timestamp; group(0) is the whole matched prefix,
            # since these logs carry no date stamp
            date_time = m.group(0)
            process_time = m.group(1)
        # check for keywords of interest
        #process_search_pattern(line, linenum, date_time, process_time)
        if line.startswith('Java HotSpot(TM)') or line.startswith('Memory:') or line.startswith('CommandLine flags:'):
            process_jvminfo(line, linenum)
        elif not line.startswith(' '):
            # check for stw pauses that appear on one line
            process_remark_cleanup(line, linenum)
    # read file object to string. When -XX:+PrintAdaptiveSizePolicy is used,
    # gc phases need a multi-line regex to handle
    # check for stw pauses that span multiple lines
    f.seek(0)
    text = f.read()
    f.close()
    # we are interested in activity that causes a stop-the-world pause and the duration of the gc
    # https://blogs.oracle.com/poonam/entry/understanding_g1_gc_logs
    # https://www.oracle.com/technetwork/articles/java/g1gc-1984535.html
    # process multi-line gc phases
    process_young(text)
    process_mixed(text)
    process_full(text)

def process_jvminfo(s, linenum):
    s = s.strip()
    mywriter.writerow([fileinfo['filename'], linenum, fileinfo['host'], fileinfo['pid'], '', '', 'jvm info', '', s])

def process_young(s):
    '''
    232610.071: [GC pause (G1 Evacuation Pause) (young)
    Desired survivor size 1090519040 bytes, new threshold 15 (max 15)
    - age 1: 2294896 bytes, 2294896 total
    - age 2: 1768760 bytes, 4063656 total
    - age 3: 2228888 bytes, 6292544 total
    - age 4: 4939064 bytes, 11231608 total
    - age 5: 4320224 bytes, 15551832 total
    - age 6: 2211832 bytes, 17763664 total
    - age 7: 594464 bytes, 18358128 total
    - age 8: 1539128 bytes, 19897256 total
    - age 9: 3044240 bytes, 22941496 total
    - age 10: 2794640 bytes, 25736136 total
    - age 11: 3209632 bytes, 28945768 total
    - age 12: 2267952 bytes, 31213720 total
    - age 13: 2402216 bytes, 33615936 total
    - age 14: 2345184 bytes, 35961120 total
    - age 15: 2231848 bytes, 38192968 total
    232610.071: [G1Ergonomics (CSet Construction) start choosing CSet, _pending_cards: 13138, predicted base time: 78.16 ms, remaining time: 121.84 ms, target pause time: 200.00 ms]
    232610.071: [G1Ergonomics (CSet Construction) add young regions to CSet, eden: 1035 regions, survivors: 4 regions, predicted young region time: 11.03 ms]
    232610.071: [G1Ergonomics (CSet Construction) finish choosing CSet, eden: 1035 regions, survivors: 4 regions, old: 0 regions, predicted pause time: 89.19 ms, target pause time: 200.00 ms]
    , 0.1156739 secs]
    '''
    logging.debug("In process_young")
    date_time = ''
    process_time = 0.0
    young_type = ''
    initial_mark = ''
    gc_time = 0.0
    patternstr = datefmt + r'\[GC pause \(([ \w]*)\) \(young\).+?, (\d+\.\d+) secs\]$'
    # groups: timestamp, type, elapsed time
    #patternstr = datefmt + r'\[GC pause \(([ \w]*)\) \(young\)( \([\w-]+\))?.+?, (\d+\.\d+) secs\]$'
    # the alternative above also captures an optional (initial-mark) marker
    pattern = re.compile(patternstr, re.MULTILINE | re.DOTALL)
    for m in pattern.finditer(s):
        process_time = m.group(1)
        young_type = m.group(2)
        gc_time = m.group(3)
        mywriter.writerow([fileinfo['filename'], '', fileinfo['host'], fileinfo['pid'], date_time, process_time, 'Young generation collection - ' + young_type + initial_mark, gc_time, ''])

def process_mixed(s):
    '''
    257167.069: [GC pause (G1 Evacuation Pause) (mixed)
    Desired survivor size 117440512 bytes, new threshold 15 (max 15)
    - age 1: 169008 bytes, 169008 total
    - age 2: 5032 bytes, 174040 total
    - age 3: 2712288 bytes, 2886328 total
    - age 4: 820208 bytes, 3706536 total
    - age 5: 916704 bytes, 4623240 total
    - age 6: 3246680 bytes, 7869920 total
    - age 7: 852856 bytes, 8722776 total
    - age 8: 605648 bytes, 9328424 total
    - age 9: 983264 bytes, 10311688 total
    - age 10: 1685120 bytes, 11996808 total
    - age 11: 692152 bytes, 12688960 total
    - age 12: 2147224 bytes, 14836184 total
    - age 13: 1511072 bytes, 16347256 total
    - age 14: 1832744 bytes, 18180000 total
    - age 15: 1066168 bytes, 19246168 total
    257167.069: [G1Ergonomics (CSet Construction) start choosing CSet, _pending_cards: 70042, predicted base time: 71.62 ms, remaining time: 128.38 ms, target pause time: 200.00 ms]
    257167.069: [G1Ergonomics (CSet Construction) add young regions to CSet, eden: 109 regions, survivors: 3 regions, predicted young region time: 6.64 ms]
    257167.069: [G1Ergonomics (CSet Construction) finish adding old regions to CSet, reason: predicted time is too high, predicted time: 3.29 ms, remaining time: 0.00 ms, old: 79 regions, min: 79 regions]
    257167.069: [G1Ergonomics (CSet Construction) added expensive regions to CSet, reason: old CSet region num not reached min, old: 79 regions, expensive: 29 regions, min: 79 regions, remaining time: 0.00 ms]
    257167.069: [G1Ergonomics (CSet Construction) finish choosing CSet, eden: 109 regions, survivors: 3 regions, old: 79 regions, predicted pause time: 285.70 ms, target pause time: 200.00 ms]
    257167.236: [G1Ergonomics (Mixed GCs) continue mixed GCs, reason: candidate old regions available, candidate old regions: 344 regions, reclaimable: 2334497912 bytes (6.21 %), threshold: 5.00 %]
    , 0.1677699 secs]
    '''
    process_time = 0.0
    date_time = ''
    mixed_type = ''
    gc_time = 0.0
    # output similar to GC pause (young)
    patternstr = datefmt + r'\[GC pause \(([ \w]*)\) \(mixed\).+?, (\d+\.\d+) secs\]$'
    # group 2: mixed_type
    #patternstr = datefmt + r'\[GC pause \(([ \w]*)\) \(mixed\)( \([\w-]+\))?.+?, (\d+\.\d+) secs\]$'
    pattern = re.compile(patternstr, re.MULTILINE | re.DOTALL)
    for m in pattern.finditer(s):
        process_time = m.group(1)
        mixed_type = m.group(2)
        gc_time = m.group(3)
        mywriter.writerow([fileinfo['filename'], '', fileinfo['host'], fileinfo['pid'], date_time, process_time, 'Mixed generation collection - ' + mixed_type, gc_time, ''])

def process_full(s):
    '''
    422052.838: [Full GC (System.gc()) 16G->10G(35G), 34.1545090 secs]
    '''
    date_time = ''
    process_time = 0.0
    gc_time = 0.0
    gcfmt = r'\[Full GC \(([ \w\.\(\)]*)\) .+?, (\d+\.\d+) secs\]$'
    # group 2: full gc type
    #gcfmt = r'\[Full GC \(([ \w\.\(\)]*)\) .+?, real=(\d+\.\d+) secs\]\s*$'
    patternstr = datefmt + gcfmt
    pattern = re.compile(patternstr, re.MULTILINE | re.DOTALL)
    for m in pattern.finditer(s):
        process_time = m.group(1)
        full_gc_type = m.group(2)
        gc_time = m.group(3)
        mywriter.writerow([fileinfo['filename'], '', fileinfo['host'], fileinfo['pid'], date_time, process_time, 'Full GC - ' + full_gc_type, gc_time, ''])

def process_remark_cleanup(s, linenum):
    '''
    These gc log statements show up on a single line.
    Example:
    706.065: [GC cleanup 220M->218M(512M), 0.0021548 secs]
    706.035: [GC remark, 0.0278976 secs]
    108684.812: [GC remark 108684.812: [Finalize Marking, 0.0018014 secs] 108684.814: [GC ref-proc, 0.0089392 secs] 108684.823: [Unloading, 0.0317085 secs], 0.0672140 secs]
    '''
    gc_type = ''
    date_time = ''
    process_time = 0.0
    gc_time = 0.0
    m = re.match(datefmt + r'\[GC remark.+(\d+\.\d+) secs\]$', s)
    if m:
        gc_type = 'GC remark'
        process_time = m.group(1)
        gc_time = m.group(2)
    else:
        m = re.match(datefmt + r'\[GC cleanup .+, (\d+\.\d+) secs\]$', s)
        if m:
            gc_type = 'GC cleanup'
            date_time = '0'
            process_time = m.group(1)
            gc_time = m.group(2)
    if gc_type != '':
        # pad the row with an empty comment column to match col_li
        mywriter.writerow([fileinfo['filename'], linenum, fileinfo['host'], fileinfo['pid'], date_time, process_time, gc_type, gc_time, ''])

def process_search_pattern(s, linenum, date_time, process_time):
    '''
    Look for search strings of interest. If found, write to csv.
    '''
    patternstr = r'({})'.format('|'.join(search_li))
    m = re.search(patternstr, s, re.IGNORECASE)
    if m:
        search_pattern = m.group(1).lower()
        s = s.strip()
        mywriter.writerow([fileinfo['filename'], linenum, fileinfo['host'], fileinfo['pid'], date_time, process_time, search_pattern, '', s])

def process_args():
    global dirlist, output_filename, show_relative_path, host_li
    parser = argparse.ArgumentParser(add_help=False)
    parser.add_argument("--start_dir", help="the root directory to begin processing.")
    parser.add_argument("--output_dir", help="where the output file should be written to. By default the output file will be located in the user's home directory.")
    parser.add_argument("--log_level", choices=['CRITICAL', 'ERROR', 'WARNING', 'INFO', 'DEBUG'], help="logging level. Default is WARNING.")
    parser.add_argument("--show_relative_path", help="show relative path in filename column. true or false. Default is false.")
    parser.add_argument("--hosts", help="list of hosts, separated by commas.")
    parser.add_argument('-h', '--help', action='help', default=argparse.SUPPRESS, help=r'This program parses a gc log file and provides a summary in csv format. The following JVM options should be used to generate the log file: -Xloggc:/path/to/file/gc_%%p.log -XX:+PrintCommandLineFlags -XX:+PrintGC -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintAdaptiveSizePolicy -XX:+PrintTenuringDistribution -XX:-PrintReferenceGC')
    args = parser.parse_args()
    if args.start_dir:
        dirlist = [args.start_dir]
        output_filename = args.start_dir + os.path.sep + "gc_log_summary-" + gettimestamp() + ".csv"
    if args.output_dir:
        output_filename = args.output_dir + os.path.sep + "gc_log_summary-" + gettimestamp() + ".csv"
    if args.log_level:
        if args.log_level == 'CRITICAL':
            logging.basicConfig(level=logging.CRITICAL)
        elif args.log_level == 'ERROR':
            logging.basicConfig(level=logging.ERROR)
        elif args.log_level == 'INFO':
            logging.basicConfig(level=logging.INFO)
        elif args.log_level == 'DEBUG':
            logging.basicConfig(level=logging.DEBUG)
        else:
            logging.basicConfig(level=logging.WARNING)
    else:
        # set logging level. WARNING is the default level
        logging.basicConfig(level=logging.WARNING)
    if args.show_relative_path and args.show_relative_path.lower() == 'true':
        show_relative_path = True
    if args.hosts:
        host_li = args.hosts.split(',')

def main():
    global mywriter
    if sys.version_info < (3, 0, 0):
        print("Please use Python 3 or later")
        sys.exit(-1)
    process_args()
    # write output to csv file
    with open(output_filename, 'w', newline='') as csvfile:
        mywriter = csv.writer(csvfile)
        # write column headings
        mywriter.writerow(col_li)
        for dir in dirlist:
            logging.debug(dir)
            for root, dirs, files in os.walk(dir):
                for name in files:
                    logging.debug(os.path.join(root, name))
                    (b, ext) = os.path.splitext(name)
                    for x in extlist:
                        m = re.match(x, ext)
                        if m:
                            fullpath = os.path.join(root, name)
                            if show_relative_path:
                                # add one for the path separator
                                index = len(dir) + 1
                                fname = fullpath[index:]
                                myvisitor_2(fullpath, fname)
                            else:
                                myvisitor_2(fullpath, name)
                for name in dirs:
                    logging.debug(os.path.join(root, name))
            #os.path.walk(dir, myvisitor, extlist)

main()
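# Usage sketch (hypothetical script name): the same summary for gc logs that
# only carry elapsed-time stamps (no -XX:+PrintGCDateStamps):
#   python g1_gc_summary_reltime.py --start_dir /tmp --log_level DEBUG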
import os
import csv
import re
import logging
import argparse
import datetime

# list of columns
col_li = ['file name', 'line no.', 'host', 'pid', 'process time', 'gc type/keyword', 'time', 'size before gc', 'size after gc', 'total heap size', 'comment']
# list of directories to visit
dirlist = [r'E:\log']
show_relative_path = False

def gethomedir():
    return os.path.expanduser('~')

def gettimestamp():
    today = datetime.date.today()
    return today.strftime("%Y%b%d")

output_filename = '{}{}gc-summary-{}.csv'.format(gethomedir(), os.path.sep, gettimestamp())
# list of extensions to visit
extlist = ['.log']
# special patterns to search for
search_li = ['to-space', 'humongous', r'System.gc\(\)']

# log files were collected and put in directories by hostname, separated by '.'
def get_hostname(dirpath):
    (head, tail) = os.path.split(dirpath)
    if tail.find('.') > -1:
        (hostname, rest) = tail.split('.', 1)
        logging.debug("hostname: %s", hostname)
        return hostname
    else:
        return ''

# use -Xloggc:/path/to/file/gc.%p.log, where %p tells the JVM to substitute the pid
def get_pid(filename):
    li = filename.split('pid')
    if len(li) == 1:
        return li[0]
    else:
        (pid, rest) = li[1].split('.', 1)
        logging.debug("pid: %s", pid)
        return pid

def myvisitor(extlist, dirname, names):
    global fileinfo
    logging.debug("Current directory: %s", dirname)
    for f in names:
        (p, ext) = os.path.splitext(f)
        logging.debug("%s %s", f, ext)
        if ext in extlist:
            fullpath = os.path.join(dirname, f)
            logging.debug(fullpath)
            try:
                hostname = get_hostname(dirname)
                pid = get_pid(f)
                fileinfo = {'filename': f, 'host': hostname, 'pid': pid}
                process_file(fullpath)
            except OSError as err:
                print("OS error: {0}".format(err))

def process_jvminfo(s, linenum):
    s = s.strip()
    mywriter.writerow([fileinfo['filename'], linenum, fileinfo['host'], fileinfo['pid'], '', 'jvm info', '', '', '', '', s])

def process_file(fullpath):
    linenum = 0
    f = open(fullpath, 'r')
    # process line by line to get basic information
    for line in f:
        linenum += 1
        # check for keywords of interest
        process_search_pattern(line, linenum)
        if line.startswith('Java HotSpot(TM)') or line.startswith('Memory:') or line.startswith('CommandLine flags:'):
            process_jvminfo(line, linenum)
        elif not line.startswith(' '):
            process_remark_cleanup_fullgc(line, linenum)
    # read file object to string. When -XX:+PrintAdaptiveSizePolicy is used,
    # gc phases need a multi-line regex to handle
    # check for stw pauses that span multiple lines
    f.seek(0)
    text = f.read()
    f.close()
    # we are interested in activity that causes a stop-the-world pause and the duration of the gc
    # https://blogs.oracle.com/poonam/entry/understanding_g1_gc_logs
    # process multi-line gc phases
    process_young(text)
    process_mixed(text)

def process_young(s):
    '''
    These gc log statements show up on multiple lines.
    Example:
    54614.619: [GC pause (young)
    Desired survivor size 109051904 bytes, new threshold 16 (max 25)
    - age 1: 9991736 bytes, 9991736 total
    54614.620: [G1Ergonomics (CSet Construction) start choosing CSet, _pending_cards: 4184, predicted base time: 28.58 ms, remaining time: 971.42 ms, target pause time: 1000.00 ms]
    54614.620: [G1Ergonomics (CSet Construction) add young regions to CSet, eden: 199 regions, survivors: 4 regions, predicted young region time: 939.32 ms]
    54614.620: [G1Ergonomics (CSet Construction) finish choosing CSet, eden: 199 regions, survivors: 4 regions, old: 0 regions, predicted pause time: 967.90 ms, target pause time: 1000.00 ms]
    54614.644: [SoftReference, 878 refs, 0.0006080 secs]54614.645: [WeakReference, 1371 refs, 0.0003980 secs]54614.645: [FinalReference, 6591 refs, 0.0029020 secs]54614.648: [PhantomReference, 5 refs, 106 refs, 0.0019450 secs]54614.650: [JNI Weak Reference, 0.0090930 secs], 0.0433140 secs]
    '''
    process_time = 0.0
    gc_time = 0.0
    pattern = re.compile(r'^(\d*\.\d*): \[GC pause [ \w\(\)]* \(young\)(.+?), (\d*\.\d*) secs\]$', re.MULTILINE | re.DOTALL)
    # multi-line search
    for m in pattern.finditer(s):
        process_time = m.group(1)
        gc_time = m.group(3)
        mywriter.writerow([fileinfo['filename'], '', fileinfo['host'], fileinfo['pid'], process_time, 'Young generation collection', gc_time, '', '', '', ''])

def process_mixed(s):
    process_time = 0.0
    gc_time = 0.0
    # output similar to GC pause (young)
    pattern = re.compile(r'^(\d*\.\d*): \[GC pause \(mixed\)(.+?), (\d*\.\d*) secs\]$', re.MULTILINE | re.DOTALL)
    for m in pattern.finditer(s):
        process_time = m.group(1)
        gc_time = m.group(3)
        mywriter.writerow([fileinfo['filename'], '', fileinfo['host'], fileinfo['pid'], process_time, 'Mixed generation collection', gc_time, '', '', '', ''])

def process_remark_cleanup_fullgc(s, linenum):
    '''
    These gc log statements show up on a single line.
    Example:
    44973.752: [GC remark 44973.753: [GC ref-proc44973.753: [SoftReference, 3741 refs, 0.0031090 secs]44973.756: [WeakReference, 6937 refs, 0.0069930 secs]44973.763: [FinalReference, 2459 refs, 0.0038880 secs]44973.767: [PhantomReference, 28 refs, 1275 refs, 0.0029950 secs]44973.770: [JNI Weak Reference, 0.0621620 secs], 0.0803160 secs], 0.1021600 secs]
    [Times: user=0.30 sys=0.00, real=0.11 secs]
    44973.856: [GC cleanup 22G->22G(30G), 0.0100070 secs]
    [Times: user=0.08 sys=0.00, real=0.01 secs]
    151413.747: [Full GC151419.349: [SoftReference, 490 refs, 0.0000980 secs]151419.349: [WeakReference, 5036 refs, 0.0004770 secs]151419.349: [FinalReference, 10 refs, 0.0000230 secs]151419.349: [PhantomReference, 129 refs, 346 refs, 0.0000520 secs]151419.349: [JNI Weak Reference, 0.0025470 secs] 19G->19G(30G), 14.2256960 secs]
    '''
    gc_type = ''
    process_time = 0.0
    gc_time = 0.0
    gc_size_before = ''
    gc_size_after = ''
    total_heap_size = ''
    m = re.match(r'^(\d*\.\d*): \[GC remark \d*\.\d*: (.+), (\d*\.\d*) secs\]$', s)
    if m:
        gc_type = 'GC remark'
        process_time = m.group(1)
        gc_time = m.group(3)
    else:
        m = re.match(r'^(\d*\.\d*): \[GC cleanup (.+), (\d*\.\d*) secs\]$', s)
        if m:
            gc_type = 'GC cleanup'
            process_time = m.group(1)
            gc_time = m.group(3)
        else:
            m = re.match(r'^(\d*\.\d*): \[Full GC(.+) (\d+[MG])->(\d*[MG])\((\d*[MG])\), (\d*\.\d*) secs\]$', s)
            if m:
                gc_type = 'Full GC'
                process_time = m.group(1)
                gc_size_before = m.group(3)
                gc_size_after = m.group(4)
                total_heap_size = m.group(5)
                gc_time = m.group(6)
    if gc_type != '':
        mywriter.writerow([fileinfo['filename'], linenum, fileinfo['host'], fileinfo['pid'], process_time, gc_type, gc_time, gc_size_before, gc_size_after, total_heap_size, ''])

def process_search_pattern(s, linenum):
    '''
    Look for search strings of interest. If found, write to csv.
    '''
    for search_pattern in search_li:
        if re.search(search_pattern, s, re.IGNORECASE):
            s = s.strip()
            mywriter.writerow([fileinfo['filename'], linenum, fileinfo['host'], fileinfo['pid'], '', search_pattern, '', '', '', '', s])
            break

def process_args():
    global dirlist, output_filename, host_li
    parser = argparse.ArgumentParser(add_help=False)
    parser.add_argument("--start_dir", help="the root directory to begin processing")
    parser.add_argument("--output_dir", help="where the output file should be written to. By default the output file will be located in the user's home directory.")
    parser.add_argument("--log_level", choices=['CRITICAL', 'ERROR', 'WARNING', 'INFO', 'DEBUG'], help="logging level")
    parser.add_argument("--hosts", help="list of hosts, separated by commas")
    parser.add_argument('-h', '--help', action='help', default=argparse.SUPPRESS, help="This program parses a gc log file and provides a summary in csv format. The following JVM options should be used to generate the log file: -Xloggc:/path/to/file/gc_%%p.log -XX:+PrintCommandLineFlags -XX:+PrintGC -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintAdaptiveSizePolicy -XX:+PrintTenuringDistribution -XX:+PrintReferenceGC")
    args = parser.parse_args()
    if args.start_dir:
        dirlist = [args.start_dir]
    if args.output_dir:
        output_filename = args.output_dir + os.path.sep + "gc_log_summary-" + gettimestamp() + ".csv"
    if args.log_level:
        if args.log_level == 'CRITICAL':
            logging.basicConfig(level=logging.CRITICAL)
        elif args.log_level == 'ERROR':
            logging.basicConfig(level=logging.ERROR)
        elif args.log_level == 'INFO':
            logging.basicConfig(level=logging.INFO)
        elif args.log_level == 'DEBUG':
            logging.basicConfig(level=logging.DEBUG)
        else:
            logging.basicConfig(level=logging.WARNING)
    else:
        # set logging level. WARNING is the default level
        logging.basicConfig(level=logging.WARNING)
    if args.hosts:
        host_li = args.hosts.split(',')

def myvisitor_2(fullpath, f):
    global fileinfo
    try:
        hostname = get_hostname(f)
        pid = get_pid(f)
        fileinfo = {'filename': f, 'host': hostname, 'pid': pid}
        process_file(fullpath)
    except OSError as err:
        print("OS error: {0}".format(err))

def main():
    global mywriter
    process_args()
    # write output to csv file
    with open(output_filename, 'w', newline='') as csvfile:
        mywriter = csv.writer(csvfile)
        # write column headings
        mywriter.writerow(col_li)
        for dir in dirlist:
            logging.debug(dir)
            for root, dirs, files in os.walk(dir):
                for name in files:
                    logging.debug(os.path.join(root, name))
                    (b, ext) = os.path.splitext(name)
                    for x in extlist:
                        m = re.match(x, ext)
                        if m:
                            fullpath = os.path.join(root, name)
                            if show_relative_path:
                                # add one for the path separator
                                index = len(dir) + 1
                                fname = fullpath[index:]
                                myvisitor_2(fullpath, fname)
                            else:
                                myvisitor_2(fullpath, name)
                for name in dirs:
                    logging.debug(os.path.join(root, name))
            #os.path.walk(dir, myvisitor, extlist)

main()
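# Usage sketch (hypothetical paths): an older variant for .log gc files written
# with -XX:+PrintGCTimeStamps only:
#   python g1_gc_summary_legacy.py --start_dir /data/gclogs --log_level INFO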
import re
import sys
datefmt = r'^(\d\d\d\d-\d\d-\d\dT\d\d:\d\d:\d\d\.\d\d\d[\+\-]\d\d\d\d): (\d+\.\d+): '
# ^ date
# ^ time
# ^ millis
# ^ time zone
# ^ timestamp
filename = ''
# check python version
def check_version():
if sys.version_info < (3,0,0):
print("Please use a version of Python > 3")
sys.exit(-1)
if len(sys.argv) < 2:
print("No filename specified.")
print("Usage: {} <filename>".format(sys.argv[0]))
sys.exit(-1)
def process_args():
global filename
filename = sys.argv[1]
def process():
with open(filename, encoding="latin-1") as f:
# the number of lines that have heap size information
count = 0
# the total number of lines processed
linecount = 1
# date and time stamp
date_time = ''
# number of seconds elapsed since the process started
process_time = ''
print(', filename, line_number, date_time, process_time, begin_eden, begin_max_eden, end_eden, end_max_eden, begin_survivor, end_survivor, begin_heap, begin_max_heap, end_heap, end_max_heap')
for line in f:
line = line.strip()
#print(line)
m = re.match(datefmt, line)
if m:
# save current timestamp
date_time = m.group(1)
process_time = m.group(2)
# match heap information in following formats
# [Eden: 9632.0M(9632.0M)->0.0B(9624.0M) Survivors: 192.0M->200.0M Heap: 11.4G(16.0G)->2074.8M(16.0G)]
# [Eden: 4704.0M(9624.0M)->0.0B(9824.0M) Survivors: 200.0M->0.0B Heap: 6786.9M(16.0G)->931.6M(16.0G)], [Metaspace: 61553K->61499K(1105920K)]
            edenstr = r'\s*\[Eden: (\d+\.\d[BKMG])\((\d+\.\d[BKMG])\)->(\d+\.\d[BKMG])\((\d+\.\d[BKMG])\) '
            survivorstr = r'Survivors: (\d+\.\d[BKMG])->(\d+\.\d[BKMG]) '
            heapstr = r'Heap: (\d+\.\d[BKMG])\((\d+\.\d[BKMG])\)->(\d+\.\d[BKMG])\((\d+\.\d[BKMG])\)\].*'
patternstr = edenstr + survivorstr + heapstr
m = re.match(patternstr, line)
if m:
begin_eden = m.group(1)
begin_max_eden = m.group(2)
end_eden = m.group(3)
end_max_eden = m.group(4)
begin_survivor = m.group(5)
end_survivor = m.group(6)
begin_heap = m.group(7)
begin_max_heap = m.group(8)
end_heap = m.group(9)
end_max_heap = m.group(10)
count += 1
print('{},{},{},{},{},{},{},{},{},{},{},{},{},{},{}'.format(
count, filename, linecount, date_time, process_time, begin_eden,
begin_max_eden, end_eden, end_max_eden, begin_survivor, end_survivor,
begin_heap, begin_max_heap, end_heap, end_max_heap))
# match heap information in following formats
# 2022-02-22T14:22:29.770-0600: 3.287: [GC cleanup 18M->18M(3072M), 0.0059295 secs]
# 2022-02-22T14:22:34.301-0600: 7.817: [GC pause (Metadata GC Threshold) (young) (initial-mark) 123M->23M(3072M), 0.1070516 secs]
# 2022-02-22T15:27:01.100-0600: 3829.383: [GC pause (G1 Evacuation Pause) (young) 9827M->6775M(11G), 0.1417604 secs]
# 2022-02-22T16:42:59.750-0600: 8433.267: [GC pause (G1 Humongous Allocation) (young) (initial-mark) 1683M->1433M(3072M), 0.0867971 secs]
#young = r'\[GC pause \([\w ]+\) \(\w+\) (\d+[B|K|M|G])->(\d+[B|K|M|G])\((\d+[B|K|M|G])\), \d+\.\d+ secs\]'
            young = r'\[GC [\w \(\)-]+ (\d+[BKMG])->(\d+[BKMG])\((\d+[BKMG])\), \d+\.\d+ secs\]'
patternstr = datefmt + young
m = re.match(patternstr, line)
if m:
# first 2 group matches are used by datefmt
begin_heap = m.group(3)
end_heap = m.group(4)
end_max_heap = m.group(5)
print('{},{},{},{},{},{},{},{},{},{},{},{},{},{},{}'.format(
count, filename, linecount, date_time, process_time, '',
'', '', '', '', '',
begin_heap, '', end_heap, end_max_heap))
count += 1
linecount += 1
def main():
check_version()
process_args()
process()
main()
teams = {'NY': 'Giants', 'Dallas': 'Cowboys', 'Green Bay': 'Packers'}
for k, v in teams.items():
    print('{} => {}'.format(k, v))
# sort the keys, then print
for k in sorted(teams.keys()):
    print('{} => {}'.format(k, teams[k]))
# alternatively, iterating a dict yields its keys
for key in sorted(teams):
    print('{} => {}'.format(key, teams[key]))
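# note: dicts preserve insertion order since Python 3.7, so sorted() is only
# needed when alphabetical key order is wanted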
import argparse
import csv
import datetime
import logging
import os
import subprocess
import sys
import traceback
# globals
dirlist = []
output_filename = ''
# only run jar tvf on extensions of .jar
ext_li = [ ".jar"]
# only process the following file types in jar tvf output
filetype_ext_li = [ '.class', '.jar']
filename_prefix = 'jar_checker_summary'
col_heading_li = ['artifact', 'size', 'content']
home_dir = os.path.expanduser('~')
def gettimestamp():
today = datetime.date.today()
return today.strftime("%Y%b%d")
def capture_process_output(filename):
    # pass the command as a list so filenames with spaces or shell metacharacters are handled safely
    cmd = ['jar', 'tvf', filename]
    result = subprocess.run(cmd, check=True, capture_output=True, text=True)
output = result.stdout
logging.debug("result output: %s", output)
return output
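# Illustrative `jar tvf` output line (the exact format can vary by JDK):
#    1234 Wed Mar 01 12:00:00 PST 2023 com/example/Foo.class
# split() yields 8 whitespace-separated fields: the entry size at index 0 and
# the entry name at index 7, which is what myvisitor below relies on.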
def myvisitor(fullpath):
logging.debug("fullpath: %s", fullpath)
try:
filename, file_extension = os.path.splitext(fullpath)
if file_extension in ext_li:
logging.debug("fullpath: %s", fullpath)
jar_output = capture_process_output(fullpath)
for line in jar_output.split('\n'):
logging.debug(">>>>line: %s", line)
# only process lines with output
if line:
line_li = line.split()
logging.debug(r'........line_li: <%s>', ','.join(line_li))
size = line_li[0]
content = line_li[7]
content_filename, content_file_extension = os.path.splitext(content)
if content_file_extension in filetype_ext_li:
mywriter.writerow([fullpath, size, content])
except Exception as err:
logging.warning("Error caught while visiting {}".format(fullpath))
logging.warning("Error: {0}".format(err))
traceback.print_exc()
def process_args():
global dirlist, output_filename
parser = argparse.ArgumentParser(add_help=False)
parser.add_argument("--start_dir", help="the root directory to begin processing. Multiple paths should be separated with a comma ','")
parser.add_argument("--output_dir", help="where the output file should be written to.")
parser.add_argument("--log_level", choices=['CRITICAL', 'ERROR', 'WARNING', 'INFO', 'DEBUG'], help="logging level. Default is WARNING.")
parser.add_argument('-h', '--help', action='help', default=argparse.SUPPRESS, help="This program searches jar files and outputs information about the content in csv format.")
args = parser.parse_args()
if args.start_dir:
dirlist = args.start_dir.split(',')
if args.output_dir:
output_filename = args.output_dir + os.path.sep + filename_prefix + "-" + gettimestamp() + ".csv"
else:
output_filename = home_dir + os.path.sep + filename_prefix + '-' + gettimestamp() + ".csv"
if args.log_level:
if args.log_level == 'CRITICAL':
logging.basicConfig(level=logging.CRITICAL)
elif args.log_level == 'ERROR':
logging.basicConfig(level=logging.ERROR)
elif args.log_level == 'INFO':
logging.basicConfig(level=logging.INFO)
elif args.log_level == 'DEBUG':
logging.basicConfig(level=logging.DEBUG)
else:
logging.basicConfig(level=logging.WARNING)
else:
# set logging level. WARNING is default level
logging.basicConfig(level=logging.WARNING)
logging.debug("dirlist: %s", dirlist)
def main():
if sys.version_info < (3,7,0):
print("Please use a version of Python > 3.7")
sys.exit(-1)
process_args()
global mywriter
with open(output_filename, 'w', newline='') as csvfile:
mywriter = csv.writer(csvfile)
mywriter.writerow(col_heading_li)
for root in dirlist:
logging.debug("Processing: %s", root)
for currentpath, dirs, files in os.walk(root):
for name in files:
fullpath = os.path.join(currentpath, name)
logging.debug("root_dir: %s, currentpath: %s, fullpath: %s", root, currentpath, fullpath)
myvisitor(fullpath)
main()
import argparse
import csv
import logging
import os
import re
import sys
import urllib.error
import urllib.request
########## This program is used to search for urls in pdf files.
########## The pdf files should be downloaded to a local directory.
########## This program will test the urls for broken links.
########## global variables
start_dir = ''
output_dir = ''
ext_list = ['pdf']
# key: url, value: urlInfo
links = {}
########## end global variables
class UrlInfo:
def __init__(self, url, hostname, files, count, responseCode, valid):
self.url = url
self.hostname = hostname
self.files = files
self.count = count
self.responseCode = responseCode
self.valid = valid
def openFileHelper(filename):
s = ''
with open(filename, 'rb') as fopen:
bytes = fopen.read()
# workaround to handle pdf files as they are binary format
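        # latin-1 maps every byte value 0-255 to a character, so the decode
        # below never raises UnicodeDecodeError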
s = bytes.decode('latin-1')
return s
# takes the filename of the file to search
def searchInFile(filename):
pattern = r'(http|https)://([a-zA-Z0-9\.#/%=_?-]*)'
# special characters
# # anchor
# % escape
# ? query string
# other special characters (not used by us):
# &, ~ (home directory location), + (plus sign)
text = openFileHelper(filename)
li = re.findall(pattern, text)
for item in li:
        logging.debug('item is: {0}'.format(item))
url = item[0] + '://' + item[1]
# get hostname name
hostname = ''
m = re.match(r'^([a-zA-Z0-9\.-]*)', item[1])
if m:
hostname = m.group(1)
        logging.debug('url is: {0}'.format(url))
        if url not in links:
#links[url] = 1
urlInfo = UrlInfo(url, hostname, [filename], 1, 0, False)
links[url] = urlInfo
else:
urlInfo = links[url]
urlInfo.count += 1
if filename not in urlInfo.files:
urlInfo.files.append(filename)
def testLinks():
print("testing links...")
key = ''
for key, value in links.items():
try:
responseCode = urllib.request.urlopen(key).getcode()
value.valid = True
value.responseCode = responseCode
except Exception as err:
logging.warning("Url: {0}, Error: {1}".format(key, err))
#traceback.print_exc()
if isinstance(err, urllib.error.HTTPError):
#print('type is: ')
#print(type(err))
value.responseCode = err.code
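# Note: urlopen blocks until the remote server responds. A sketch with a
# timeout (urlopen accepts an optional timeout in seconds):
#
#     responseCode = urllib.request.urlopen(key, timeout=10).getcode()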
def outputLinks():
keys = list(links.keys())
keys.sort()
#numlinks = len(keys)
#print('The number of links: {0}'.format(numlinks))
output_filename = os.path.sep.join([output_dir, 'linkchecker.csv'])
with open(output_filename, 'w', newline='') as csvfile:
mywriter = csv.writer(csvfile)
# header
mywriter.writerow(['url', 'hostname', 'in files', 'response code', 'valid', 'occurrences'])
for key in keys:
value = links[key]
mywriter.writerow([key, value.hostname, ','.join(value.files), value.responseCode, value.valid, value.count])
            #if not value.valid == False:
            #    print('url: {}, occurrences: {}'.format(key, value.count))
#else:
# print('url: {}, in files: {}, occurrences: {}'.format(key, ','.join(value.files), value.count))
def process_args():
global start_dir, output_dir, ext_list
parser = argparse.ArgumentParser(add_help=False)
parser.add_argument("--start_dir", help="the root directory to begin processing.", required=True)
parser.add_argument("--output_dir", help="where the output file should be written to. If not specified it will be the same as start_dir.")
parser.add_argument("--ext_list", help="the list of file extensions to search in separated with commas. Default is pdf.")
parser.add_argument("--log_level", choices=['CRITICAL', 'ERROR', 'WARNING', 'INFO', 'DEBUG'], help="logging level. Default is WARNING.")
parser.add_argument('-h', '--help', action='help', default=argparse.SUPPRESS, help="This program is used to check files on disk for valid urls.")
args = parser.parse_args()
if args.start_dir:
start_dir = args.start_dir
if args.output_dir:
output_dir = args.output_dir
else:
output_dir = start_dir
if args.ext_list:
ext_list = args.ext_list.split(',')
if args.log_level:
if args.log_level == 'CRITICAL':
logging.basicConfig(level=logging.CRITICAL)
elif args.log_level == 'ERROR':
logging.basicConfig(level=logging.ERROR)
elif args.log_level == 'INFO':
logging.basicConfig(level=logging.INFO)
elif args.log_level == 'DEBUG':
logging.basicConfig(level=logging.DEBUG)
else:
logging.basicConfig(level=logging.WARNING)
else:
# set logging level. WARNING is default level
logging.basicConfig(level=logging.WARNING)
logging.debug("start_dir is: " + start_dir)
logging.debug("ext_list is: " + ",".join(ext_list))
def process():
logging.debug("in process(), start_dir is: " + start_dir)
for root, dirs, files in os.walk(start_dir):
for name in files:
(base, extension) = os.path.splitext(name)
logging.debug("file name is: " + name)
logging.debug("base file name is: " + base)
if extension.startswith('.'):
ext = extension.lstrip('.')
ext_match = False
if ext_list:
if ext in ext_list:
ext_match = True
else:
ext_match = True
if ext_match:
input_filename = os.path.join(root, name)
searchInFile(input_filename)
testLinks()
outputLinks()
def main():
if sys.version_info < (3,0,0):
print("Please use a version of Python > 3")
sys.exit(-1)
process_args()
process()
main()
import os
import subprocess
import sys
import argparse
start_dir = os.path.expanduser('~')
output_dir = start_dir
heap_summary_cmd = '/home/dixson/work/tools/py/heap-summary.py'
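# adjust this path to point at heap-summary.py on your machine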
def process_args():
global start_dir, output_dir
parser = argparse.ArgumentParser(add_help=False)
parser.add_argument("--start_dir", help="the root directory to begin processing.")
parser.add_argument("--output_dir", help="where the output file should be written to. If this is not set, this defaults to the start_dir")
parser.add_argument('-h', '--help', action='help', default=argparse.SUPPRESS, help="This program will parse a set of gc log files in a configured directory.")
args = parser.parse_args()
if args.start_dir:
start_dir = args.start_dir
if args.output_dir:
output_dir = args.output_dir
else:
output_dir = start_dir
def process():
for root, dirs, files in os.walk(start_dir):
for name in files:
(base, extension) = os.path.splitext(name)
if extension.startswith('.'):
ext = extension.lstrip('.')
if ext.isdigit() or ext == 'current' or ext == 'log':
input_filename = os.path.join(root, name)
output_filename = input_filename + '.csv'
print(input_filename)
print(output_filename)
with open(output_filename, "w") as outfile:
subprocess.run(['python3', heap_summary_cmd, input_filename], stdout=outfile)
def main():
if sys.version_info < (3,0,0):
print("Please use a version of Python > 3")
sys.exit(-1)
process_args()
process()
main()
import argparse
import logging
import re
import shutil
import sys
multiplespaceregex = r'[\s]+'
filename = ''
search_text_file = ''
replacement_text_file = ''
# replace any white space characters with a regular expression for white space
def replaceWhiteSpace(s):
whitespacefound = False
searchstr = ''
for ch in s:
logging.debug(ch)
m = re.match(r'[\s]', ch)
if m:
logging.debug('I found whitespace')
if whitespacefound == False:
whitespacefound = True
else:
if whitespacefound == True:
searchstr += multiplespaceregex
searchstr += ch
whitespacefound = False
if whitespacefound == True:
searchstr += multiplespaceregex
return searchstr
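# Example: replaceWhiteSpace('public   static  void') returns
# 'public[\s]+static[\s]+void', so the search tolerates reflowed whitespace.
# Note that regex metacharacters in the input are not escaped.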
def searchInFile(filename, searchstr):
text = openFileHelper(filename)
pattern = re.compile(searchstr)
m = pattern.search(text)
    return m is not None
def process_args():
global filename, search_text_file, replacement_text_file
parser = argparse.ArgumentParser(add_help=False)
parser.add_argument("--filename", help="the file to search.")
parser.add_argument("--search_text_file", help="the text block to search and replace for.")
parser.add_argument("--replacement_text_file", help="the replacement text block.")
parser.add_argument("--log_level", choices=['CRITICAL', 'ERROR', 'WARNING', 'INFO', 'DEBUG'], help="logging level. Default is WARNING.")
parser.add_argument('-h', '--help', action='help', default=argparse.SUPPRESS, help="This program helps to replace search replace block text.")
args = parser.parse_args()
if args.filename:
filename = args.filename
if args.search_text_file:
search_text_file = args.search_text_file
if args.replacement_text_file:
replacement_text_file = args.replacement_text_file
if args.log_level:
if args.log_level == 'CRITICAL':
logging.basicConfig(level=logging.CRITICAL)
elif args.log_level == 'ERROR':
logging.basicConfig(level=logging.ERROR)
elif args.log_level == 'INFO':
logging.basicConfig(level=logging.INFO)
elif args.log_level == 'DEBUG':
logging.basicConfig(level=logging.DEBUG)
else:
logging.basicConfig(level=logging.WARNING)
else:
# set logging level. WARNING is default level
logging.basicConfig(level=logging.WARNING)
def openFileHelper(filename):
f = open(filename, 'r')
s = f.read()
f.close()
return s
def main():
global filename, search_text_file, replacement_text_file
if sys.version_info < (3,0,0):
print("Please use a version of Python > 3")
sys.exit(-1)
process_args()
original_text = openFileHelper(filename)
search_text = openFileHelper(search_text_file)
replacement_text = openFileHelper(replacement_text_file)
#searchstr = replaceWhiteSpace(search_text)
searchstr = search_text
logging.debug("searchstr..........")
logging.debug(searchstr)
found = searchInFile(filename, searchstr)
if found:
# copy file
dst = filename + '~'
shutil.copy(filename, dst)
pattern = re.compile(searchstr)
logging.debug("replacement text..........")
logging.debug(replacement_text)
replaced_text = pattern.sub(replacement_text, original_text)
logging.debug("replaced text..........")
logging.debug(replaced_text)
f = open(filename, "w")
n = f.write(replaced_text)
f.close()
main()
import argparse
import logging
import os
import re
import shutil
import sys
########## This program replaces find . -exec sed 's/a/b/g' {} \; because certain characters like backslash were too difficult to handle using bash
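# Example invocation (hypothetical script and file names):
#   python3 regex_replace.py --start_dir ./src --search_regex_file search.txt --replacement_text_file replace.txt --ext_list py,xml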
########## global variables
start_dir = ''
search_regex_file = ''
replacement_text_file = ''
ext_list = []
########## end global variables
def openFileHelper(filename):
f = open(filename, 'r')
s = f.read()
f.close()
return s
# takes the filename of the file to search
# pattern is the regex pattern to search for
def searchInFile(filename, pattern):
text = openFileHelper(filename)
#pattern = re.compile(searchregex)
m = pattern.search(text)
    return m is not None
def process_args():
global start_dir, search_regex_file, replacement_text_file, ext_list
parser = argparse.ArgumentParser(add_help=False)
parser.add_argument("--start_dir", help="the root directory to begin processing.", required=True)
parser.add_argument("--search_regex_file", help="the file containing the regex to search for. The file should contain a single line and trailing whitespace will be stripped.", required=True)
parser.add_argument("--replacement_text_file", help="the file containing the replacement string. The file should contain a single line and trailing whitespace will be stripped.", required=True)
parser.add_argument("--ext_list", help="the list of file extensions to search in separated with commas.")
parser.add_argument("--log_level", choices=['CRITICAL', 'ERROR', 'WARNING', 'INFO', 'DEBUG'], help="logging level. Default is WARNING.")
parser.add_argument('-h', '--help', action='help', default=argparse.SUPPRESS, help="This program helps to search and replace text.")
args = parser.parse_args()
if args.start_dir:
start_dir = args.start_dir
if args.search_regex_file:
search_regex_file = args.search_regex_file
if args.replacement_text_file:
replacement_text_file = args.replacement_text_file
if args.ext_list:
ext_list = args.ext_list.split(',')
if args.log_level:
if args.log_level == 'CRITICAL':
logging.basicConfig(level=logging.CRITICAL)
elif args.log_level == 'ERROR':
logging.basicConfig(level=logging.ERROR)
elif args.log_level == 'INFO':
logging.basicConfig(level=logging.INFO)
elif args.log_level == 'DEBUG':
logging.basicConfig(level=logging.DEBUG)
else:
logging.basicConfig(level=logging.WARNING)
else:
# set logging level. WARNING is default level
logging.basicConfig(level=logging.WARNING)
logging.debug("start_dir is: " + start_dir)
logging.debug("search_regex_file is: " + search_regex_file)
logging.debug("replacement_text_file is: " + replacement_text_file)
logging.debug("ext_list is: " + ",".join(ext_list))
def process(pattern):
for root, dirs, files in os.walk(start_dir):
for name in files:
(base, extension) = os.path.splitext(name)
if extension.startswith('.'):
ext = extension.lstrip('.')
ext_match = False
if ext_list:
if ext in ext_list:
ext_match = True
else:
ext_match = True
if ext_match:
input_filename = os.path.join(root, name)
found = searchInFile(input_filename, pattern)
if found:
# create backup copy
backup_filename = input_filename + '~'
shutil.copy(input_filename, backup_filename)
original_text = openFileHelper(input_filename)
# replace text
replaced_text = pattern.sub(replacement_text, original_text)
# save to original file
f = open(input_filename, "w")
n = f.write(replaced_text)
f.close()
def main():
global search_regex, replacement_text
if sys.version_info < (3,0,0):
print("Please use a version of Python > 3")
sys.exit(-1)
process_args()
search_regex = openFileHelper(search_regex_file).rstrip()
replacement_text = openFileHelper(replacement_text_file).rstrip()
logging.debug("search_regex is: " + search_regex)
logging.debug("replacement_text is: " + replacement_text)
pattern = re.compile(search_regex)
process(pattern)
main()
# very simple: no web framework to download or app to deploy. Navigate to a directory and run; that directory serves as the document root
python3 -m http.server 8888
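# optional stdlib flags: bind to one interface and serve another directory
# (--directory requires Python 3.7 or later)
python3 -m http.server 8888 --bind 127.0.0.1 --directory /path/to/docroot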
import argparse
import csv
import logging
import os
import sys
import re
import traceback
'''
This code parses a Java thread dump txt file and writes the result to csv
for easier analysis.
It creates 2 csv files, a .csv and a -summary.csv file, named by stripping
the .txt suffix from the original thread dump file name and appending those
suffixes.
Thread dumps generated with a long listing contain additional fields, so
this program tries several parsing strategies in turn: enhanced long
listing, long listing, then simple listing.
It is normal to see some errors, as the strategies that do not match the
input will fail before one succeeds.
'''
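# Example invocation (hypothetical script name and paths):
#   python3 thread_dump_to_csv.py --start_dir /tmp/dumps --output_dir /tmp/out --print_thread_count true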
# comma is the csv field separator, so use a different character to separate values nested within a single field
SUB_SEPARATOR = '|'
title = ''
jni_global_references = ''
heap = ''
start_dir = r'C:\Users\Dixson\Downloads\support\logs\test'
home_dir = os.path.expanduser('~')
output_dir = ''
print_runnable = True
print_thread_count = False
print_other_thread_summary = False
class Substate:
def __init__(self, msg, objectid, otherClassName):
self.msg = msg
self.objectid = objectid
self.otherClassName = otherClassName
# strategy
# Enhancement JDK-8200720 allows for additional fields
class EnhancedLongListingStrategy(object):
def __init__(self):
self.name = 'EnhancedLongListingStrategy'
self.col_li = ['name', 'number', 'type', 'priority', 'os_priority', 'cpu', 'elapsed', 'tid', 'nid', 'status', 'state', 'substate', 'address', 'stack']
def process_threadprop(self, s):
# replace with underscores for easier parsing
s = s.replace('waiting on condition', 'waiting_on_condition')
s = s.replace('in Object.wait()', 'in_Object.wait()')
s = s.replace('waiting for monitor entry', 'waiting_for_monitor_entry')
#logging.debug("s: {}".format(s))
thread_name = ''
threadprop = {}
# extract thread name
m = re.match(r'"(.*)"(.*)$', s)
if m:
thread_name = m.group(1)
substring = m.group(2)
if (s.find('daemon') > -1):
# general case, most threads 'labelled' daemon
li = substring.split()
thread_no = li[0].lstrip('#')
thread_type = li[1]
thread_priority = li[2]
thread_ospriority = li[3]
thread_cpu = li[4]
thread_elapsed = li[5]
thread_tid = li[6]
thread_nid = li[7]
thread_status = li[8]
# some thread dumps show status with address, eg. sleeping[0x00007f297b44f000]
                if len(li) >= 10:
thread_address = li[9]
else:
thread_address = ''
threadprop = {'name': thread_name, 'number': thread_no, 'type': thread_type, 'priority': thread_priority, 'os_priority': thread_ospriority,'cpu': thread_cpu, 'elapsed': thread_elapsed, 'tid': thread_tid, 'nid': thread_nid, 'status': thread_status, 'address': thread_address}
else:
# threads not labelled 'daemon'
logging.debug('substring {}'.format(substring))
m = re.match(r' #(\d+) (.*)$', substring)
if m:
thread_no = m.group(1)
substring = m.group(2)
li = substring.split()
thread_priority = li[0]
thread_ospriority = li[1]
thread_cpu = li[2]
thread_elapsed = li[3]
thread_tid = li[4]
thread_nid = li[5]
thread_status = li[6]
# some thread dumps show status with address, eg. sleeping[0x00007f297b44f000]
                    if len(li) > 7:
thread_address = li[7]
else:
thread_address = ''
threadprop = {'name': thread_name, 'number': thread_no, 'priority': thread_priority, 'os_priority': thread_ospriority, 'cpu': thread_cpu, 'elapsed':thread_elapsed, 'tid': thread_tid, 'nid': thread_nid, 'status': thread_status, 'address': thread_address}
else:
# jvm threads - only display basic information
# "G1 Conc#0" os_prio=0 cpu=1453.41ms elapsed=52307.25s tid=0x00007f912406ded0 nid=0x1cafd5 runnable
li = substring.split()
thread_ospriority = li[0]
thread_cpu = li[1]
thread_elapsed = li[2]
thread_tid = li[3]
thread_nid = li[4]
thread_status = li[5]
threadprop = {'name' : thread_name, 'os_priority' : thread_ospriority, 'cpu': thread_cpu, 'elapsed' : thread_elapsed, 'tid' : thread_tid, 'nid' : thread_nid, 'status' : thread_status}
return threadprop
# generated with jstack -l
class LongListingStrategy(object):
def __init__(self):
self.name = 'LongListingStrategy'
self.col_li = ['name', 'number', 'type', 'priority', 'os_priority', 'tid', 'nid', 'status', 'state', 'substate', 'address', 'stack']
def process_threadprop(self, s):
# replace with underscores for easier parsing
s = s.replace('waiting on condition', 'waiting_on_condition')
s = s.replace('in Object.wait()', 'in_Object.wait()')
s = s.replace('waiting for monitor entry', 'waiting_for_monitor_entry')
#logging.debug("s: {}".format(s))
thread_name = ''
threadprop = {}
# extract thread name
m = re.match(r'"(.*)"(.*)$', s)
if m:
thread_name = m.group(1)
substring = m.group(2)
# general case, most threads 'labelled' daemon
if (s.find('daemon') > -1):
li = substring.split()
thread_no = li[0].lstrip('#')
thread_type = li[1]
thread_priority = li[2]
thread_ospriority = li[3]
thread_tid = li[4]
thread_nid = li[5]
thread_status = li[6]
# some thread dumps show status with address, eg. sleeping[0x00007f297b44f000]
if len(li) >= 8:
thread_address = li[7]
else:
thread_address = ''
threadprop = {'name': thread_name, 'number': thread_no, 'type': thread_type, 'priority': thread_priority, 'os_priority': thread_ospriority, 'tid': thread_tid, 'nid': thread_nid, 'status': thread_status, 'address': thread_address}
else:
#"RMI Reaper" #14 prio=5 os_prio=0 tid=0x00007f2bd1d3f800 nid=0x2161 in Object.wait() [0x00007f2106550000]
#"main" #1 prio=5 os_prio=0 tid=0x00007f2bd000b800 nid=0x20ab waiting on condition [0x00007f2bd5f79000]
#"main" #1 prio=5 os_prio=0 tid=0x00007f79c000d800 nid=0x9091 sleeping[0x00007f79c8305000]
#"GS-swiftJmsSenderContainer-1" #205 prio=5 os_prio=0 tid=0x00007f684645a000 nid=0x6156 sleeping[0x00007f6735dea000]
m = re.match(r' #(\d+) (.*)$', substring)
if m:
thread_no = m.group(1)
substring = m.group(2)
li = substring.split()
thread_priority = li[0]
thread_ospriority = li[1]
thread_tid = li[2]
thread_nid = li[3]
thread_status = li[4]
                    # some thread dumps show status with address, eg. sleeping[0x00007f297b44f000]
                    if len(li) >= 6:
                        thread_address = li[5]
                    else:
                        thread_address = ''
threadprop = {'name': thread_name, 'number': thread_no, 'priority': thread_priority, 'os_priority': thread_ospriority, 'tid': thread_tid, 'nid': thread_nid, 'status': thread_status, 'address': thread_address}
else:
# jvm threads only display basic information
li = substring.split()
thread_ospriority = li[0]
thread_tid = li[1]
thread_nid = li[2]
thread_status = li[3]
threadprop = {'name' : thread_name, 'os_priority' : thread_ospriority, 'tid' : thread_tid, 'nid' : thread_nid, 'status' : thread_status}
return threadprop
# generated with jstack; missing thread number and os_priority
class SimpleListingStrategy(object):
def __init__(self):
self.name = 'SimpleListingStrategy'
self.col_li = ['name', 'type', 'priority', 'tid', 'nid', 'status', 'state', 'substate', 'address', 'stack', 'locked_ownable_synchronizers']
def process_threadprop(self, s):
# replace with underscores for easier parsing
s = s.replace('waiting on condition', 'waiting_on_condition')
s = s.replace('in Object.wait()', 'in_Object.wait()')
s = s.replace('waiting for monitor entry', 'waiting_for_monitor_entry')
thread_name = ''
threadprop = {}
# extract thread name
m = re.match(r'"(.*)"(.*)$', s)
if m:
thread_name = m.group(1)
substring = m.group(2)
# general case, most threads 'labelled' daemon
if (s.find('daemon') > -1):
li = substring.split()
thread_type = li[0]
thread_priority = li[1]
thread_tid = li[2]
thread_nid = li[3]
thread_status = li[4]
# some thread dumps show status with address, eg. sleeping[0x00007f297b44f000]
if len(li) >= 6:
thread_address = li[5]
else:
thread_address = ''
threadprop = {'name': thread_name, 'type': thread_type, 'priority': thread_priority, 'tid': thread_tid, 'nid': thread_nid, 'status': thread_status, 'address': thread_address}
else:
li = substring.split()
                if len(li) > 3:
thread_priority = li[0]
thread_tid = li[1]
thread_nid = li[2]
thread_status = li[3]
# some thread dumps show status with address, eg. sleeping[0x00007f297b44f000]
if len(li) >= 5:
thread_address = li[4]
else:
thread_address = ''
threadprop = {'name': thread_name, 'priority': thread_priority, 'tid': thread_tid, 'nid': thread_nid, 'status': thread_status, 'address': thread_address}
else:
#"GS-GSPingManager:com.gigaspaces.internal.lrmi.stubs.LRMISpaceImpl:1632991357520" Id=721 TIMED_WAITING
thread_id = li[0]
thread_status = li[1]
threadprop = {'name': thread_name, 'tid' : thread_id, 'status' : thread_status}
return threadprop
# end strategy
# an indented line containing java.lang.Thread.State is usually the first line of the block
def process_state(li):
if( len(li) > 0):
#logging.debug(li[0])
m = re.match(r'^\s+java\.lang\.Thread\.State: (.*)$', li[0])
if m:
return m.group(1)
#if( block_li[0].find('java.lang.Thread.State:') > -1):
else:
return ''
else:
return ''
# a stack trace may have additional information I call substate
def process_substate(li):
#logging.debug("In process substate")
#logging.debug("li is: " + ''.join(li))
substateObj = None
substate_li = []
substateObj_li = []
for s in li:
s = s.strip()
logging.debug("s is: '" + s + "'")
if( s.startswith('-')):
substate_li.append(s)
m = re.match(r'-(.*)<(.*)> \(a (.*)\)', s)
if m:
msg = m.group(1).strip()
objectid = m.group(2)
classname = m.group(3)
logging.debug("match found")
                substateObj = Substate(msg, objectid, classname)
                substateObj_li.append(substateObj)
return (SUB_SEPARATOR.join(substate_li), substateObj_li)
def process_stack(li):
stack_li = []
for s in li:
s = s.strip()
stack_li.append(s)
#logging.debug("begin>>>>> %s" % SUB_SEPARATOR.join(stack_li))
#logging.debug("end>>>>>>>")
return SUB_SEPARATOR.join(stack_li)
def process_heap(li):
heap_li = []
for s in li:
s = s.strip()
heap_li.append(s)
s = SUB_SEPARATOR.join(heap_li)
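    # commas would break the single csv cell this ends up in, so swap them for apostrophes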
return s.replace(',', '\'')
# the information in this block occurs below the stack trace
def process_locked_ownable_sync(block_li):
#logging.debug("block_li in locked_ownable_synchronizers: {}".format(block_li))
if not block_li:
return ''
length = len(block_li)
for n in range(0, length):
s = block_li[n]
if s.find('Locked ownable synchronizers:') > -1 :
# return value in next line
if n + 1 < length:
return block_li[n + 1].strip().lstrip('-')
return ''
def process_block(strategy, block_li, nextblock_li, threadprop_by_name):
global title, jni_global_references, heap
logging.debug("BEGIN BLOCK")
logging.debug(block_li)
logging.debug("END BLOCK")
s = block_li[0]
if (s.startswith('"')):
# thread name found
threadprop = strategy.process_threadprop(s)
threadprop['state'] = process_state(block_li[1:])
threadprop['block'] = block_li[1:]
# there can be more than 1 thread referenced
(substate, substateObj) = process_substate(block_li[1:])
threadprop['substate'] = substate
threadprop['substateObj'] = substateObj
threadprop['stack'] = process_stack(block_li[1:])
threadprop['locked_ownable_synchronizers'] = process_locked_ownable_sync(nextblock_li)
threadprop_by_name[threadprop['name']] = threadprop
elif (s.startswith('Full thread dump')):
title = s
elif (s.startswith('JNI global references') or s.startswith('JNI global refs')):
jni_global_references = s
elif (s == 'Heap'):
heap = process_heap(block_li[1:])
else:
logging.debug('Skipping block that starts with line: {}'.format(s))
return threadprop_by_name
# print substate in another format for easy viewing
# print thread name, id, status, object id, classname
def print_substate(threadprop_by_name, mywriter):
mywriter.writerow(['substate (redux)', 'thread', 'tid', 'msg', 'other_oid', 'other_classname (e.g, locked/waiting on)'])
for k in threadprop_by_name.keys():
thread_name = k
#logging.debug(threadprop_by_name[k])
threadprop = threadprop_by_name[k]
tid = threadprop['tid'].split('=')[1]
if 'substateObj' in threadprop:
substatusObj_li = threadprop_by_name[k]['substateObj']
for substatusObj in substatusObj_li:
mywriter.writerow(['',thread_name, tid, substatusObj.msg, substatusObj.objectid, substatusObj.otherClassName ])
def print_runnable_stack(threadprop_by_name, mywriter):
mywriter.writerow(['runnable', 'thread (in state RUNNABLE)', 'stack'])
for k in threadprop_by_name.keys():
thread_name = k
threadprop = threadprop_by_name[k]
state = threadprop['state']
if state == 'RUNNABLE':
# re-format original stack trace
block = [line.strip() for line in threadprop['block'][1:]]
block_s = "\n".join(block)
mywriter.writerow(['',thread_name,block_s])
def count_occurrences(threadprop_by_name, field, mywriter, column_name):
logging.debug("field is: " + field);
values = []
count_dict = {}
for k in threadprop_by_name.keys():
value = threadprop_by_name[k][field]
values.append(value)
for item in values:
if item in count_dict:
count = count_dict[item]
count += 1
count_dict[item] = count
else:
count_dict[item] = 1
mywriter.writerow([column_name, 'value', 'count'])
    '''
    # alternative: sort by count descending
    for key, value in sorted(count_dict.items(), key=lambda kv: (kv[1], kv[0]), reverse=True):
        #print(", %s, %s" % (key[:160], value))
        mywriter.writerow(['', key[:160], value])
    '''
sorted_keys = sorted(count_dict.keys())
for key in sorted_keys:
value = count_dict[key]
s = key[:160]
if not s:
s = "EMPTY"
mywriter.writerow(['', s, value])
def print_threads(strategy, threadprop_by_name, mywriter):
mywriter.writerow(strategy.col_li)
mywriter.writerow(['Title', title])
mywriter.writerow(['JNI global references', jni_global_references])
if heap:
mywriter.writerow(['Heap', heap])
mywriter.writerow([])
mywriter.writerow(['** Begin threads **'])
keys = sorted(threadprop_by_name.keys())
#keys.sort()
for k in keys:
#logging.debug('%s => %s' % (k, threadprop_by_name[k]))
li = []
threadprop_dict = threadprop_by_name[k]
for col in strategy.col_li:
if col in threadprop_dict:
s = threadprop_dict[col]
else:
s = ''
            s = s if s is not None else ''
li.append(s)
mywriter.writerow(li)
def write_csv(strategy, threadprop_by_name, filename):
# write output to csv file
# output compilation of thread properties
filename_woext, file_extension = os.path.splitext(filename)
    # output_dir already ends with a path separator
    output_filename = output_dir + filename_woext + '.csv'
with open(output_filename, 'w', newline='') as csvfile:
mywriter = csv.writer(csvfile)
print_threads(strategy, threadprop_by_name, mywriter)
# output summary
    output_filename = output_dir + filename_woext + '-summary.csv'
with open(output_filename, 'w', newline='') as csvfile:
mywriter = csv.writer(csvfile)
if print_thread_count == True:
count_occurrences(threadprop_by_name, 'status', mywriter, 'status')
count_occurrences(threadprop_by_name, 'state', mywriter, 'state')
count_occurrences(threadprop_by_name, 'substate', mywriter, 'linked to')
# an application with many threads in a certain section of code may indicate a problem
count_occurrences(threadprop_by_name, 'stack', mywriter, 'stack (first few lines of)')
if print_other_thread_summary == True:
print_substate(threadprop_by_name, mywriter)
if print_runnable == True:
print_runnable_stack(threadprop_by_name, mywriter)
def process_file(fullpathname, filename):
line_number = 0
f = open(fullpathname)
# allblock_li is all the thread text sections saved to a list
allblock_li = []
# current_block_li is a text section containing information for a single thread
current_block_li = []
# k thread name -> v dictionary with key (column heading or property name), value pairs for that thread
threadprop_by_name = {}
firsttime = True
for line in f:
line_number += 1
s = line.rstrip()
#logging.debug(">> %d: %s" % (line_number, s))
# lines beginning with white space
m = re.match(r'^(\s)+(.*)$', s)
# separate lines in file into sections, ie, block
# save for future processing
# need to be able to look ahead into block and next block
if( not m):
# new block found
if( firsttime == False ):
#threadprop_by_name = process_block(strategy, current_block_li, threadprop_by_name)
allblock_li.append(current_block_li)
else:
firsttime = False
# reset current_block_li
current_block_li = [s]
else:
current_block_li.append(s)
    allblock_li.append(current_block_li)
    f.close()
#threadprop_by_name = process_block(strategy, current_block_li, threadprop_by_name)
# initialize strategies
strategy_li = [EnhancedLongListingStrategy(), LongListingStrategy(), SimpleListingStrategy()]
# try each strategy until one processes cleanly
for strategy in strategy_li:
try:
threadprop_by_name = {}
length = len(allblock_li)
for n in range(0, length):
#for block in allblock_li:
block = allblock_li[n]
if (n+1 >= length):
nextblock = None
else:
nextblock = allblock_li[n+1]
threadprop_by_name = process_block(strategy, block, nextblock, threadprop_by_name)
write_csv(strategy, threadprop_by_name, filename)
# if this succeeds, no need to try next strategy
break
except Exception as err:
logging.warning("Error caught while parsing {} using strategy {}".format(filename, strategy.name))
logging.warning("Error: {0}".format(err))
traceback.print_exc()
def process_args():
global start_dir, output_dir, print_runnable, print_thread_count, print_other_thread_summary
parser = argparse.ArgumentParser(add_help=False)
parser.add_argument("--start_dir", help="the root directory to begin processing")
parser.add_argument("--output_dir", help="where the output file should be written to")
parser.add_argument("--print_runnable", help="print the stack traces of the runnable threads. Default is true")
parser.add_argument("--print_thread_count", help="print a summary of thread counts by class. Default is false")
parser.add_argument("--print_other_thread_summary", help="print a summary of the referenced threads. Default is false")
parser.add_argument("--log_level", choices=['CRITICAL', 'ERROR', 'WARNING', 'INFO', 'DEBUG'], help="logging level")
parser.add_argument('-h', '--help', action='help', default=argparse.SUPPRESS, help="This program parses a set of thread dump files generated with jstack or kill -3.")
args = parser.parse_args()
if args.start_dir:
start_dir = args.start_dir
if args.output_dir:
output_dir = args.output_dir + os.path.sep
else:
output_dir = home_dir + os.path.sep
if args.print_runnable:
if args.print_runnable.lower() == 'false' or args.print_runnable.lower() == 'f':
print_runnable = False
if args.print_thread_count:
if args.print_thread_count.lower() == 'true' or args.print_thread_count.lower() == 't':
print_thread_count = True
if args.print_other_thread_summary:
if args.print_other_thread_summary.lower() == 'true' or args.print_other_thread_summary.lower() == 't':
print_other_thread_summary = True
if args.log_level:
if args.log_level == 'CRITICAL':
logging.basicConfig(level=logging.CRITICAL)
elif args.log_level == 'ERROR':
logging.basicConfig(level=logging.ERROR)
elif args.log_level == 'INFO':
logging.basicConfig(level=logging.INFO)
elif args.log_level == 'DEBUG':
logging.basicConfig(level=logging.DEBUG)
else:
logging.basicConfig(level=logging.WARNING)
else:
# set logging level. WARNING is default level
logging.basicConfig(level=logging.WARNING)
def main():
if sys.version_info < (3,0,0):
print("Please use a version of Python > 3")
sys.exit(-1)
process_args()
for start, dirs, files in os.walk(start_dir):
for name in files:
if name.endswith('txt') or name.endswith('tdump'):
process_file(os.path.join(start, name), name)
main()