@dixsonhuie
Last active October 7, 2025 20:34
python examples
import os
import sys
import csv
import re
import logging
import argparse
import datetime
col_li = ['filename', 'line_number', 'host', 'pid', 'comp', 'id', 'time', 'ms', 'category', 'level', 'logger', 'message']
dirlist = [r'E:\log']
start_date = None
end_date = None
# date format used to convert command line arguments into a datetime object
# example: 2021-09-14
filter_date_fmt = '%Y-%m-%d'
# adding hours, minutes and seconds
filter_datetime_fmt = filter_date_fmt + ' %H:%M:%S'
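# A quick sketch of the conversion above (hypothetical value):
#   datetime.datetime.strptime('2021-09-14 16:22:05', filter_datetime_fmt)
#   -> datetime.datetime(2021, 9, 14, 16, 22, 5)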
home_dir = os.path.expanduser('~')
filename_prefix = 'app_log_summary'
output_filename = ''
show_fullpath = False
# list of extensions to visit
extlist = [r'\.\d+', r'\.log', r'\.out', r'\.stdouterr', r'\.err']
# regex representing entire date time portion from a line in a log file
# example: 2021-09-14 16:22:05,124
datefmt = r'(\d\d\d\d-\d\d-\d\d \d\d:\d\d:\d\d),(\d\d\d)'
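# Sketch: matched against a line containing '2021-09-14 16:22:05,124',
# group(1) is '2021-09-14 16:22:05' and group(2) is '124'.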
# search for the following strings that may indicate an error
error_li = [ 'warning', 'severe', 'exception', 'error', 'failure', 'Long GC collection']
# for setting log level
level_li = ['SEVERE', 'WARNING', 'INFO', 'CONFIG', 'FINE', 'FINER', 'FINEST']
host_li = []
def gettimestamp():
today = datetime.date.today()
return today.strftime("%Y%b%d")
# check if string matches any of the hostnames
def get_hostname(s):
for host in host_li:
pattern = '.*({0}).*'.format(host)
m = re.match(pattern, s)
if m:
return m.group(1)
return ''
# check if filename contains pid and component information
# only works if filename format has not changed
def get_pid(s):
comp = ''
id = ''
host = ''
pid = ''
patternstr = r'.*(gsc|manager|gsm|lus)_(\d+)-([\w\.]+)-(\d+).*'
m = re.match(patternstr, s)
if m:
comp = m.group(1)
id = m.group(2)
host = m.group(3)
pid = m.group(4)
else:
# other processes: gsa, GSWebUI, ui, service
patternstr = r'.*(gsa|GSWebUI|ui|service)-([\w\.]+)-(\d+).*'
m = re.match(patternstr, s)
if m:
comp = m.group(1)
host = m.group(2)
pid = m.group(3)
return (comp, id, host, pid)
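# Sketch of the expected parse (hypothetical filenames):
#   get_pid('gsc_1-host1.example.com-12345.log') -> ('gsc', '1', 'host1.example.com', '12345')
#   get_pid('gsa-host1.example.com-6789.log') -> ('gsa', '', 'host1.example.com', '6789')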
def process_file(fullpath):
line_number = 0
with open(fullpath, encoding="latin-1") as f:
sDate = ''
dtDate = None
millis = ''
for line in f:
found = False
line_number += 1
# skip lines beginning with white space
if re.match(r'\s', line):
continue
# save the timestamp for lines with no timestamp
patternstr = r'.*{}.*'.format(datefmt)
m = re.match(patternstr, line)
if m:
sDate = m.group(1)
dtDate = datetime.datetime.strptime(sDate, filter_datetime_fmt)
millis = m.group(2)
# filter out log lines by date
if start_date is not None and dtDate is not None and dtDate < start_date:
continue
if end_date is not None and dtDate is not None and dtDate > end_date:
continue
for error_pattern in error_li:
if re.search(error_pattern, line, re.IGNORECASE):
found = True
break
logging.debug("log date as string: %s, log date: %s", sDate, '' if dtDate is None else dtDate.strftime(filter_date_fmt))
if found == True:
# truncate the line
line = line[:300]
line = line.rstrip()
logging.debug("Line: %s", line)
process_line(line, fullpath, line_number, sDate, millis)
def process_line(s, fullpath, line_number, date, millis):
# example: 2017-01-05 14:11:21,821 LUS INFO [com.sun.jini.reggie] - Exception
# example: 2016-12-31 17:38:57,334 pmds.deployment-1.8.9-pu.18 [2] WARNING [com.gigaspaces.core.common] - Primary space is unavailable
patternstr = r'{}{}'.format(datefmt, r' ([\w \-\.]*)(\[\d\] )?([\w]*)? \[([\w\-\.]*)\] - (.*)$')
m = re.match(patternstr, s)
#m = re.match(r'(\d\d\d\d-\d\d-\d\d \d\d:\d\d:\d\d),(\d\d\d) ([\w \-\.]*)(\[\d\] )?([\w]*)? \[([\w\-\.]*)\] - (.*)$', s)
'''
^ date ^ millis ^ category ^ optional^ level ^ logger ^ message
match 0 or 1 times
'''
if m:
# 1 date
# 2 millis
# 3 category
# 4 optional, '[2]' in comment above
# 5 level
# 6 logger
# 7 message
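        # Sketch, using the first example line above: group(3) is 'LUS INFO'
        # (category and level combined; split apart below), group(4) is None,
        # group(6) is 'com.sun.jini.reggie' and group(7) is 'Exception'.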
category = ''
level = ''
if m.group(4) == None:
category = m.group(3)
# extract level information
# eg., LUS INFO
for i in level_li:
index = category.find(i)
if index >= 0:
level = category[index:]
category = category[0:index]
break
else:
category = m.group(3) + m.group(4)
level = m.group(5)
# this group also grabs the space that may come after this optional string; need to strip it out
category = category.strip()
level = level.strip()
mywriter.writerow([fileinfo['path'], line_number, fileinfo['host'], fileinfo['pid'], fileinfo['comp'], fileinfo['id'], m.group(1), m.group(2), category, level, m.group(6), m.group(7)])
else:
# sometimes clients just provide output of the gs-agent process
# [gsc][1/10120] 2017-10-11 10:52:37,557 CommonClassLoader WARNING [net.jini.discovery.LookupLocatorDiscovery] - java.net.SocketTimeoutException: connect timed out - using unicast locator 10.10.10.117:4174 - delay next lookup by 1,000 ms
patternstr = r'{}{}{}'.format(r'\[(\w*)\]\[(\d*)/(\d*)\]\s*', datefmt, r' ([\w \-\.]*)(\[\d\] )?([\w]*)? \[([\w\-\.]*)\] - (.*)$')
m = re.match(patternstr, s)
#m = re.match(r'\[(\w*)\]\[(\d*)/(\d*)\]\s*(\d\d\d\d-\d\d-\d\d \d\d:\d\d:\d\d),(\d\d\d) ([\w \-\.]*)(\[\d\] )?([\w]*)? \[([\w\-\.]*)\] - (.*)$', s)
'''
^ proc ^ id ^ pid - the rest is a repeat of the regex used above
'''
if m:
# 1 component
# 2 id
# 3 pid
# 4 date
# 5 millis
# 6 category
# 7 optional
# 8 level
# 9 logger
# 10 message
category = ''
level = ''
if m.group(7) == None:
category = m.group(6)
# extract level information
for i in level_li:
index = category.find(i)
if index >= 0:
level = category[index:]
category = category[0:index]
break
category = category.strip()
if category.upper() == m.group(1).upper():
category = ''
else:
category = m.group(6) + m.group(7)
level = m.group(8)
mywriter.writerow([fileinfo['path'], line_number, fileinfo['host'], m.group(3), m.group(1), m.group(2), m.group(4), m.group(5), category, level, m.group(9), m.group(10)])
else:
#[manager][1/13986] Caused by: com.gigaspaces.security.AuthenticationException: Authentication request is invalid - you are not logged in.
# log message pattern missing timestamp
patternstr = r'{}{}'.format(r'\[(\w*)\]\[(\d*)/(\d*)\]\s*', r'(.*)$')
# ^comp ^id ^pid ^message
m = re.match(patternstr, s)
if m:
# 1 component
# 2 id
# 3 pid
# 4 message
mywriter.writerow([fileinfo['path'], line_number, fileinfo['host'], m.group(3), m.group(1), m.group(2), date, millis, '', '', '', m.group(4)])
else:
mywriter.writerow([fileinfo['path'], line_number, fileinfo['host'], fileinfo['pid'], fileinfo['comp'], fileinfo['id'], date, millis, '', '', '', s])
def process_args():
global dirlist, start_date, end_date, filename_prefix, output_filename, host_li, show_fullpath
parser = argparse.ArgumentParser(add_help=False)
parser.add_argument("--start_dir", help="the root directory to begin processing.")
parser.add_argument("--output_dir", help="where the output file should be written to.")
parser.add_argument("--start_date", help="the date to begin processing errors. Log lines with dates before the start date will be filtered out. Example format: 2021-09-21")
parser.add_argument("--end_date", help="the date to end processing errors. Log lines with dates after the end date will be filtered out. Example format: 2021-09-21")
parser.add_argument("--log_level", choices=['CRITICAL', 'ERROR', 'WARNING', 'INFO', 'DEBUG'], help="logging level. Default is WARNING.")
parser.add_argument("--hosts", help="list of hosts, separated by commas.")
parser.add_argument("--filename_prefix", help="Output filename prefix.")
parser.add_argument("--show_fullpath", help="Output the full path. Default is false.")
parser.add_argument('-h', '--help', action='help', default=argparse.SUPPRESS, help="This program parses a set of XAP log files formatted with standard XAP out-of-the-box settings.")
args = parser.parse_args()
if args.filename_prefix:
filename_prefix = args.filename_prefix
if args.start_dir:
dirlist = [args.start_dir]
if args.start_date:
start_date = datetime.datetime.strptime(args.start_date, filter_date_fmt)
if args.end_date:
end_date = datetime.datetime.strptime(args.end_date, filter_date_fmt)
if args.output_dir:
output_filename = args.output_dir + os.path.sep + filename_prefix + "-" + gettimestamp() + ".csv"
else:
output_filename = home_dir + os.path.sep + filename_prefix + '-' + gettimestamp() + ".csv"
    if args.show_fullpath:
        show_fullpath = args.show_fullpath.lower() in ('true', 't')
    if args.log_level:
        # the choices above map directly onto logging's level names
        logging.basicConfig(level=getattr(logging, args.log_level))
    else:
        # set logging level. WARNING is default level
        logging.basicConfig(level=logging.WARNING)
if args.hosts:
host_li = args.hosts.split(',')
def myvisitor(extlist, dirname, names):
global fileinfo
logging.debug("Current directory: %s", dirname)
for f in names:
(b, ext) = os.path.splitext(f)
logging.debug("Filename base: %s Ext: %s", b, ext)
for x in extlist:
m = re.match(x, ext)
if m:
fullpath = os.path.join(dirname, f)
logging.debug("Fullpath: %s", fullpath)
try:
hostname = get_hostname(f)
fileinfo = {'host': hostname}
process_file(fullpath)
except OSError as err:
print("OS error: {0}".format(err))
#except OSError, detail:
# print detail
break
def myvisitor_2(fullpath, start_dir, filename):
global fileinfo
try:
relative_path = "{}{}".format('.', fullpath.replace(start_dir, '', 1))
hostname = get_hostname(relative_path)
if not show_fullpath:
path = relative_path
else:
path = fullpath
(comp, id, host, pid) = get_pid(filename)
if hostname == '':
hostname = host
fileinfo = {'host': hostname, 'path': path, 'comp': comp, 'id': id, 'pid': pid}
process_file(fullpath)
except OSError as err:
print("OS error: {0}".format(err))
def main():
if sys.version_info < (3,0,0):
print("Please use a version of Python > 3")
sys.exit(-1)
global mywriter
process_args()
# write output to csv file
with open(output_filename, 'w', newline='') as csvfile:
mywriter = csv.writer(csvfile)
mywriter.writerow(col_li)
for i in dirlist:
logging.debug("Processing: %s", i)
for root, dirs, files in os.walk(i):
for name in files:
logging.debug(os.path.join(root, name))
(b, ext) = os.path.splitext(name)
for x in extlist:
m = re.match(x, ext)
if m:
fullpath = os.path.join(root, name)
myvisitor_2(fullpath, i, name)
for name in dirs:
logging.debug(os.path.join(root, name))
#os.path.walk(i, myvisitor, extlist)
main()
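# Example invocation (sketch; the script filename is hypothetical):
#   python app_log_summary.py --start_dir /path/to/logs --start_date 2021-09-14 --hosts host1,host2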
import argparse
import logging
import os
import sys
start_dir = os.path.expanduser('~')
show_relpath = False
filter_li = []
class file_suffix_filter:
# heap dump files
# extlist = ['.hprof']
def __init__(self, li):
self.extlist = li
def hasFileMatch(self):
return True
def hasDirectoryMatch(self):
return False
def isFileMatch(self, path, filename):
(base, ext) = os.path.splitext(filename)
if ext in self.extlist:
return True
else:
return False
class named_dir_filter:
def __init__(self, named_dir_li):
self.dirname_li = named_dir_li
def hasFileMatch(self):
return False
def hasDirectoryMatch(self):
return True
def isDirectoryMatch(self, dirname):
logging.debug("dirname is:" + dirname)
if dirname in self.dirname_li:
return True
else:
return False
class large_file_filter:
def __init__(self, f_size):
self.file_size = f_size
def hasFileMatch(self):
return True
def hasDirectoryMatch(self):
return False
def isFileMatch(self, path, filename):
fname = os.path.join(path, filename)
if not os.path.islink(fname):
f_size = os.path.getsize(fname)
if f_size > self.file_size:
return True
else:
return False
else:
return False
# recursively visit directory and its children
def process():
for root, dirs, files in os.walk(start_dir):
rel_dir = os.path.relpath(root, start_dir)
for name in files:
for filter in filter_li:
if filter.hasFileMatch() and filter.isFileMatch(root, name):
if show_relpath == True:
filename = os.path.join('.', rel_dir, name)
print(filename)
else:
filename = os.path.join(root, name)
print(filename)
for dir in dirs:
for filter in filter_li:
if filter.hasDirectoryMatch() and filter.isDirectoryMatch(dir):
if show_relpath == True:
filename = os.path.join('.', rel_dir, dir)
print(filename)
else:
filename = os.path.join(root, dir)
print(filename)
def process_args():
    global start_dir, show_relpath, filter_li
is_file_suffix_filter = True
file_suffix = ['.hprof']
is_named_dir_filter = True
is_large_file_filter = True
large_file_filter_size = 1_000_000_000
named_dir = ['logs']
parser = argparse.ArgumentParser(add_help=False)
parser.add_argument("--start_dir", help="The root directory to begin processing. Default is the user's home directory.")
parser.add_argument("--show_relpath", help="Output the relative path, otherwise show full path. Default is False.")
parser.add_argument("--log_level", choices=['CRITICAL', 'ERROR', 'WARNING', 'INFO', 'DEBUG'], help="logging level. Default is WARNING.")
parser.add_argument("--file_suffix_filter", choices=['true', 'false'], help="Filter in files that match a suffix. Default is true.")
parser.add_argument("--file_suffix", help="A list of file suffixes to be used with --file_suffix_filter, separated by commas. Default suffixes: '.hprof'.")
parser.add_argument("--named_dir_filter", choices=['true', 'false'], help="Filter in directories based on a name. Default is true.")
parser.add_argument("--named_dir", help="A list of directories used with --named_dir_filter, separated by commas. Default directories: 'logs'. Other suggestions: target,work,deploy")
parser.add_argument("--large_file_filter", choices=['true', 'false'], help="Filter in files larger than a default size of {}. Default is true.".format(large_file_filter_size))
parser.add_argument("--large_file_filter_size", help="Large file filter size.")
parser.add_argument('-h', '--help', action='help', default=argparse.SUPPRESS, help="This program will recurse a directory and look for files to be cleaned up.")
# process arguments
args = parser.parse_args()
if args.start_dir:
start_dir = args.start_dir
if args.show_relpath:
if args.show_relpath.lower() == 'true' or args.show_relpath.lower() == 't':
show_relpath = True
if args.file_suffix_filter:
if args.file_suffix_filter.lower() == 'true' or args.file_suffix_filter.lower() == 't':
is_file_suffix_filter = True
else:
is_file_suffix_filter = False
if args.file_suffix:
file_suffix = args.file_suffix.split(',')
if args.named_dir_filter:
if args.named_dir_filter.lower() == 'true' or args.named_dir_filter.lower() == 't':
is_named_dir_filter = True
else:
is_named_dir_filter = False
if args.named_dir:
named_dir = args.named_dir.split(',')
if args.large_file_filter:
if args.large_file_filter.lower() == 'true' or args.large_file_filter.lower() == 't':
is_large_file_filter = True
else:
is_large_file_filter = False
if args.large_file_filter_size:
large_file_filter_size = int(args.large_file_filter_size)
# set values based on arguments
if is_file_suffix_filter == True:
filter_li.append(file_suffix_filter(file_suffix))
if is_named_dir_filter == True:
filter_li.append(named_dir_filter(named_dir))
if is_large_file_filter == True:
filter_li.append(large_file_filter(large_file_filter_size))
    if args.log_level:
        # the choices above map directly onto logging's level names
        logging.basicConfig(level=getattr(logging, args.log_level))
    else:
        # set logging level. WARNING is default level
        logging.basicConfig(level=logging.WARNING)
def main():
if sys.version_info < (3,0,0):
print("Please use a version of Python > 3")
sys.exit(-1)
process_args()
process()
main()
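# Example invocation (sketch; the script filename is hypothetical):
#   python find_cleanup_files.py --start_dir /data --named_dir logs,work --large_file_filter_size 500000000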
import argparse
import csv
from datetime import datetime
import logging
import sys
file = r'C:\Users\Dixson\tmp.csv'
before_dt = None
after_dt = None
col_no = 1
def process(fin):
with open('tmp.csv', 'w', newline='') as csvfile:
mywriter = csv.writer(csvfile)
with open(fin, newline='') as csvfile:
reader = csv.reader(csvfile)
for row in reader:
value = row[col_no]
dt = convert_dt(value)
logging.debug("Value: {}, date: {} on column {}".format(value, dt, col_no))
if dt == None:
mywriter.writerow(row)
#print(', '.join(row))
continue
if (before_dt == None or dt < before_dt):
if( after_dt == None or dt > after_dt):
mywriter.writerow(row)
#print(', '.join(row))
# example date: 2017-01-05 14:11:21
def convert_dt(s):
try:
return datetime.strptime(s, '%Y-%m-%d %H:%M:%S')
    except ValueError:
        return None
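# Sketch: convert_dt('2017-01-05 14:11:21') -> datetime(2017, 1, 5, 14, 11, 21).
# A value that does not parse (e.g. a header cell) returns None, and process()
# writes that row through unfiltered.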
def process_args():
global file, before_dt, after_dt, col_no
parser = argparse.ArgumentParser(add_help=False)
parser.add_argument("-f", "--file", help="the input file. If not provided, /dev/stdin is used.")
parser.add_argument("--before", help='include dates before provided date. E.g., --before "2017-01-05 14:11:21"')
parser.add_argument("--after", help="include dates after provided date.")
parser.add_argument("--columnNumber", help="the column number that has the date field, beginning at 0.")
parser.add_argument("--log_level", choices=['CRITICAL', 'ERROR', 'WARNING', 'INFO', 'DEBUG'], help="logging level")
parser.add_argument('-h', '--help', action='help', default=argparse.SUPPRESS, help="This program parses a csv file using the date filter criteria.")
args = parser.parse_args()
if args.file:
file = args.file
else:
# won't work on Windows
file = '/dev/stdin'
if args.before:
before_dt = convert_dt(args.before)
if args.after:
after_dt = convert_dt(args.after)
if args.columnNumber:
col_no = int(args.columnNumber)
    if args.log_level:
        # the choices above map directly onto logging's level names
        logging.basicConfig(level=getattr(logging, args.log_level))
    else:
        # set logging level. WARNING is default level
        logging.basicConfig(level=logging.WARNING)
def main():
if sys.version_info < (3,0,0):
print("Please use a version of Python > 3")
sys.exit(-1)
process_args()
process(file)
main()
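# Example invocation (sketch; the script filename is hypothetical):
#   python csv_date_filter.py -f input.csv --columnNumber 6 --after "2017-01-05 00:00:00" --before "2017-01-06 00:00:00"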
import argparse
import csv
import datetime
import logging
import os
import re
import sys
'''
This program parses a gc log file for stop the world phases and keywords to csv file.
PrintGCDateStamps has been enabled
PrintAdaptiveSizePolicy has been enabled
young gc types - G1 Evacuation Pause, G1 Humongous Allocation, Metadata GC Threshold
mixed types - G1 Evacuation Pause
full gc types - Allocation Failure, System.gc()
'''
# list of columns
col_li = ['file name', 'line no.', 'host', 'pid','date time', 'process time', 'gc type/keyword', 'time', 'comment']
# list of special extensions to visit
# versioned logs will be in the format .1, .2, etc. This is checked elsewhere.
extlist = ['.current']
#extlist = ['.log', '.current', '.1', '.2']
# special patterns to search for
search_li = ['to-space','humongous']
#search_li = ['to-space','humongous', r'System.gc\(\)']
datefmt = r'(\d\d\d\d-\d\d-\d\dT\d\d:\d\d:\d\d\.\d\d\d[\+\-]\d\d\d\d): (\d+\.\d+): '
# ^ date time ^ timestamp
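# Sketch: for the prefix '2017-09-01T16:12:51.133+0000: 134.345: ', group(1) is the
# date-time stamp and group(2) is '134.345' (seconds since the JVM started).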
def gethomedir():
return os.path.expanduser('~')
def gettimestamp():
today = datetime.date.today()
return today.strftime("%Y%b%d")
# globals
# list of directories to visit
dirlist = [r'E:\log']
output_filename = '{}{}gc-summary-{}.csv'.format(gethomedir(), os.path.sep, gettimestamp())
show_relative_path = False
enable_humongous = False
# a list of possible host names
host_li = []
# log files were collected and put in directories by hostname, separated by '.'
def get_hostname(dirpath):
for h in host_li:
if dirpath.find(h) > -1:
return h
return ''
# get the pid from the log file name
# use Xloggc:/path/to/file/gc.%p.log, where %p tells the JVM to substitute the pid
def get_pid(filename):
li = filename.split('pid')
if( len(li) == 1 ):
return li[0]
else:
(pid, rest) = li[1].split('.', 1)
logging.debug("pid: %s", pid)
return pid
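# Sketch: with -Xloggc:/path/to/file/gc.%p.log the JVM expands %p to a token like
# 'pid12345', so get_pid('gc.pid12345.log.current') returns '12345'; a name with
# no 'pid' marker is returned unchanged.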
# not used, this is a deprecated version of the visitor
#def myvisitor(extlist, dirname, names):
# global fileinfo
# logging.debug("Current directory: %s", dirname)
# for f in names:
# (p, ext) = os.path.splitext(f)
# logging.debug("%s %s", f, ext)
# if ext in extlist:
# fullpath = os.path.join(dirname, f)
# logging.debug(fullpath)
# try:
# hostname = get_hostname(dirname)
# pid = get_pid(f)
# fileinfo = {'filename': f, 'host' : hostname, 'pid' : pid}
#
# process_file(fullpath)
# except OSError as err:
# print("OS error: {0}".format(err))
#
# #except OSError, detail:
# # print detail
def myvisitor_2(fullpath, f):
global fileinfo
try:
hostname = get_hostname(f)
pid = get_pid(f)
fileinfo = {'filename': f, 'host': hostname, 'pid': pid}
process_file(fullpath)
except OSError as err:
print("OS error: {0}".format(err))
def process_file(fullpath):
    # this section processes log messages that occupy a single line
linenum = 0
f = open(fullpath, 'r')
date_time = ''
process_time = ''
# process line by line to get basic information
for line in f:
linenum += 1
m = re.match(r'^' + datefmt, line)
if m:
# save current timestamp
date_time = m.group(1)
process_time = m.group(2)
# check for keywords of interest
process_search_pattern(line, linenum, date_time, process_time)
if line.startswith('Java HotSpot(TM)') or line.startswith('Memory:') or line.startswith('CommandLine flags:'):
process_jvminfo(line, linenum)
elif line.startswith(' ') == False:
# check for stw pauses that appear on one line
process_remark_cleanup(line, linenum)
# this section processes log messages that span multiple lines
# read file object to string. When -XX:+PrintAdaptiveSizePolicy is used,
# gc phases need a multi-line regex to handle
# check for stw pause that spans multiple lines
f.seek(0)
text = f.read()
f.close()
# we are interested in activity that causes a stop-the-world pause and the duration of the gc
# https://blogs.oracle.com/poonam/entry/understanding_g1_gc_logs
# https://www.oracle.com/technetwork/articles/java/g1gc-1984535.html
# process multi-line gc phases
process_young_mixed(text)
process_full(text)
###############################################################################
# methods that process multi-line messages
###############################################################################
def process_young_mixed(s):
'''
young generation and mixed collection share similar formats
These gc log statements show up on multiple lines.
Example:
2017-09-01T16:12:51.133+0000: 134.345: [GC pause (Metadata GC Threshold) (young) (initial-mark)
Desired survivor size 48234496 bytes, new threshold 15 (max 15)
134.346: [G1Ergonomics (CSet Construction) start choosing CSet, _pending_cards: 0, predicted base time: 10.00 ms, remaining time: 990.00 ms, target pause time: 1000.00 ms]
134.346: [G1Ergonomics (CSet Construction) add young regions to CSet, eden: 63 regions, survivors: 0 regions, predicted young region time: 4209.46 ms]
134.346: [G1Ergonomics (CSet Construction) finish choosing CSet, eden: 63 regions, survivors: 0 regions, old: 0 regions, predicted pause time: 4219.46 ms, target pause time: 1000.00 ms]
, 0.0325663 secs]
'''
date_time = ''
process_time = 0.0
young_mixed_type = '' # young or mixed
secondary_type = '' # Eg, G1 Evacuation Pause, G1 Humongous Allocation, Metadata GC Threshold
initial_mark = '' # tertiary type, associated with G1 Humongous Allocation and Metadata GC Threshold
gc_time = 0.0
patternstr = datefmt + r'\[GC pause \(([ \w]*)\) \((young|mixed)\)( \([\w-]+\))?.+?, (\d+\.\d+) secs\]$'
'''
^secondary ^young/mixed ^initial_mark ^ elapsed time
'''
pattern = re.compile(patternstr, re.MULTILINE | re.DOTALL)
#pattern = re.compile(r'^(\d\d\d\d-\d\d-\d\dT\d\d:\d\d:\d\d\.\d\d\d\+\d\d\d\d): (\d*\.\d*): \[GC pause \(([ \w\.\(\)]*)\) \(young\).+?, (\d*\.\d*) secs\]$', re.MULTILINE | re.DOTALL)
for m in pattern.finditer(s):
date_time = m.group(1)
process_time = m.group(2)
young_mixed_type = m.group(4)
secondary_type = m.group(3)
if m.group(5) == None:
initial_mark = ''
else:
tmp = m.group(5)
tmp = tmp.strip('() ')
initial_mark = ' ' + tmp
gc_time = m.group(6)
mywriter.writerow([fileinfo['filename'], '', fileinfo['host'], fileinfo['pid'], date_time, process_time, 'GC pause - ' + young_mixed_type + ' ' + secondary_type + initial_mark, gc_time, ''])
def process_mixed(s):
'''
2017-09-01T17:53:24.732+0000: 6167.945: [GC pause (G1 Evacuation Pause) (mixed)
Desired survivor size 48234496 bytes, new threshold 1 (max 15)
- age 1: 303167832 bytes, 303167832 total
6167.945: [G1Ergonomics (CSet Construction) start choosing CSet, _pending_cards: 8728, predicted base time: 24.66 ms, remaining time: 975.34 ms, target pause time: 1000.00 ms]
6167.945: [G1Ergonomics (CSet Construction) add young regions to CSet, eden: 105 regions, survivors: 74 regions, predicted young region time: 305.85 ms]
6167.945: [G1Ergonomics (CSet Construction) finish adding old regions to CSet, reason: reclaimable percentage not over threshold, old: 19 regions, max: 359 regions, reclaimable: 751186712 bytes (5.00 %), threshold: 5.00 %]
6167.945: [G1Ergonomics (CSet Construction) finish choosing CSet, eden: 105 regions, survivors: 74 regions, old: 19 regions, predicted pause time: 362.13 ms, target pause time: 1000.00 ms]
6168.115: [G1Ergonomics (Mixed GCs) do not continue mixed GCs, reason: reclaimable percentage not over threshold, candidate old regions: 335 regions, reclaimable: 751186712 bytes (5.00 %), threshold: 5.00 %]
, 0.1695338 secs]
'''
process_time = 0.0
date_time = ''
mixed_type = ''
gc_time = 0.0
# output similar to GC pause (young)
patternstr = datefmt + r'\[GC pause \(([ \w]*)\) \(mixed\)( \([\w-]+\))?.+?, (\d+\.\d+) secs\]$'
# ^mixed_type
pattern = re.compile(patternstr, re.MULTILINE | re.DOTALL)
for m in pattern.finditer(s):
date_time = m.group(1)
process_time = m.group(2)
mixed_type = m.group(3)
gc_time = m.group(5)
mywriter.writerow([fileinfo['filename'], '', fileinfo['host'], fileinfo['pid'], date_time, process_time, 'Mixed generation collection - ' + mixed_type, gc_time, ''])
def process_full(s):
'''
Full GC statements are also output to multiple lines.
2018-07-30T11:39:47.643-0400: 174.007: [Full GC (Heap Inspection Initiated GC) 2018-07-30T11:39:47.643-0400: 174.007: [Class Histogram (before full gc):
2018-07-25T11:59:08.922+0000: 1098967.077: [Full GC (System.gc()) 2018-07-25T11:59:08.927+0000: 1098967.081: [Class Histogram (before full gc):
2018-07-21T12:11:41.060+0000: 387110.898: [Full GC (Allocation Failure) 2018-07-21T12:11:41.060+0000: 387110.898: [Class Histogram (before full gc):
...
..., real=6.79 secs]
'''
date_time = ''
process_time = 0.0
young_type = ''
gc_time = 0.0
gcfmt = r'\[Full GC \(([ \w\.\(\)]*)\) .+?, real=(\d+\.\d+) secs\]\s*$'
# ^ full gc type
patternstr = datefmt + gcfmt
pattern = re.compile(patternstr, re.MULTILINE | re.DOTALL)
for m in pattern.finditer(s):
date_time = m.group(1)
process_time = m.group(2)
full_gc_type = m.group(3)
gc_time = m.group(4)
mywriter.writerow([fileinfo['filename'], '', fileinfo['host'], fileinfo['pid'], date_time, process_time, 'Full GC - ' + full_gc_type , gc_time, ''])
###############################################################################
# end methods that process multi-line messages
###############################################################################
###############################################################################
# methods that process a single line
###############################################################################
def process_jvminfo(s, linenum):
s = s.strip()
mywriter.writerow([fileinfo['filename'], linenum, fileinfo['host'], fileinfo['pid'], '', '', 'jvm info', '', s])
def process_remark_cleanup(s, linenum):
'''
These gc log statements show up on a single line.
Example:
2017-09-01T16:12:51.175+0000: 134.388: [GC remark 2017-09-01T16:12:51.175+0000: 134.388: [Finalize Marking, 0.0058528 secs] 2017-09-01T16:12:51.181+0000: 134.394: [GC ref-proc, 0.0001349 secs] 2017-09-01T16:12:51.181+0000: 134.394: [Unloading, 0.0032643 secs], 0.0100601 secs]
44973.856: [GC cleanup 22G->22G(30G), 0.0100070 secs]
[Times: user=0.08 sys=0.00, real=0.01 secs]
'''
gc_type = ''
date_time = ''
process_time = 0.0
gc_time = 0.0
m = re.match(r'^' + datefmt + r'\[GC remark .*, (\d+\.\d+) secs\]$', s)
if m:
gc_type = 'GC remark'
date_time = m.group(1)
process_time = m.group(2)
gc_time = m.group(3)
else:
m = re.match(r'^' + datefmt + r'\[GC cleanup .+, (\d+\.\d+) secs\]$', s)
if m:
gc_type = 'GC cleanup'
date_time = m.group(1)
process_time = m.group(2)
gc_time = m.group(3)
if gc_type != '':
mywriter.writerow([fileinfo['filename'], linenum, fileinfo['host'], fileinfo['pid'], date_time, process_time, gc_type, gc_time])
def process_search_pattern(s, linenum, date_time, process_time):
'''
Look for search strings of interest. If found write to csv.
'''
patternstr = r'({})'.format('|'.join(search_li))
m = re.search(patternstr, s, re.IGNORECASE)
if m:
search_pattern = m.group(1).lower()
if search_pattern == 'humongous' and enable_humongous == False:
return
else:
s = s.strip()
mywriter.writerow([fileinfo['filename'], linenum, fileinfo['host'], fileinfo['pid'], date_time, process_time, search_pattern, '', s])
'''
for search_pattern in search_li:
if re.search(search_pattern, s, re.IGNORECASE):
s = s.strip()
mywriter.writerow([fileinfo['filename'], linenum, fileinfo['host'], fileinfo['pid'], date_time, process_time, search_pattern, '', s])
break
'''
###############################################################################
# end methods that process a single line
###############################################################################
def process_args():
global dirlist, output_filename, enable_humongous, show_relative_path, host_li
parser = argparse.ArgumentParser(add_help=False)
parser.add_argument("--start_dir", help="the root directory to begin processing.")
parser.add_argument("--output_dir", help="where the output file should be written to. By default the output file will be located in a user's home directory.")
parser.add_argument("--log_level", choices=['CRITICAL', 'ERROR', 'WARNING', 'INFO', 'DEBUG'], help="logging level. Default is WARNING.")
parser.add_argument("--enable_humongous", help='True enables inclusion of any log messages that have to do with humongous allocation. Default is False.')
parser.add_argument("--show_relative_path", help="show relative path in filename column. true or false. Default is false.")
parser.add_argument("--hosts", help="list of hosts, separated by commas.")
parser.add_argument('-h', '--help', action='help', default=argparse.SUPPRESS, help=r'This program parses a gc log file and provides a summary in csv format. The following JVM options should be used to generate the log file: -Xloggc:/path/to/file/gc_%%p.log -XX:+PrintCommandLineFlags -XX:+PrintGC -XX:+PrintGCCause -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintGCDateStamps -XX:+PrintAdaptiveSizePolicy -XX:+PrintTenuringDistribution -XX:+PrintReferenceGC')
args = parser.parse_args()
if args.start_dir:
dirlist = [args.start_dir]
if args.output_dir:
output_filename = args.output_dir + os.path.sep + "gc_log_summary-" + gettimestamp() + ".csv"
if args.enable_humongous:
if args.enable_humongous.lower() == 'true' or args.enable_humongous.lower() == 't':
enable_humongous = True
    if args.log_level:
        # the choices above map directly onto logging's level names
        logging.basicConfig(level=getattr(logging, args.log_level))
    else:
        # set logging level. WARNING is default level
        logging.basicConfig(level=logging.WARNING)
if args.show_relative_path and args.show_relative_path.lower() == 'true':
show_relative_path = True
if args.hosts:
host_li = args.hosts.split(',')
def main():
global mywriter
if sys.version_info < (3,0,0):
print("Please use a version of Python > 3")
sys.exit(-1)
process_args()
# write output to csv file
with open(output_filename, 'w', newline='') as csvfile:
mywriter = csv.writer(csvfile)
# write column headings
mywriter.writerow(col_li)
for dir in dirlist:
logging.debug(dir)
for root, dirs, files in os.walk(dir):
for name in files:
logging.debug(os.path.join(root, name))
(b, extension) = os.path.splitext(name)
ext = extension.lstrip('.')
if extension in extlist or ext.isdigit():
fullpath = os.path.join(root, name)
if show_relative_path == True:
# add one for path separator
index = len(dir) + 1
fname = fullpath[index:]
myvisitor_2(fullpath, fname)
else:
myvisitor_2(fullpath, name)
for name in dirs:
logging.debug(os.path.join(root, name))
#os.path.walk(dir, myvisitor, extlist)
main()
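# Example invocation (sketch; the script filename is hypothetical):
#   python gc_log_summary.py --start_dir /path/to/gc/logs --enable_humongous true --hosts host1,host2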
import argparse
import csv
import datetime
import logging
import os
import re
import sys
'''
This program parses a gc log file for stop the world phases and keywords to csv file.
young gc types - G1 Evacuation Pause, G1 Humongous Allocation, Metadata GC Threshold
mixed types - G1 Evacuation Pause
full gc types - Allocation Failure, System.gc()
'''
# list of columns
col_li = ['file name', 'line no.', 'host', 'pid','date time', 'process time', 'gc type/keyword', 'time', 'comment']
# list of extensions to visit
extlist = ['.current', '.0', '.1', '.2', '.3', '.4', '.5']
#extlist = ['.log', '.current', '.1', '.2']
# special patterns to search for
search_li = ['to-space','humongous']
#search_li = ['to-space','humongous', r'System.gc\(\)']
datefmt = r'^(\d+\.\d+): '
# ^ timestamp
#datefmt = r'^(\d\d\d\d-\d\d-\d\dT\d\d:\d\d:\d\d\.\d\d\d[\+\-]\d\d\d\d): (\d+\.\d+): '
# ^ date time ^ timestamp
def gethomedir():
return os.path.expanduser('~')
def gettimestamp():
today = datetime.date.today()
return today.strftime("%Y%b%d")
# globals
# list of directories to visit
dirlist = [r'/tmp']
output_filename = '{}{}gc-summary-{}.csv'.format(gethomedir(), os.path.sep, gettimestamp())
show_relative_path = False
host_li = []
# log files were collected and put in directories by hostname, separated by '.'
def get_hostname(dirpath):
for h in host_li:
if dirpath.find(h) > -1:
return h
return ''
# use Xloggc:/path/to/file/gc.%p.log, where %p tells the JVM to substitute the pid
def get_pid(filename):
li = filename.split('pid')
if( len(li) == 1 ):
return li[0]
else:
(pid, rest) = li[1].split('.', 1)
logging.debug("pid: %s", pid)
return pid
def myvisitor(extlist, dirname, names):
global fileinfo
logging.debug("Current directory: %s", dirname)
for f in names:
(p, ext) = os.path.splitext(f)
logging.debug("%s %s", f, ext)
if ext in extlist:
fullpath = os.path.join(dirname, f)
logging.debug(fullpath)
try:
hostname = get_hostname(dirname)
pid = get_pid(f)
fileinfo = {'filename': f, 'host' : hostname, 'pid' : pid}
process_file(fullpath)
except OSError as err:
print("OS error: {0}".format(err))
#except OSError, detail:
# print detail
def myvisitor_2(fullpath, f):
global fileinfo
try:
hostname = get_hostname(f)
pid = get_pid(f)
fileinfo = {'filename': f, 'host': hostname, 'pid': pid}
process_file(fullpath)
except OSError as err:
print("OS error: {0}".format(err))
def process_file(fullpath):
linenum = 0
f = open(fullpath, 'r')
date_time = ''
process_time = ''
# process line by line to get basic information
for line in f:
linenum += 1
m = re.match(datefmt, line)
if m:
# save current timestamp
date_time = m.group(0)
process_time = m.group(1)
# check for keywords of interest
#process_search_pattern(line, linenum, date_time, process_time)
if line.startswith('Java HotSpot(TM)') or line.startswith('Memory:') or line.startswith('CommandLine flags:'):
process_jvminfo(line, linenum)
elif line.startswith(' ') == False:
# check for stw pauses that appear on one line
process_remark_cleanup(line, linenum)
# read file object to string. When -XX:+PrintAdaptiveSizePolicy is used,
# gc phases need a multi-line regex to handle
# check for stw pause that spans multiple lines
f.seek(0)
text = f.read()
f.close()
# we are interested in activity that causes a stop-the-world pause and the duration of the gc
# https://blogs.oracle.com/poonam/entry/understanding_g1_gc_logs
# https://www.oracle.com/technetwork/articles/java/g1gc-1984535.html
# process multi-line gc phases
process_young(text)
process_mixed(text)
process_full(text)
def process_jvminfo(s, linenum):
s = s.strip()
mywriter.writerow([fileinfo['filename'], linenum, fileinfo['host'], fileinfo['pid'], '', '', 'jvm info', '', s])
def process_young(s):
'''
232610.071: [GC pause (G1 Evacuation Pause) (young)
Desired survivor size 1090519040 bytes, new threshold 15 (max 15)
- age 1: 2294896 bytes, 2294896 total
- age 2: 1768760 bytes, 4063656 total
- age 3: 2228888 bytes, 6292544 total
- age 4: 4939064 bytes, 11231608 total
- age 5: 4320224 bytes, 15551832 total
- age 6: 2211832 bytes, 17763664 total
- age 7: 594464 bytes, 18358128 total
- age 8: 1539128 bytes, 19897256 total
- age 9: 3044240 bytes, 22941496 total
- age 10: 2794640 bytes, 25736136 total
- age 11: 3209632 bytes, 28945768 total
- age 12: 2267952 bytes, 31213720 total
- age 13: 2402216 bytes, 33615936 total
- age 14: 2345184 bytes, 35961120 total
- age 15: 2231848 bytes, 38192968 total
232610.071: [G1Ergonomics (CSet Construction) start choosing CSet, _pending_cards: 13138, predicted base time: 78.16 ms, remaining time: 121.84 ms, target pause time: 200.00 ms]
232610.071: [G1Ergonomics (CSet Construction) add young regions to CSet, eden: 1035 regions, survivors: 4 regions, predicted young region time: 11.03 ms]
232610.071: [G1Ergonomics (CSet Construction) finish choosing CSet, eden: 1035 regions, survivors: 4 regions, old: 0 regions, predicted pause time: 89.19 ms, target pause time: 200.00 ms]
, 0.1156739 secs]
'''
logging.debug("In process_young")
date_time = ''
process_time = 0.0
young_type = ''
initial_mark = ''
gc_time = 0.0
patternstr = datefmt + r'\[GC pause \(([ \w]*)\) \(young\).+?, (\d+\.\d+) secs\]$'
'''
^type ^ elapsed time
'''
#patternstr = datefmt + r'\[GC pause \(([ \w]*)\) \(young\)( \([\w-]+\))?.+?, (\d+\.\d+) secs\]$'
'''
^type ^initial_mark ^ elapsed time
'''
pattern = re.compile(patternstr, re.MULTILINE | re.DOTALL)
#pattern = re.compile(r'^(\d\d\d\d-\d\d-\d\dT\d\d:\d\d:\d\d\.\d\d\d\+\d\d\d\d): (\d*\.\d*): \[GC pause \(([ \w\.\(\)]*)\) \(young\).+?, (\d*\.\d*) secs\]$', re.MULTILINE | re.DOTALL)
for m in pattern.finditer(s):
process_time = m.group(1)
young_type = m.group(2)
gc_time = m.group(3)
mywriter.writerow([fileinfo['filename'], '', fileinfo['host'], fileinfo['pid'], date_time, process_time, 'Young generation collection - ' + young_type + initial_mark, gc_time, ''])
def process_mixed(s):
'''
257167.069: [GC pause (G1 Evacuation Pause) (mixed)
Desired survivor size 117440512 bytes, new threshold 15 (max 15)
- age 1: 169008 bytes, 169008 total
- age 2: 5032 bytes, 174040 total
- age 3: 2712288 bytes, 2886328 total
- age 4: 820208 bytes, 3706536 total
- age 5: 916704 bytes, 4623240 total
- age 6: 3246680 bytes, 7869920 total
- age 7: 852856 bytes, 8722776 total
- age 8: 605648 bytes, 9328424 total
- age 9: 983264 bytes, 10311688 total
- age 10: 1685120 bytes, 11996808 total
- age 11: 692152 bytes, 12688960 total
- age 12: 2147224 bytes, 14836184 total
- age 13: 1511072 bytes, 16347256 total
- age 14: 1832744 bytes, 18180000 total
- age 15: 1066168 bytes, 19246168 total
257167.069: [G1Ergonomics (CSet Construction) start choosing CSet, _pending_cards: 70042, predicted base time: 71.62 ms, remaining time: 128.38 ms, target pause time: 200.00 ms]
257167.069: [G1Ergonomics (CSet Construction) add young regions to CSet, eden: 109 regions, survivors: 3 regions, predicted young region time: 6.64 ms]
257167.069: [G1Ergonomics (CSet Construction) finish adding old regions to CSet, reason: predicted time is too high, predicted time: 3.29 ms, remaining time: 0.00 ms, old: 79 regions, min: 79 regions]
257167.069: [G1Ergonomics (CSet Construction) added expensive regions to CSet, reason: old CSet region num not reached min, old: 79 regions, expensive: 29 regions, min: 79 regions, remaining time: 0.00 ms]
257167.069: [G1Ergonomics (CSet Construction) finish choosing CSet, eden: 109 regions, survivors: 3 regions, old: 79 regions, predicted pause time: 285.70 ms, target pause time: 200.00 ms]
257167.236: [G1Ergonomics (Mixed GCs) continue mixed GCs, reason: candidate old regions available, candidate old regions: 344 regions, reclaimable: 2334497912 bytes (6.21 %), threshold: 5.00 %]
, 0.1677699 secs]
'''
process_time = 0.0
date_time = ''
mixed_type = ''
gc_time = 0.0
# output similar to GC pause (young)
patternstr = datefmt + r'\[GC pause \(([ \w]*)\) \(mixed\).+?, (\d+\.\d+) secs\]$'
# ^mixed_type
#patternstr = datefmt + r'\[GC pause \(([ \w]*)\) \(mixed\)( \([\w-]+\))?.+?, (\d+\.\d+) secs\]$'
pattern = re.compile(patternstr, re.MULTILINE | re.DOTALL)
for m in pattern.finditer(s):
process_time = m.group(1)
mixed_type = m.group(2)
gc_time = m.group(3)
mywriter.writerow([fileinfo['filename'], '', fileinfo['host'], fileinfo['pid'], date_time, process_time, 'Mixed generation collection - ' + mixed_type, gc_time, ''])
def process_full(s):
'''
422052.838: [Full GC (System.gc()) 16G->10G(35G), 34.1545090 secs]
'''
date_time = ''
process_time = 0.0
young_type = ''
gc_time = 0.0
gcfmt = r'\[Full GC \(([ \w\.\(\)]*)\) .+?, (\d+\.\d+) secs\]$'
# ^ full gc type
#gcfmt = r'\[Full GC \(([ \w\.\(\)]*)\) .+?, real=(\d+\.\d+) secs\]\s*$'
patternstr = datefmt + gcfmt
pattern = re.compile(patternstr, re.MULTILINE | re.DOTALL)
for m in pattern.finditer(s):
process_time = m.group(1)
full_gc_type = m.group(2)
gc_time = m.group(3)
mywriter.writerow([fileinfo['filename'], '', fileinfo['host'], fileinfo['pid'], date_time, process_time, 'Full GC - ' + full_gc_type , gc_time, ''])
def process_remark_cleanup(s, linenum):
'''
These gc log statements show up on a single line.
Example:
706.065: [GC cleanup 220M->218M(512M), 0.0021548 secs]
706.035: [GC remark, 0.0278976 secs]
108684.812: [GC remark 108684.812: [Finalize Marking, 0.0018014 secs] 108684.814: [GC ref-proc, 0.0089392 secs] 108684.823: [Unloading, 0.0317085 secs], 0.0672140 secs]
'''
gc_type = ''
date_time = ''
process_time = 0.0
gc_time = 0.0
m = re.match(datefmt + r'\[GC remark.+(\d+\.\d+) secs\]$', s)
if m:
gc_type = 'GC remark'
process_time = m.group(1)
gc_time = m.group(2)
else:
m = re.match(datefmt + r'\[GC cleanup .+, (\d+\.\d+) secs\]$', s)
if m:
gc_type = 'GC cleanup'
date_time = '0'
process_time = m.group(1)
gc_time = m.group(2)
if gc_type != '':
mywriter.writerow([fileinfo['filename'], linenum, fileinfo['host'], fileinfo['pid'], date_time, process_time, gc_type, gc_time])
def process_search_pattern(s, linenum, date_time, process_time):
'''
Look for search strings of interest. If found write to csv.
'''
patternstr = r'({})'.format('|'.join(search_li))
m = re.search(patternstr, s, re.IGNORECASE)
if m:
search_pattern = m.group(1).lower()
s = s.strip()
mywriter.writerow([fileinfo['filename'], linenum, fileinfo['host'], fileinfo['pid'], date_time, process_time, search_pattern, '', s])
'''
for search_pattern in search_li:
if re.search(search_pattern, s, re.IGNORECASE):
s = s.strip()
mywriter.writerow([fileinfo['filename'], linenum, fileinfo['host'], fileinfo['pid'], date_time, process_time, search_pattern, '', s])
break
'''
def process_args():
global dirlist, output_filename, show_relative_path, host_li
parser = argparse.ArgumentParser(add_help=False)
parser.add_argument("--start_dir", help="the root directory to begin processing.")
parser.add_argument("--output_dir", help="where the output file should be written to. By default the output file will be located in a user's home directory.")
parser.add_argument("--log_level", choices=['CRITICAL', 'ERROR', 'WARNING', 'INFO', 'DEBUG'], help="logging level. Default is WARNING.")
parser.add_argument("--show_relative_path", help="show relative path in filename column. true or false. Default is false.")
parser.add_argument("--hosts", help="list of hosts, separated by commas.")
parser.add_argument('-h', '--help', action='help', default=argparse.SUPPRESS, help=r'This program parses a gc log file and provides a summary in csv format. The following JVM options should be used to generate the log file: -Xloggc:/path/to/file/gc_%%p.log -XX:+PrintCommandLineFlags -XX:+PrintGC -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintAdaptiveSizePolicy -XX:+PrintTenuringDistribution -XX:-PrintReferenceGC')
args = parser.parse_args()
if args.start_dir:
dirlist = [args.start_dir]
output_filename = args.start_dir + os.path.sep + "gc_log_summary-" + gettimestamp() + ".csv"
if args.output_dir:
output_filename = args.output_dir + os.path.sep + "gc_log_summary-" + gettimestamp() + ".csv"
    if args.log_level:
        # the choices above map directly onto logging's level names
        logging.basicConfig(level=getattr(logging, args.log_level))
    else:
        # set logging level. WARNING is default level
        logging.basicConfig(level=logging.WARNING)
if args.show_relative_path and args.show_relative_path.lower() == 'true':
show_relative_path = True
if args.hosts:
host_li = args.hosts.split(',')
def main():
global mywriter
if sys.version_info < (3,0,0):
print("Please use a version of Python > 3")
sys.exit(-1)
process_args()
# write output to csv file
with open(output_filename, 'w', newline='') as csvfile:
mywriter = csv.writer(csvfile)
# write column headings
mywriter.writerow(col_li)
for dir in dirlist:
logging.debug(dir)
for root, dirs, files in os.walk(dir):
for name in files:
logging.debug(os.path.join(root, name))
(b, ext) = os.path.splitext(name)
for x in extlist:
m = re.match(x, ext)
if m:
fullpath = os.path.join(root, name)
if show_relative_path == True:
# add one for path separator
index = len(dir) + 1
fname = fullpath[index:]
myvisitor_2(fullpath, fname)
else:
myvisitor_2(fullpath, name)
for name in dirs:
logging.debug(os.path.join(root, name))
#os.path.walk(dir, myvisitor, extlist)
main()
import os
import csv
import re
import logging
import argparse
import datetime
# list of columns
col_li = ['file name', 'line no.', 'host', 'pid', 'process time', 'gc type/keyword', 'time', 'size before gc', 'size after gc', 'total heap size']
# list of directories to visit
dirlist = [r'E:\log']
show_relative_path = False
def gethomedir():
return os.path.expanduser('~')
def gettimestamp():
today = datetime.date.today()
return today.strftime("%Y%b%d")
output_filename = '{}{}gc-summary-{}.csv'.format(gethomedir(), os.path.sep, gettimestamp())
# list of extensions to visit
extlist = ['.log']
# special patterns to search for
search_li = ['to-space','humongous', r'System.gc\(\)']
# log files were collected and put in directories by hostname, separated by '.'
def get_hostname(dirpath):
(head, tail) = os.path.split(dirpath)
if tail.find('.') > -1:
(hostname, rest) = tail.split('.', 1)
logging.debug("hostname: %s", hostname)
return hostname
else:
return ''
# use Xloggc:/path/to/file/gc.%p.log, where %p tells the JVM to substitute the pid
def get_pid(filename):
li = filename.split('pid')
if( len(li) == 1 ):
return li[0]
else:
(pid, rest) = li[1].split('.', 1)
logging.debug("pid: %s", pid)
return pid
def myvisitor(extlist, dirname, names):
global fileinfo
logging.debug("Current directory: %s", dirname)
for f in names:
(p, ext) = os.path.splitext(f)
logging.debug("%s %s", f, ext)
if ext in extlist:
fullpath = os.path.join(dirname, f)
logging.debug(fullpath)
try:
hostname = get_hostname(dirname)
pid = get_pid(f)
fileinfo = {'filename': f, 'host' : hostname, 'pid' : pid}
process_file(fullpath)
except OSError as err:
print("OS error: {0}".format(err))
#except OSError, detail:
# print detail
def process_jvminfo(s, linenum):
s = s.strip()
mywriter.writerow([fileinfo['filename'], linenum, fileinfo['host'], fileinfo['pid'], '', 'jvm info', '', '', '', '', s])
def process_file(fullpath):
linenum = 0
f = open(fullpath, 'r')
# process line by line to get basic information
for line in f:
linenum += 1
# check for keywords of interest
process_search_pattern(line, linenum)
if line.startswith('Java HotSpot(TM)') or line.startswith('Memory:') or line.startswith('CommandLine flags:'):
process_jvminfo(line, linenum)
elif line.startswith(' ') == False:
process_remark_cleanup_fullgc(line, linenum)
# read file object to string. When -XX:+PrintAdaptiveSizePolicy is used,
# gc phases need a multi-line regex to handle
# check for stw pause that spans multiple lines
f.seek(0)
text = f.read()
f.close()
# we are interested in activity that causes a stop-the-world pause and the duration of the gc
# https://blogs.oracle.com/poonam/entry/understanding_g1_gc_logs
# process multi-line gc phases
process_young(text)
process_mixed(text)
def process_young(s):
'''
These gc log statements show up on multiple lines.
Example:
54614.619: [GC pause (young)
Desired survivor size 109051904 bytes, new threshold 16 (max 25)
- age 1: 9991736 bytes, 9991736 total
54614.620: [G1Ergonomics (CSet Construction) start choosing CSet, _pending_cards: 4184, predicted base time: 28.58 ms, remaining time: 971.42 ms, target pause time: 1000.00 ms]
54614.620: [G1Ergonomics (CSet Construction) add young regions to CSet, eden: 199 regions, survivors: 4 regions, predicted young region time: 939.32 ms]
54614.620: [G1Ergonomics (CSet Construction) finish choosing CSet, eden: 199 regions, survivors: 4 regions, old: 0 regions, predicted pause time: 967.90 ms, target pause time: 1000.00 ms]
54614.644: [SoftReference, 878 refs, 0.0006080 secs]54614.645: [WeakReference, 1371 refs, 0.0003980 secs]54614.645: [FinalReference, 6591 refs, 0.0029020 secs]54614.648: [PhantomReference, 5 refs, 106 refs, 0.0019450 secs]54614.650: [JNI Weak Reference, 0.0090930 secs], 0.0433140 secs]
'''
process_time = 0.0
gc_time = 0.0
pattern = re.compile(r'^(\d*\.\d*): \[GC pause [ \w\(\)]* \(young\)(.+?), (\d*\.\d*) secs\]$', re.MULTILINE | re.DOTALL)
# multi-line search
for m in pattern.finditer(s):
process_time = m.group(1)
gc_time = m.group(3)
mywriter.writerow([fileinfo['filename'], '', fileinfo['host'], fileinfo['pid'], process_time, 'Young generation collection', gc_time, '', '', ''])
def process_mixed(s):
process_time = 0.0
gc_time = 0.0
# output similar to GC pause (young)
pattern = re.compile(r'^(\d*\.\d*): \[GC pause \(mixed\)(.+?), (\d*\.\d*) secs\]$', re.MULTILINE | re.DOTALL)
for m in pattern.finditer(s):
process_time = m.group(1)
gc_time = m.group(3)
mywriter.writerow([fileinfo['filename'], '', fileinfo['host'], fileinfo['pid'], process_time, 'Mixed generation collection', gc_time, '', '', ''])
def process_remark_cleanup_fullgc(s, linenum):
'''
These gc log statements show up on a single line.
Example:
44973.752: [GC remark 44973.753: [GC ref-proc44973.753: [SoftReference, 3741 refs, 0.0031090 secs]44973.756: [WeakReference, 6937 refs, 0.0069930 secs]44973.763: [FinalReference, 2459 refs, 0.0038880 secs]44973.767: [PhantomReference, 28 refs, 1275 refs, 0.0029950 secs]44973.770: [JNI Weak Reference, 0.0621620 secs], 0.0803160 secs], 0.1021600 secs]
[Times: user=0.30 sys=0.00, real=0.11 secs]
44973.856: [GC cleanup 22G->22G(30G), 0.0100070 secs]
[Times: user=0.08 sys=0.00, real=0.01 secs]
151413.747: [Full GC151419.349: [SoftReference, 490 refs, 0.0000980 secs]151419.349: [WeakReference, 5036 refs, 0.0004770 secs]151419.349: [FinalReference, 10 refs, 0.0000230 secs]151419.349: [PhantomReference, 129 refs, 346 refs, 0.0000520 secs]151419.349: [JNI Weak Reference, 0.0025470 secs] 19G->19G(30G), 14.2256960 secs]
'''
gc_type = ''
process_time = 0.0
gc_time = 0.0
gc_size_before = ''
gc_size_after = ''
total_heap_size = ''
m = re.match(r'^(\d*\.\d*): \[GC remark \d*\.\d*: (.+), (\d*\.\d*) secs\]$', s)
if m:
gc_type = 'GC remark'
process_time = m.group(1)
gc_time = m.group(3)
else:
m = re.match(r'^(\d*\.\d*): \[GC cleanup (.+), (\d*\.\d*) secs\]$', s)
if m:
gc_type = 'GC cleanup'
process_time = m.group(1)
gc_time = m.group(3)
else:
m = re.match(r'^(\d*\.\d*): \[Full GC(.+) (\d+[MG])->(\d*[MG])\((\d*[MG])\), (\d*\.\d*) secs\]$', s)
if m:
gc_type = 'Full GC'
process_time = m.group(1)
gc_size_before = m.group(3)
gc_size_after = m.group(4)
total_heap_size = m.group(5)
gc_time = m.group(6)
if gc_type != '':
mywriter.writerow([fileinfo['filename'], linenum, fileinfo['host'], fileinfo['pid'], process_time, gc_type, gc_time, gc_size_before, gc_size_after, total_heap_size])
def process_search_pattern(s, linenum):
'''
Look for search strings of interest. If found write to csv.
'''
for search_pattern in search_li:
if re.search(search_pattern, s, re.IGNORECASE):
s = s.strip()
mywriter.writerow([fileinfo['filename'], linenum, fileinfo['host'], fileinfo['pid'], '', search_pattern, '', '', '', '', s])
break
def process_args():
global dirlist, output_filename, host_li
parser = argparse.ArgumentParser(add_help=False)
parser.add_argument("--start_dir", help="the root directory to begin processing")
parser.add_argument("--output_dir", help="where the output file should be written to. By default the output file will be located in a user's home directory.")
parser.add_argument("--log_level", choices=['CRITICAL', 'ERROR', 'WARNING', 'INFO', 'DEBUG'], help="logging level")
parser.add_argument("--hosts", help="list of hosts, separated by commas")
parser.add_argument('-h', '--help', action='help', default=argparse.SUPPRESS, help="This program parses a gc log file and provides a summary in csv format. The following JVM options should be used to generate the log file: -Xloggc:/path/to/file/gc_%%p.log -XX:+PrintCommandLineFlags -XX:+PrintGC -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintAdaptiveSizePolicy -XX:+PrintTenuringDistribution -XX:+PrintReferenceGC")
args = parser.parse_args()
if args.start_dir:
dirlist = [args.start_dir]
if args.output_dir:
output_filename = args.output_dir + os.path.sep + "gc_log_summary-" + gettimestamp() + ".csv"
    if args.log_level:
        # the choices above map directly onto logging's level names
        logging.basicConfig(level=getattr(logging, args.log_level))
    else:
        # set logging level. WARNING is default level
        logging.basicConfig(level=logging.WARNING)
if args.hosts:
host_li = args.hosts.split(',')
def myvisitor_2(fullpath, f):
global fileinfo
try:
hostname = get_hostname(f)
pid = get_pid(f)
fileinfo = {'filename': f, 'host': hostname, 'pid': pid}
process_file(fullpath)
except OSError as err:
print("OS error: {0}".format(err))
def main():
global mywriter
process_args()
# write output to csv file
with open(output_filename, 'w', newline='') as csvfile:
#with open(output_filename, 'wb') as csvfile:
mywriter = csv.writer(csvfile)
# write column headings
mywriter.writerow(col_li)
for dir in dirlist:
logging.debug(dir)
for root, dirs, files in os.walk(dir):
for name in files:
logging.debug(os.path.join(root, name))
(b, ext) = os.path.splitext(name)
for x in extlist:
m = re.match(x, ext)
if m:
fullpath = os.path.join(root, name)
if show_relative_path == True:
# add one for path separator
index = len(dir) + 1
fname = fullpath[index:]
myvisitor_2(fullpath, fname)
else:
myvisitor_2(fullpath, name)
for name in dirs:
logging.debug(os.path.join(root, name))
#os.path.walk(dir, myvisitor, extlist)
main()
import re
import sys
datefmt = r'^(\d\d\d\d-\d\d-\d\dT\d\d:\d\d:\d\d\.\d\d\d[\+\-]\d\d\d\d): (\d+\.\d+): '
# ^ date
# ^ time
# ^ millis
# ^ time zone
# ^ timestamp
filename = ''
# check python version
def check_version():
if sys.version_info < (3,0,0):
print("Please use a version of Python > 3")
sys.exit(-1)
if len(sys.argv) < 2:
print("No filename specified.")
print("Usage: {} <filename>".format(sys.argv[0]))
sys.exit(-1)
def process_args():
global filename
filename = sys.argv[1]
def process():
with open(filename, encoding="latin-1") as f:
# the number of lines that have heap size information
count = 0
# the total number of lines processed
linecount = 1
# date and time stamp
date_time = ''
# number of seconds elapsed since the process started
process_time = ''
        print('count, filename, line_number, date_time, process_time, begin_eden, begin_max_eden, end_eden, end_max_eden, begin_survivor, end_survivor, begin_heap, begin_max_heap, end_heap, end_max_heap')
for line in f:
line = line.strip()
#print(line)
m = re.match(datefmt, line)
if m:
# save current timestamp
date_time = m.group(1)
process_time = m.group(2)
# match heap information in following formats
# [Eden: 9632.0M(9632.0M)->0.0B(9624.0M) Survivors: 192.0M->200.0M Heap: 11.4G(16.0G)->2074.8M(16.0G)]
# [Eden: 4704.0M(9624.0M)->0.0B(9824.0M) Survivors: 200.0M->0.0B Heap: 6786.9M(16.0G)->931.6M(16.0G)], [Metaspace: 61553K->61499K(1105920K)]
            edenstr = r'\s*\[Eden: (\d+\.\d[BKMG])\((\d+\.\d[BKMG])\)->(\d+\.\d[BKMG])\((\d+\.\d[BKMG])\) '
            survivorstr = r'Survivors: (\d+\.\d[BKMG])->(\d+\.\d[BKMG]) '
            heapstr = r'Heap: (\d+\.\d[BKMG])\((\d+\.\d[BKMG])\)->(\d+\.\d[BKMG])\((\d+\.\d[BKMG])\)\].*'
patternstr = edenstr + survivorstr + heapstr
m = re.match(patternstr, line)
if m:
begin_eden = m.group(1)
begin_max_eden = m.group(2)
end_eden = m.group(3)
end_max_eden = m.group(4)
begin_survivor = m.group(5)
end_survivor = m.group(6)
begin_heap = m.group(7)
begin_max_heap = m.group(8)
end_heap = m.group(9)
end_max_heap = m.group(10)
count += 1
print('{},{},{},{},{},{},{},{},{},{},{},{},{},{},{}'.format(
count, filename, linecount, date_time, process_time, begin_eden,
begin_max_eden, end_eden, end_max_eden, begin_survivor, end_survivor,
begin_heap, begin_max_heap, end_heap, end_max_heap))
# match heap information in following formats
# 2022-02-22T14:22:29.770-0600: 3.287: [GC cleanup 18M->18M(3072M), 0.0059295 secs]
# 2022-02-22T14:22:34.301-0600: 7.817: [GC pause (Metadata GC Threshold) (young) (initial-mark) 123M->23M(3072M), 0.1070516 secs]
# 2022-02-22T15:27:01.100-0600: 3829.383: [GC pause (G1 Evacuation Pause) (young) 9827M->6775M(11G), 0.1417604 secs]
# 2022-02-22T16:42:59.750-0600: 8433.267: [GC pause (G1 Humongous Allocation) (young) (initial-mark) 1683M->1433M(3072M), 0.0867971 secs]
#young = r'\[GC pause \([\w ]+\) \(\w+\) (\d+[B|K|M|G])->(\d+[B|K|M|G])\((\d+[B|K|M|G])\), \d+\.\d+ secs\]'
            young = r'\[GC [\w \(\)-]+ (\d+[BKMG])->(\d+[BKMG])\((\d+[BKMG])\), \d+\.\d+ secs\]'
patternstr = datefmt + young
m = re.match(patternstr, line)
            if m:
                # the first two regex groups are consumed by datefmt
                begin_heap = m.group(3)
                end_heap = m.group(4)
                end_max_heap = m.group(5)
                count += 1
                print('{},{},{},{},{},{},{},{},{},{},{},{},{},{},{}'.format(
                    count, filename, linecount, date_time, process_time, '',
                    '', '', '', '', '',
                    begin_heap, '', end_heap, end_max_heap))
linecount += 1
def main():
check_version()
process_args()
process()
main()
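# example invocation (hypothetical log file name; the script name matches the batch runner further below):
# python3 heap-summary.py gc_app_1234.log > gc_app_1234.log.csv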
pip install csv-ical
pip install vobject
# check online for examples
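# a minimal sketch using vobject (not taken from the library docs; the event values are made up):
# build a calendar with one event and print it in ics format
import datetime
import vobject

cal = vobject.iCalendar()
cal.add('vevent')
cal.vevent.add('summary').value = 'Team meeting'
cal.vevent.add('dtstart').value = datetime.datetime(2021, 9, 14, 16, 0)
print(cal.serialize())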
teams = {'NY': 'Giants', 'Dallas': 'Cowboys', 'Green Bay': 'Packers'}
for k, v in teams.items():
    print('{} => {}'.format(k, v))
# sort the keys, then print
for k in sorted(teams.keys()):
    print('{} => {}'.format(k, teams[k]))
# alternatively, iterate over the keys in sorted order
for key in sorted(teams):
    print('{} => {}'.format(key, teams[key]))
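# for comparison: sort by value instead of by key
for k, v in sorted(teams.items(), key=lambda kv: kv[1]):
    print('{} => {}'.format(k, v))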
import argparse
import csv
import datetime
import logging
import os
import subprocess
import sys
import traceback
# globals
dirlist = []
output_filename = ''
# only run jar tvf on extensions of .jar
ext_li = [ ".jar"]
# only process the following file types in jar tvf output
filetype_ext_li = [ '.class', '.jar']
filename_prefix = 'jar_checker_summary'
col_heading_li = ['artifact', 'size', 'content']
home_dir = os.path.expanduser('~')
def gettimestamp():
today = datetime.date.today()
return today.strftime("%Y%b%d")
def capture_process_output(filename):
cmd = 'jar tvf {0}'.format(filename)
result = subprocess.run(cmd, shell=True, check=True, capture_output=True, text=True)
output = result.stdout
logging.debug("result output: %s", output)
return output
def myvisitor(fullpath):
logging.debug("fullpath: %s", fullpath)
try:
filename, file_extension = os.path.splitext(fullpath)
if file_extension in ext_li:
logging.debug("fullpath: %s", fullpath)
jar_output = capture_process_output(fullpath)
for line in jar_output.split('\n'):
logging.debug(">>>>line: %s", line)
# only process lines with output
if line:
line_li = line.split()
logging.debug(r'........line_li: <%s>', ','.join(line_li))
size = line_li[0]
content = line_li[7]
content_filename, content_file_extension = os.path.splitext(content)
if content_file_extension in filetype_ext_li:
mywriter.writerow([fullpath, size, content])
except Exception as err:
logging.warning("Error caught while visiting {}".format(fullpath))
logging.warning("Error: {0}".format(err))
traceback.print_exc()
def process_args():
global dirlist, output_filename
parser = argparse.ArgumentParser(add_help=False)
parser.add_argument("--start_dir", help="the root directory to begin processing. Multiple paths should be separated with a comma ','")
parser.add_argument("--output_dir", help="where the output file should be written to.")
parser.add_argument("--log_level", choices=['CRITICAL', 'ERROR', 'WARNING', 'INFO', 'DEBUG'], help="logging level. Default is WARNING.")
parser.add_argument('-h', '--help', action='help', default=argparse.SUPPRESS, help="This program searches jar files and outputs information about the content in csv format.")
args = parser.parse_args()
if args.start_dir:
dirlist = args.start_dir.split(',')
if args.output_dir:
output_filename = args.output_dir + os.path.sep + filename_prefix + "-" + gettimestamp() + ".csv"
else:
output_filename = home_dir + os.path.sep + filename_prefix + '-' + gettimestamp() + ".csv"
    # set the logging level; WARNING is the default
    logging.basicConfig(level=getattr(logging, args.log_level) if args.log_level else logging.WARNING)
logging.debug("dirlist: %s", dirlist)
def main():
if sys.version_info < (3,7,0):
print("Please use a version of Python > 3.7")
sys.exit(-1)
process_args()
global mywriter
with open(output_filename, 'w', newline='') as csvfile:
mywriter = csv.writer(csvfile)
mywriter.writerow(col_heading_li)
for root in dirlist:
logging.debug("Processing: %s", root)
for currentpath, dirs, files in os.walk(root):
for name in files:
fullpath = os.path.join(currentpath, name)
logging.debug("root_dir: %s, currentpath: %s, fullpath: %s", root, currentpath, fullpath)
myvisitor(fullpath)
main()
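# example invocation (hypothetical script name and paths):
# python3 jar-checker.py --start_dir /opt/app/lib --output_dir /tmp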
import argparse
import csv
import logging
import os
import re
import sys
import urllib.error
import urllib.request
########## This program is used to search for urls in pdf files.
########## The pdf files should be downloaded to a local directory.
########## This program will test the urls for broken links.
########## global variables
start_dir = ''
output_dir = ''
ext_list = ['pdf']
# key: url, value: urlInfo
links = {}
########## end global variables
class UrlInfo:
def __init__(self, url, hostname, files, count, responseCode, valid):
self.url = url
self.hostname = hostname
self.files = files
self.count = count
self.responseCode = responseCode
self.valid = valid
def openFileHelper(filename):
s = ''
with open(filename, 'rb') as fopen:
bytes = fopen.read()
# workaround to handle pdf files as they are binary format
s = bytes.decode('latin-1')
return s
# takes the filename of the file to search
def searchInFile(filename):
pattern = r'(http|https)://([a-zA-Z0-9\.#/%=_?-]*)'
# special characters
# # anchor
# % escape
# ? query string
# other special characters (not used by us):
# &, ~ (home directory location), + (plus sign)
text = openFileHelper(filename)
li = re.findall(pattern, text)
for item in li:
logging.debug('item is: {0}'.format(item));
url = item[0] + '://' + item[1]
        # get the hostname
hostname = ''
m = re.match(r'^([a-zA-Z0-9\.-]*)', item[1])
if m:
hostname = m.group(1)
logging.debug('url is: {0}'.format(url));
        if url not in links:
            urlInfo = UrlInfo(url, hostname, [filename], 1, 0, False)
            links[url] = urlInfo
else:
urlInfo = links[url]
urlInfo.count += 1
if filename not in urlInfo.files:
urlInfo.files.append(filename)
def testLinks():
print("testing links...")
    for key, value in links.items():
try:
responseCode = urllib.request.urlopen(key).getcode()
value.valid = True
value.responseCode = responseCode
except Exception as err:
logging.warning("Url: {0}, Error: {1}".format(key, err))
#traceback.print_exc()
if isinstance(err, urllib.error.HTTPError):
#print('type is: ')
#print(type(err))
value.responseCode = err.code
def outputLinks():
keys = list(links.keys())
keys.sort()
#numlinks = len(keys)
#print('The number of links: {0}'.format(numlinks))
output_filename = os.path.sep.join([output_dir, 'linkchecker.csv'])
with open(output_filename, 'w', newline='') as csvfile:
mywriter = csv.writer(csvfile)
# header
mywriter.writerow(['url', 'hostname', 'in files', 'response code', 'valid', 'occurrences'])
for key in keys:
value = links[key]
mywriter.writerow([key, value.hostname, ','.join(value.files), value.responseCode, value.valid, value.count])
#if not value.valid == False:
            #    print('url: {}, occurrences: {}'.format(key, value.count))
#else:
# print('url: {}, in files: {}, occurrences: {}'.format(key, ','.join(value.files), value.count))
def process_args():
global start_dir, output_dir, ext_list
parser = argparse.ArgumentParser(add_help=False)
parser.add_argument("--start_dir", help="the root directory to begin processing.", required=True)
parser.add_argument("--output_dir", help="where the output file should be written to. If not specified it will be the same as start_dir.")
parser.add_argument("--ext_list", help="the list of file extensions to search in separated with commas. Default is pdf.")
parser.add_argument("--log_level", choices=['CRITICAL', 'ERROR', 'WARNING', 'INFO', 'DEBUG'], help="logging level. Default is WARNING.")
parser.add_argument('-h', '--help', action='help', default=argparse.SUPPRESS, help="This program is used to check files on disk for valid urls.")
args = parser.parse_args()
if args.start_dir:
start_dir = args.start_dir
if args.output_dir:
output_dir = args.output_dir
else:
output_dir = start_dir
if args.ext_list:
ext_list = args.ext_list.split(',')
    # set the logging level; WARNING is the default
    logging.basicConfig(level=getattr(logging, args.log_level) if args.log_level else logging.WARNING)
logging.debug("start_dir is: " + start_dir)
logging.debug("ext_list is: " + ",".join(ext_list))
def process():
logging.debug("in process(), start_dir is: " + start_dir)
for root, dirs, files in os.walk(start_dir):
for name in files:
(base, extension) = os.path.splitext(name)
logging.debug("file name is: " + name)
logging.debug("base file name is: " + base)
if extension.startswith('.'):
ext = extension.lstrip('.')
ext_match = False
if ext_list:
if ext in ext_list:
ext_match = True
else:
ext_match = True
if ext_match:
input_filename = os.path.join(root, name)
searchInFile(input_filename)
testLinks()
outputLinks()
def main():
if sys.version_info < (3,0,0):
print("Please use a version of Python > 3")
sys.exit(-1)
process_args()
process()
main()
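# example invocation (hypothetical script name and path):
# python3 linkchecker.py --start_dir ~/Downloads/pdfs --ext_list pdf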
import os
import subprocess
import sys
import argparse
start_dir = os.path.expanduser('~')
output_dir = start_dir
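# path to the heap-summary.py script shown earlier in this gist; adjust for your environment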
heap_summary_cmd = '/home/dixson/work/tools/py/heap-summary.py'
def process_args():
global start_dir, output_dir
parser = argparse.ArgumentParser(add_help=False)
parser.add_argument("--start_dir", help="the root directory to begin processing.")
parser.add_argument("--output_dir", help="where the output file should be written to. If this is not set, this defaults to the start_dir")
parser.add_argument('-h', '--help', action='help', default=argparse.SUPPRESS, help="This program will parse a set of gc log files in a configured directory.")
args = parser.parse_args()
if args.start_dir:
start_dir = args.start_dir
if args.output_dir:
output_dir = args.output_dir
else:
output_dir = start_dir
def process():
for root, dirs, files in os.walk(start_dir):
for name in files:
(base, extension) = os.path.splitext(name)
if extension.startswith('.'):
ext = extension.lstrip('.')
if ext.isdigit() or ext == 'current' or ext == 'log':
input_filename = os.path.join(root, name)
output_filename = input_filename + '.csv'
print(input_filename)
print(output_filename)
with open(output_filename, "w") as outfile:
subprocess.run(['python3', heap_summary_cmd, input_filename], stdout=outfile)
def main():
if sys.version_info < (3,0,0):
print("Please use a version of Python > 3")
sys.exit(-1)
process_args()
process()
main()
python3 -m json.tool your_file.json
or
cat your_file.json | python3 -m json.tool
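# the same pretty-printing from inside Python, using only the standard library:
import json
with open('your_file.json') as f:
    print(json.dumps(json.load(f), indent=4))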
import argparse
import logging
import re
import shutil
import sys
multiplespaceregex = r'[\s]+'
filename = ''
search_text_file = ''
replacement_text_file = ''
# replace any white space characters with a regular expression for white space
def replaceWhiteSpace(s):
whitespacefound = False
searchstr = ''
for ch in s:
logging.debug(ch)
m = re.match(r'[\s]', ch)
if m:
logging.debug('I found whitespace')
if whitespacefound == False:
whitespacefound = True
else:
if whitespacefound == True:
searchstr += multiplespaceregex
searchstr += ch
whitespacefound = False
if whitespacefound == True:
searchstr += multiplespaceregex
return searchstr
def searchInFile(filename, searchstr):
text = openFileHelper(filename)
pattern = re.compile(searchstr)
m = pattern.search(text)
if m:
return True
else:
return False
def process_args():
global filename, search_text_file, replacement_text_file
parser = argparse.ArgumentParser(add_help=False)
parser.add_argument("--filename", help="the file to search.")
parser.add_argument("--search_text_file", help="the text block to search and replace for.")
parser.add_argument("--replacement_text_file", help="the replacement text block.")
parser.add_argument("--log_level", choices=['CRITICAL', 'ERROR', 'WARNING', 'INFO', 'DEBUG'], help="logging level. Default is WARNING.")
parser.add_argument('-h', '--help', action='help', default=argparse.SUPPRESS, help="This program helps to replace search replace block text.")
args = parser.parse_args()
if args.filename:
filename = args.filename
if args.search_text_file:
search_text_file = args.search_text_file
if args.replacement_text_file:
replacement_text_file = args.replacement_text_file
    # set the logging level; WARNING is the default
    logging.basicConfig(level=getattr(logging, args.log_level) if args.log_level else logging.WARNING)
def openFileHelper(filename):
    with open(filename, 'r') as f:
        return f.read()
def main():
global filename, search_text_file, replacement_text_file
if sys.version_info < (3,0,0):
print("Please use a version of Python > 3")
sys.exit(-1)
process_args()
original_text = openFileHelper(filename)
search_text = openFileHelper(search_text_file)
replacement_text = openFileHelper(replacement_text_file)
#searchstr = replaceWhiteSpace(search_text)
searchstr = search_text
logging.debug("searchstr..........")
logging.debug(searchstr)
found = searchInFile(filename, searchstr)
if found:
# copy file
dst = filename + '~'
shutil.copy(filename, dst)
pattern = re.compile(searchstr)
logging.debug("replacement text..........")
logging.debug(replacement_text)
replaced_text = pattern.sub(replacement_text, original_text)
logging.debug("replaced text..........")
logging.debug(replaced_text)
f = open(filename, "w")
n = f.write(replaced_text)
f.close()
main()
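# example invocation (hypothetical script name and file names):
# python3 replace-block.py --filename config.xml --search_text_file old_block.txt --replacement_text_file new_block.txt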
import argparse
import logging
import os
import re
import shutil
import sys
########## This program replaces find . -exec sed 's/a/b/g' {} \; because certain characters like backslash were too difficult to handle using bash
########## global variables
start_dir = ''
search_regex_file = ''
replacement_text_file = ''
ext_list = []
########## end global variables
def openFileHelper(filename):
    with open(filename, 'r') as f:
        return f.read()
# takes the filename of the file to search
# pattern is the regex pattern to search for
def searchInFile(filename, pattern):
text = openFileHelper(filename)
#pattern = re.compile(searchregex)
m = pattern.search(text)
if m:
return True
else:
return False
def process_args():
global start_dir, search_regex_file, replacement_text_file, ext_list
parser = argparse.ArgumentParser(add_help=False)
parser.add_argument("--start_dir", help="the root directory to begin processing.", required=True)
parser.add_argument("--search_regex_file", help="the file containing the regex to search for. The file should contain a single line and trailing whitespace will be stripped.", required=True)
parser.add_argument("--replacement_text_file", help="the file containing the replacement string. The file should contain a single line and trailing whitespace will be stripped.", required=True)
parser.add_argument("--ext_list", help="the list of file extensions to search in separated with commas.")
parser.add_argument("--log_level", choices=['CRITICAL', 'ERROR', 'WARNING', 'INFO', 'DEBUG'], help="logging level. Default is WARNING.")
parser.add_argument('-h', '--help', action='help', default=argparse.SUPPRESS, help="This program helps to search and replace text.")
args = parser.parse_args()
if args.start_dir:
start_dir = args.start_dir
if args.search_regex_file:
search_regex_file = args.search_regex_file
if args.replacement_text_file:
replacement_text_file = args.replacement_text_file
if args.ext_list:
ext_list = args.ext_list.split(',')
    # set the logging level; WARNING is the default
    logging.basicConfig(level=getattr(logging, args.log_level) if args.log_level else logging.WARNING)
logging.debug("start_dir is: " + start_dir)
logging.debug("search_regex_file is: " + search_regex_file)
logging.debug("replacement_text_file is: " + replacement_text_file)
logging.debug("ext_list is: " + ",".join(ext_list))
def process(pattern):
for root, dirs, files in os.walk(start_dir):
for name in files:
(base, extension) = os.path.splitext(name)
if extension.startswith('.'):
ext = extension.lstrip('.')
ext_match = False
if ext_list:
if ext in ext_list:
ext_match = True
else:
ext_match = True
if ext_match:
input_filename = os.path.join(root, name)
found = searchInFile(input_filename, pattern)
if found:
# create backup copy
backup_filename = input_filename + '~'
shutil.copy(input_filename, backup_filename)
original_text = openFileHelper(input_filename)
# replace text
replaced_text = pattern.sub(replacement_text, original_text)
# save to original file
f = open(input_filename, "w")
n = f.write(replaced_text)
f.close()
def main():
global search_regex, replacement_text
if sys.version_info < (3,0,0):
print("Please use a version of Python > 3")
sys.exit(-1)
process_args()
search_regex = openFileHelper(search_regex_file).rstrip()
replacement_text = openFileHelper(replacement_text_file).rstrip()
logging.debug("search_regex is: " + search_regex)
logging.debug("replacement_text is: " + replacement_text)
pattern = re.compile(search_regex)
process(pattern)
main()
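# example invocation (hypothetical script name and file names):
# python3 search-replace.py --start_dir ./src --search_regex_file search.txt --replacement_text_file replace.txt --ext_list java,xml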
# very simple, no need to download web framework and deploy app, simply navigate to directory. directory serves as document root
python3 -m http.server 8888
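# bind to localhost only (Python 3.4+), or serve a different directory (Python 3.7+):
python3 -m http.server 8888 --bind 127.0.0.1
python3 -m http.server 8888 --directory /path/to/docroot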
import argparse
import csv
import logging
import os
import re
import sys
import traceback
'''
This code parses a Java thread dump txt file and outputs it to a csv file,
for easier analysis.
It may write multiple csv files, depending on command line arguments.
It takes the original thread dump file name, removes the .txt suffix and
appends a .csv or -summary.csv suffix.
Sometimes the thread dump was generated with a long listing and will contain
additional fields, so this program tries a long listing strategy and simpler
listing strategies in turn. It is normal to see some errors logged, as the
strategies that do not match the input will fail.
'''
# a comma is the main csv separator, so a different character is used to join multiple values within a single field
SUB_SEPARATOR = '|'
title = ''
jni_global_references = ''
heap = ''
start_dir = r'C:\Users\Dixson\Downloads\support\logs\test'
home_dir = os.path.expanduser('~')
output_dir = ''
print_runnable = True
print_thread_count = True
print_other_thread_summary = True
class Substate:
def __init__(self, msg, objectid, otherClassName):
self.msg = msg
self.objectid = objectid
self.otherClassName = otherClassName
# strategy
# Enhancement JDK-8200720 allows for additional fields
class EnhancedLongListingStrategy(object):
def __init__(self):
self.name = 'EnhancedLongListingStrategy'
self.col_li = ['name', 'number', 'type', 'priority', 'os_priority', 'cpu', 'elapsed', 'tid', 'nid', 'status', 'state', 'substate', 'address', 'stack']
def process_threadprop(self, s):
# replace with underscores for easier parsing
s = s.replace('waiting on condition', 'waiting_on_condition')
s = s.replace('in Object.wait()', 'in_Object.wait()')
s = s.replace('waiting for monitor entry', 'waiting_for_monitor_entry')
#logging.debug("s: {}".format(s))
thread_name = ''
threadprop = {}
# extract thread name
m = re.match(r'"(.*)"(.*)$', s)
if m:
thread_name = m.group(1)
substring = m.group(2)
if (s.find('daemon') > -1):
# general case, most threads 'labelled' daemon
li = substring.split()
thread_no = li[0].lstrip('#')
thread_type = li[1]
thread_priority = li[2]
thread_ospriority = li[3]
thread_cpu = li[4]
thread_elapsed = li[5]
thread_tid = li[6]
thread_nid = li[7]
thread_status = li[8]
# some thread dumps show status with address, eg. sleeping[0x00007f297b44f000]
                if len(li) > 9:
thread_address = li[9]
else:
thread_address = ''
threadprop = {'name': thread_name, 'number': thread_no, 'type': thread_type, 'priority': thread_priority, 'os_priority': thread_ospriority,'cpu': thread_cpu, 'elapsed': thread_elapsed, 'tid': thread_tid, 'nid': thread_nid, 'status': thread_status, 'address': thread_address}
else:
# threads not labelled 'daemon'
logging.debug('substring {}'.format(substring))
m = re.match(r' #(\d+) (.*)$', substring)
if m:
thread_no = m.group(1)
substring = m.group(2)
li = substring.split()
thread_priority = li[0]
thread_ospriority = li[1]
thread_cpu = li[2]
thread_elapsed = li[3]
thread_tid = li[4]
thread_nid = li[5]
thread_status = li[6]
# some thread dumps show status with address, eg. sleeping[0x00007f297b44f000]
                    if len(li) > 7:
thread_address = li[7]
else:
thread_address = ''
threadprop = {'name': thread_name, 'number': thread_no, 'priority': thread_priority, 'os_priority': thread_ospriority, 'cpu': thread_cpu, 'elapsed':thread_elapsed, 'tid': thread_tid, 'nid': thread_nid, 'status': thread_status, 'address': thread_address}
else:
# jvm threads - only display basic information
# "G1 Conc#0" os_prio=0 cpu=1453.41ms elapsed=52307.25s tid=0x00007f912406ded0 nid=0x1cafd5 runnable
li = substring.split()
thread_ospriority = li[0]
thread_cpu = li[1]
thread_elapsed = li[2]
thread_tid = li[3]
thread_nid = li[4]
thread_status = li[5]
threadprop = {'name' : thread_name, 'os_priority' : thread_ospriority, 'cpu': thread_cpu, 'elapsed' : thread_elapsed, 'tid' : thread_tid, 'nid' : thread_nid, 'status' : thread_status}
return threadprop
# generated with jstack -l
class LongListingStrategy(object):
def __init__(self):
self.name = 'LongListingStrategy'
self.col_li = ['name', 'number', 'type', 'priority', 'os_priority', 'tid', 'nid', 'status', 'state', 'substate', 'address', 'stack']
def process_threadprop(self, s):
# replace with underscores for easier parsing
s = s.replace('waiting on condition', 'waiting_on_condition')
s = s.replace('in Object.wait()', 'in_Object.wait()')
s = s.replace('waiting for monitor entry', 'waiting_for_monitor_entry')
#logging.debug("s: {}".format(s))
thread_name = ''
threadprop = {}
# extract thread name
m = re.match(r'"(.*)"(.*)$', s)
if m:
thread_name = m.group(1)
substring = m.group(2)
# general case, most threads 'labelled' daemon
if (s.find('daemon') > -1):
li = substring.split()
thread_no = li[0].lstrip('#')
thread_type = li[1]
thread_priority = li[2]
thread_ospriority = li[3]
thread_tid = li[4]
thread_nid = li[5]
thread_status = li[6]
# some thread dumps show status with address, eg. sleeping[0x00007f297b44f000]
if len(li) >= 8:
thread_address = li[7]
else:
thread_address = ''
threadprop = {'name': thread_name, 'number': thread_no, 'type': thread_type, 'priority': thread_priority, 'os_priority': thread_ospriority, 'tid': thread_tid, 'nid': thread_nid, 'status': thread_status, 'address': thread_address}
else:
#"RMI Reaper" #14 prio=5 os_prio=0 tid=0x00007f2bd1d3f800 nid=0x2161 in Object.wait() [0x00007f2106550000]
#"main" #1 prio=5 os_prio=0 tid=0x00007f2bd000b800 nid=0x20ab waiting on condition [0x00007f2bd5f79000]
#"main" #1 prio=5 os_prio=0 tid=0x00007f79c000d800 nid=0x9091 sleeping[0x00007f79c8305000]
#"GS-swiftJmsSenderContainer-1" #205 prio=5 os_prio=0 tid=0x00007f684645a000 nid=0x6156 sleeping[0x00007f6735dea000]
m = re.match(r' #(\d+) (.*)$', substring)
if m:
thread_no = m.group(1)
substring = m.group(2)
li = substring.split()
thread_priority = li[0]
thread_ospriority = li[1]
thread_tid = li[2]
thread_nid = li[3]
thread_status = li[4]
                    # some thread dumps show status with address, eg. sleeping[0x00007f297b44f000]
                    if len(li) >= 6:
                        thread_address = li[5]
                    else:
                        thread_address = ''
threadprop = {'name': thread_name, 'number': thread_no, 'priority': thread_priority, 'os_priority': thread_ospriority, 'tid': thread_tid, 'nid': thread_nid, 'status': thread_status, 'address': thread_address}
else:
# jvm threads only display basic information
li = substring.split()
thread_ospriority = li[0]
thread_tid = li[1]
thread_nid = li[2]
thread_status = li[3]
threadprop = {'name' : thread_name, 'os_priority' : thread_ospriority, 'tid' : thread_tid, 'nid' : thread_nid, 'status' : thread_status}
return threadprop
# generated with jstack; missing thread number and os_priority
class SimpleListingStrategy(object):
def __init__(self):
self.name = 'SimpleListingStrategy'
self.col_li = ['name', 'type', 'priority', 'tid', 'nid', 'status', 'state', 'substate', 'address', 'stack', 'locked_ownable_synchronizers']
def process_threadprop(self, s):
# replace with underscores for easier parsing
s = s.replace('waiting on condition', 'waiting_on_condition')
s = s.replace('in Object.wait()', 'in_Object.wait()')
s = s.replace('waiting for monitor entry', 'waiting_for_monitor_entry')
thread_name = ''
threadprop = {}
# extract thread name
m = re.match(r'"(.*)"(.*)$', s)
if m:
thread_name = m.group(1)
substring = m.group(2)
# general case, most threads 'labelled' daemon
if (s.find('daemon') > -1):
li = substring.split()
thread_type = li[0]
thread_priority = li[1]
thread_tid = li[2]
thread_nid = li[3]
thread_status = li[4]
# some thread dumps show status with address, eg. sleeping[0x00007f297b44f000]
if len(li) >= 6:
thread_address = li[5]
else:
thread_address = ''
threadprop = {'name': thread_name, 'type': thread_type, 'priority': thread_priority, 'tid': thread_tid, 'nid': thread_nid, 'status': thread_status, 'address': thread_address}
else:
li = substring.split()
                if len(li) > 3:
thread_priority = li[0]
thread_tid = li[1]
thread_nid = li[2]
thread_status = li[3]
# some thread dumps show status with address, eg. sleeping[0x00007f297b44f000]
if len(li) >= 5:
thread_address = li[4]
else:
thread_address = ''
threadprop = {'name': thread_name, 'priority': thread_priority, 'tid': thread_tid, 'nid': thread_nid, 'status': thread_status, 'address': thread_address}
else:
#"GS-GSPingManager:com.gigaspaces.internal.lrmi.stubs.LRMISpaceImpl:1632991357520" Id=721 TIMED_WAITING
thread_id = li[0]
thread_status = li[1]
threadprop = {'name': thread_name, 'tid' : thread_id, 'status' : thread_status}
return threadprop
# end strategy
# parses dumps in the "name" Id=N STATE format (e.g. from java.lang.management ThreadInfo)
class SuperSimpleListingStrategy(object):
def __init__(self):
self.name = 'SuperSimpleListingStrategy'
self.col_li = ['name', 'thread_id', 'status', 'in_native', 'waiting_on_obj', 'other_thread', 'other_thread_id']
def process_threadprop(self, s):
threadprop = {}
thread_name = ''
thread_id = ''
thread_status = ''
waiting_on_object = ''
other_thread = ''
other_thread_id = ''
in_native = False
# extract thread name and id
m = re.match(r'"(.*)" Id=([\d]+) (.*)$', s)
if m:
thread_name = m.group(1)
thread_id = m.group(2)
substring = m.group(3)
in_native_match = re.match( r'(.*) \(in native\)', substring)
if in_native_match:
substring = in_native_match.group(1)
in_native = True
# most complicated
#"GS-LRMI-Connection-pool-1-thread-89" Id=263 WAITING on java.util.concurrent.locks.ReentrantLock$NonfairSync@66416e31 owned by "GS-LRMI-Connection-pool-1-thread-240" Id=413
owned_by_match = re.match( r'(WAITING|BLOCKED) on (.*) owned by "(.*)" Id=([\d]+)', substring)
if owned_by_match:
thread_status = owned_by_match.group(1)
waiting_on_object = owned_by_match.group(2)
other_thread = owned_by_match.group(3)
other_thread_id = owned_by_match.group(4)
# "Timer-4" Id=56 TIMED_WAITING on java.util.TaskQueue@1c7d49f1
else:
waiting_on_match = re.match(r'(TIMED_WAITING|WAITING) on (.*)', substring)
if waiting_on_match:
thread_status = waiting_on_match.group(1)
waiting_on_object = waiting_on_match.group(2)
else:
thread_status = substring.strip()
threadprop = {'name': thread_name, 'thread_id': thread_id,
'status': thread_status, 'in_native' : 'T' if in_native else '',
'waiting_on_obj': waiting_on_object, 'other_thread': other_thread, 'other_thread_id': other_thread_id}
return threadprop
# end strategy
# an indented line containing java.lang.Thread.State is usually the first line of the block
def process_state(li):
if( len(li) > 0):
#logging.debug(li[0])
m = re.match(r'^\s+java\.lang\.Thread\.State: (.*)$', li[0])
if m:
return m.group(1)
#if( block_li[0].find('java.lang.Thread.State:') > -1):
else:
return ''
else:
return ''
# a stack trace may have additional information I call substate
def process_substate(li):
#logging.debug("In process substate")
#logging.debug("li is: " + ''.join(li))
    substate_li = []
    substateObj_li = []
for s in li:
s = s.strip()
logging.debug("s is: '" + s + "'")
if( s.startswith('-')):
substate_li.append(s)
m = re.match(r'-(.*)<(.*)> \(a (.*)\)', s)
if m:
msg = m.group(1).strip()
objectid = m.group(2)
classname = m.group(3)
logging.debug("match found")
                substateObj = Substate(msg, objectid, classname)
                substateObj_li.append(substateObj)
return (SUB_SEPARATOR.join(substate_li), substateObj_li)
def process_stack(li):
stack_li = []
for s in li:
s = s.strip()
stack_li.append(s)
#logging.debug("begin>>>>> %s" % SUB_SEPARATOR.join(stack_li))
#logging.debug("end>>>>>>>")
return SUB_SEPARATOR.join(stack_li)
def process_heap(li):
heap_li = []
for s in li:
s = s.strip()
heap_li.append(s)
s = SUB_SEPARATOR.join(heap_li)
return s.replace(',', '\'')
# the information in this block occurs below the stack trace
def process_locked_ownable_sync(block_li):
#logging.debug("block_li in locked_ownable_synchronizers: {}".format(block_li))
if not block_li:
return ''
length = len(block_li)
for n in range(0, length):
s = block_li[n]
if s.find('Locked ownable synchronizers:') > -1 :
# return value in next line
if n + 1 < length:
return block_li[n + 1].strip().lstrip('-')
return ''
def process_block(strategy, block_li, nextblock_li, threadprop_by_name):
global title, jni_global_references, heap
logging.debug("BEGIN BLOCK")
logging.debug(block_li)
logging.debug("END BLOCK")
s = block_li[0]
if (s.startswith('"')):
# thread name found
threadprop = strategy.process_threadprop(s)
threadprop['state'] = process_state(block_li[1:])
threadprop['block'] = block_li[1:]
# there can be more than 1 thread referenced
(substate, substateObj) = process_substate(block_li[1:])
threadprop['substate'] = substate
threadprop['substateObj'] = substateObj
threadprop['stack'] = process_stack(block_li[1:])
threadprop['locked_ownable_synchronizers'] = process_locked_ownable_sync(nextblock_li)
threadprop_by_name[threadprop['name']] = threadprop
elif (s.startswith('Full thread dump')):
title = s
elif (s.startswith('JNI global references') or s.startswith('JNI global refs')):
jni_global_references = s
elif (s == 'Heap'):
heap = process_heap(block_li[1:])
else:
logging.debug('Skipping block that starts with line: {}'.format(s))
return threadprop_by_name
# print substate in another format for easy viewing
# print thread name, id, status, object id, classname
def print_substate(threadprop_by_name, mywriter):
mywriter.writerow(['substate (redux)', 'thread', 'tid', 'msg', 'other_oid', 'other_classname (e.g, locked/waiting on)'])
for k in threadprop_by_name.keys():
thread_name = k
#logging.debug(threadprop_by_name[k])
        threadprop = threadprop_by_name[k]
        # most strategies store tid=0x...; SuperSimpleListingStrategy stores a plain thread_id instead
        tid = threadprop.get('tid', threadprop.get('thread_id', ''))
        if '=' in tid:
            tid = tid.split('=')[1]
if 'substateObj' in threadprop:
substatusObj_li = threadprop_by_name[k]['substateObj']
for substatusObj in substatusObj_li:
mywriter.writerow(['',thread_name, tid, substatusObj.msg, substatusObj.objectid, substatusObj.otherClassName ])
def print_runnable_stack(threadprop_by_name, mywriter):
mywriter.writerow(['runnable', 'thread (in state RUNNABLE)', 'stack'])
for k in threadprop_by_name.keys():
thread_name = k
threadprop = threadprop_by_name[k]
state = threadprop['state']
if state == 'RUNNABLE':
# re-format original stack trace
block = [line.strip() for line in threadprop['block'][1:]]
block_s = "\n".join(block)
mywriter.writerow(['',thread_name,block_s])
def count_occurrences(threadprop_by_name, field, mywriter, column_name):
logging.debug("field is: " + field);
values = []
count_dict = {}
for k in threadprop_by_name.keys():
value = threadprop_by_name[k][field]
values.append(value)
for item in values:
if item in count_dict:
count = count_dict[item]
count += 1
count_dict[item] = count
else:
count_dict[item] = 1
mywriter.writerow([column_name, 'value', 'count'])
    '''
    alternative: sort by count descending (Python 3)
    for key, value in sorted(count_dict.items(), key=lambda kv: (kv[1], kv[0]), reverse=True):
        mywriter.writerow(['', key[:160], value])
    '''
sorted_keys = sorted(count_dict.keys())
for key in sorted_keys:
value = count_dict[key]
s = key[:160]
if not s:
s = "EMPTY"
mywriter.writerow(['', s, value])
def print_threads(strategy, threadprop_by_name, mywriter):
mywriter.writerow(strategy.col_li)
mywriter.writerow(['Title', title])
mywriter.writerow(['JNI global references', jni_global_references])
if heap:
mywriter.writerow(['Heap', heap])
mywriter.writerow([])
mywriter.writerow(['** Begin threads **'])
keys = sorted(threadprop_by_name.keys())
#keys.sort()
for k in keys:
#logging.debug('%s => %s' % (k, threadprop_by_name[k]))
li = []
threadprop_dict = threadprop_by_name[k]
for col in strategy.col_li:
if col in threadprop_dict:
s = threadprop_dict[col]
else:
s = ''
            s = s if s is not None else ''
li.append(s)
mywriter.writerow(li)
def write_csv(strategy, threadprop_by_name, filename):
# write output to csv file
# output compilation of thread properties
filename_woext, file_extension = os.path.splitext(filename)
output_filename = output_dir + os.path.sep + filename_woext + '.csv'
with open(output_filename, 'w', newline='') as csvfile:
mywriter = csv.writer(csvfile)
print_threads(strategy, threadprop_by_name, mywriter)
# output summary
    if print_other_thread_summary or print_thread_count or print_runnable:
output_filename = output_dir + os.path.sep + filename_woext + '-summary.csv'
with open(output_filename, 'w', newline='') as csvfile:
mywriter = csv.writer(csvfile)
if print_thread_count == True:
count_occurrences(threadprop_by_name, 'status', mywriter, 'status')
count_occurrences(threadprop_by_name, 'state', mywriter, 'state')
count_occurrences(threadprop_by_name, 'substate', mywriter, 'linked to')
# an application with many threads in a certain section of code may indicate a problem
count_occurrences(threadprop_by_name, 'stack', mywriter, 'stack (first few lines of)')
if print_other_thread_summary == True:
print_substate(threadprop_by_name, mywriter)
if print_runnable == True:
print_runnable_stack(threadprop_by_name, mywriter)
def process_file(fullpathname, filename):
line_number = 0
f = open(fullpathname)
# allblock_li is all the thread text sections saved to a list
allblock_li = []
# current_block_li is a text section containing information for a single thread
current_block_li = []
# k thread name -> v dictionary with key (column heading or property name), value pairs for that thread
threadprop_by_name = {}
firsttime = True
for line in f:
line_number += 1
s = line.rstrip()
#logging.debug(">> %d: %s" % (line_number, s))
# lines beginning with white space
m = re.match(r'^(\s)+(.*)$', s)
# separate lines in file into sections, ie, block
# save for future processing
# need to be able to look ahead into block and next block
if( not m):
# new block found
if( firsttime == False ):
#threadprop_by_name = process_block(strategy, current_block_li, threadprop_by_name)
allblock_li.append(current_block_li)
else:
firsttime = False
# reset current_block_li
current_block_li = [s]
else:
current_block_li.append(s)
    allblock_li.append(current_block_li)
    f.close()
#threadprop_by_name = process_block(strategy, current_block_li, threadprop_by_name)
# initialize strategies
strategy_li = [EnhancedLongListingStrategy(), LongListingStrategy(), SimpleListingStrategy(), SuperSimpleListingStrategy()]
# try each strategy until one processes cleanly
for strategy in strategy_li:
try:
threadprop_by_name = {}
length = len(allblock_li)
for n in range(0, length):
#for block in allblock_li:
block = allblock_li[n]
if (n+1 >= length):
nextblock = None
else:
nextblock = allblock_li[n+1]
threadprop_by_name = process_block(strategy, block, nextblock, threadprop_by_name)
write_csv(strategy, threadprop_by_name, filename)
# if this succeeds, no need to try next strategy
break
except Exception as err:
logging.warning("Error caught while parsing {} using strategy {}".format(filename, strategy.name))
logging.warning("Error: {0}".format(err))
traceback.print_exc()
def process_args():
global start_dir, output_dir, print_runnable, print_thread_count, print_other_thread_summary
parser = argparse.ArgumentParser(add_help=False)
parser.add_argument("--start_dir", help="the root directory to begin processing")
parser.add_argument("--output_dir", help="where the output file should be written to")
parser.add_argument("--print_runnable", help="print the stack traces of the runnable threads. Default is true")
parser.add_argument("--print_thread_count", help="print a summary of thread counts by class. Default is true")
parser.add_argument("--print_other_thread_summary", help="print a summary of the referenced threads. Default is true")
parser.add_argument("--log_level", choices=['CRITICAL', 'ERROR', 'WARNING', 'INFO', 'DEBUG'], help="logging level")
parser.add_argument('-h', '--help', action='help', default=argparse.SUPPRESS, help="This program parses a set of thread dump files generated with jstack or kill -3.")
args = parser.parse_args()
if args.start_dir:
start_dir = args.start_dir
    if args.output_dir:
        output_dir = args.output_dir
    else:
        output_dir = home_dir
if args.print_runnable:
if args.print_runnable.lower() == 'false' or args.print_runnable.lower() == 'f':
print_runnable = False
if args.print_thread_count:
if args.print_thread_count.lower() == 'false' or args.print_thread_count.lower() == 'f':
print_thread_count = False
if args.print_other_thread_summary:
if args.print_other_thread_summary.lower() == 'false' or args.print_other_thread_summary.lower() == 'f':
print_other_thread_summary = False
    # set the logging level; WARNING is the default
    logging.basicConfig(level=getattr(logging, args.log_level) if args.log_level else logging.WARNING)
def main():
if sys.version_info < (3,0,0):
print("Please use a version of Python > 3")
sys.exit(-1)
process_args()
for start, dirs, files in os.walk(start_dir):
for name in files:
            if name.endswith('.txt') or name.endswith('.tdump'):
process_file(os.path.join(start, name), name)
main()
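# example invocation (hypothetical script name and paths):
# python3 thread-dump-to-csv.py --start_dir /path/to/dumps --output_dir /tmp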