python examples
import os
import sys
import csv
import re
import logging
import argparse
import datetime

col_li = ['filename', 'line_number', 'host', 'pid', 'comp', 'id', 'time', 'ms', 'category', 'level', 'logger', 'message']
dirlist = [r'E:\log']
start_date = None
end_date = None
# date format used to convert command line arguments into a datetime object
# example: 2021-09-14
filter_date_fmt = '%Y-%m-%d'
# adding hours, minutes and seconds
filter_datetime_fmt = filter_date_fmt + ' %H:%M:%S'
home_dir = os.path.expanduser('~')
filename_prefix = 'app_log_summary'
output_filename = ''
show_fullpath = False
# list of regex patterns matched against file extensions to visit
extlist = [r'\.\d+', r'\.log', r'\.out', r'\.stdouterr', r'\.err']
# regex representing the entire date-time portion of a line in a log file
# example: 2021-09-14 16:22:05,124
datefmt = r'(\d\d\d\d-\d\d-\d\d \d\d:\d\d:\d\d),(\d\d\d)'
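# Illustrative check of datefmt (hypothetical log line, values per the example above):
#   m = re.match(r'.*' + datefmt, '2021-09-14 16:22:05,124 INFO starting')
#   m.groups()  ->  ('2021-09-14 16:22:05', '124')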
# search for the following strings that may indicate an error
error_li = ['warning', 'severe', 'exception', 'error', 'failure', 'Long GC collection']
# for setting log level
level_li = ['SEVERE', 'WARNING', 'INFO', 'CONFIG', 'FINE', 'FINER', 'FINEST']
host_li = []

def gettimestamp():
    today = datetime.date.today()
    return today.strftime("%Y%b%d")

# check if string matches any of the hostnames
def get_hostname(s):
    for host in host_li:
        pattern = '.*({0}).*'.format(host)
        m = re.match(pattern, s)
        if m:
            return m.group(1)
    return ''

# check if filename contains pid and component information
# only works if the filename format has not changed
def get_pid(s):
    comp = ''
    id = ''
    host = ''
    pid = ''
    patternstr = r'.*(gsc|manager|gsm|lus)_(\d+)-([\w\.]+)-(\d+).*'
    m = re.match(patternstr, s)
    if m:
        comp = m.group(1)
        id = m.group(2)
        host = m.group(3)
        pid = m.group(4)
    else:
        # other processes: gsa, GSWebUI, ui, service
        patternstr = r'.*(gsa|GSWebUI|ui|service)-([\w\.]+)-(\d+).*'
        m = re.match(patternstr, s)
        if m:
            comp = m.group(1)
            host = m.group(2)
            pid = m.group(3)
    return (comp, id, host, pid)
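# Illustrative (hypothetical filename following the expected layout):
#   get_pid('gsc_1-myhost.example.com-12345.log')
#   ->  ('gsc', '1', 'myhost.example.com', '12345')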
def process_file(fullpath):
    line_number = 0
    with open(fullpath, encoding="latin-1") as f:
        sDate = ''
        dtDate = None
        millis = ''
        for line in f:
            found = False
            line_number += 1
            # skip lines beginning with white space
            if re.match(r'\s', line):
                continue
            # save the timestamp for lines with no timestamp
            patternstr = r'.*{}.*'.format(datefmt)
            m = re.match(patternstr, line)
            if m:
                sDate = m.group(1)
                dtDate = datetime.datetime.strptime(sDate, filter_datetime_fmt)
                millis = m.group(2)
            # filter out log lines by date
            if start_date is not None and dtDate is not None and dtDate < start_date:
                continue
            if end_date is not None and dtDate is not None and dtDate > end_date:
                continue
            for error_pattern in error_li:
                if re.search(error_pattern, line, re.IGNORECASE):
                    found = True
                    break
            logging.debug("log date as string: %s, log date: %s", sDate, '' if dtDate is None else dtDate.strftime(filter_date_fmt))
            if found:
                # truncate the line
                line = line[:300]
                line = line.rstrip()
                logging.debug("Line: %s", line)
                process_line(line, fullpath, line_number, sDate, millis)
def process_line(s, fullpath, line_number, date, millis):
    # example: 2017-01-05 14:11:21,821 LUS INFO [com.sun.jini.reggie] - Exception
    # example: 2016-12-31 17:38:57,334 pmds.deployment-1.8.9-pu.18 [2] WARNING [com.gigaspaces.core.common] - Primary space is unavailable
    patternstr = r'{}{}'.format(datefmt, r' ([\w \-\.]*)(\[\d\] )?([\w]*)? \[([\w\-\.]*)\] - (.*)$')
    m = re.match(patternstr, s)
    # expanded form of the pattern:
    #   (\d\d\d\d-\d\d-\d\d \d\d:\d\d:\d\d),(\d\d\d) ([\w \-\.]*)(\[\d\] )?([\w]*)? \[([\w\-\.]*)\] - (.*)$
    #    ^ date                              ^ millis ^ category  ^ optional ^ level  ^ logger       ^ message
    # the optional "[n] " group matches 0 or 1 times
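    # Illustrative, using the first example comment above:
    #   s = '2017-01-05 14:11:21,821 LUS INFO [com.sun.jini.reggie] - Exception'
    #   m.groups() -> ('2017-01-05 14:11:21', '821', 'LUS INFO', None, '', 'com.sun.jini.reggie', 'Exception')
    #   the level extraction below then splits 'LUS INFO' into category 'LUS' and level 'INFO'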
    if m:
        # 1 date
        # 2 millis
        # 3 category
        # 4 optional, '[2]' in the second example comment above
        # 5 level
        # 6 logger
        # 7 message
        category = ''
        level = ''
        if m.group(4) is None:
            category = m.group(3)
            # extract level information
            # e.g., LUS INFO
            for i in level_li:
                index = category.find(i)
                if index >= 0:
                    level = category[index:]
                    category = category[0:index]
                    break
        else:
            category = m.group(3) + m.group(4)
            level = m.group(5)
        # these groups also grab the space that may come after the optional string; strip it out
        category = category.strip()
        level = level.strip()
        mywriter.writerow([fileinfo['path'], line_number, fileinfo['host'], fileinfo['pid'], fileinfo['comp'], fileinfo['id'], m.group(1), m.group(2), category, level, m.group(6), m.group(7)])
    else:
        # sometimes clients just provide output of the gs-agent process
        # [gsc][1/10120] 2017-10-11 10:52:37,557 CommonClassLoader WARNING [net.jini.discovery.LookupLocatorDiscovery] - java.net.SocketTimeoutException: connect timed out - using unicast locator 10.10.10.117:4174 - delay next lookup by 1,000 ms
        patternstr = r'{}{}{}'.format(r'\[(\w*)\]\[(\d*)/(\d*)\]\s*', datefmt, r' ([\w \-\.]*)(\[\d\] )?([\w]*)? \[([\w\-\.]*)\] - (.*)$')
        #                                ^ proc    ^ id  ^ pid - the rest repeats the regex used above
        m = re.match(patternstr, s)
        if m:
            # 1 component
            # 2 id
            # 3 pid
            # 4 date
            # 5 millis
            # 6 category
            # 7 optional
            # 8 level
            # 9 logger
            # 10 message
            category = ''
            level = ''
            if m.group(7) is None:
                category = m.group(6)
                # extract level information
                for i in level_li:
                    index = category.find(i)
                    if index >= 0:
                        level = category[index:]
                        category = category[0:index]
                        break
                category = category.strip()
                if category.upper() == m.group(1).upper():
                    category = ''
            else:
                category = m.group(6) + m.group(7)
                level = m.group(8)
            mywriter.writerow([fileinfo['path'], line_number, fileinfo['host'], m.group(3), m.group(1), m.group(2), m.group(4), m.group(5), category, level, m.group(9), m.group(10)])
        else:
            # [manager][1/13986] Caused by: com.gigaspaces.security.AuthenticationException: Authentication request is invalid - you are not logged in.
            # log message pattern missing timestamp
            patternstr = r'{}{}'.format(r'\[(\w*)\]\[(\d*)/(\d*)\]\s*', r'(.*)$')
            #                              ^ comp   ^ id  ^ pid        ^ message
            m = re.match(patternstr, s)
            if m:
                # 1 component
                # 2 id
                # 3 pid
                # 4 message
                mywriter.writerow([fileinfo['path'], line_number, fileinfo['host'], m.group(3), m.group(1), m.group(2), date, millis, '', '', '', m.group(4)])
            else:
                mywriter.writerow([fileinfo['path'], line_number, fileinfo['host'], fileinfo['pid'], fileinfo['comp'], fileinfo['id'], date, millis, '', '', '', s])
def process_args():
    global dirlist, start_date, end_date, filename_prefix, output_filename, host_li, show_fullpath
    parser = argparse.ArgumentParser(add_help=False)
    parser.add_argument("--start_dir", help="the root directory to begin processing.")
    parser.add_argument("--output_dir", help="where the output file should be written to.")
    parser.add_argument("--start_date", help="the date to begin processing errors. Log lines with dates before the start date will be filtered out. Example format: 2021-09-21")
    parser.add_argument("--end_date", help="the date to end processing errors. Log lines with dates after the end date will be filtered out. Example format: 2021-09-21")
    parser.add_argument("--log_level", choices=['CRITICAL', 'ERROR', 'WARNING', 'INFO', 'DEBUG'], help="logging level. Default is WARNING.")
    parser.add_argument("--hosts", help="list of hosts, separated by commas.")
    parser.add_argument("--filename_prefix", help="Output filename prefix.")
    parser.add_argument("--show_fullpath", help="Output the full path. Default is false.")
    parser.add_argument('-h', '--help', action='help', default=argparse.SUPPRESS, help="This program parses a set of XAP log files formatted with standard XAP out-of-the-box settings.")
    args = parser.parse_args()
    if args.filename_prefix:
        filename_prefix = args.filename_prefix
    if args.start_dir:
        dirlist = [args.start_dir]
    if args.start_date:
        start_date = datetime.datetime.strptime(args.start_date, filter_date_fmt)
    if args.end_date:
        end_date = datetime.datetime.strptime(args.end_date, filter_date_fmt)
    if args.output_dir:
        output_filename = args.output_dir + os.path.sep + filename_prefix + "-" + gettimestamp() + ".csv"
    else:
        output_filename = home_dir + os.path.sep + filename_prefix + '-' + gettimestamp() + ".csv"
    if args.show_fullpath:
        show_fullpath = args.show_fullpath.lower() in ('true', 't')
    if args.log_level:
        if args.log_level == 'CRITICAL':
            logging.basicConfig(level=logging.CRITICAL)
        elif args.log_level == 'ERROR':
            logging.basicConfig(level=logging.ERROR)
        elif args.log_level == 'INFO':
            logging.basicConfig(level=logging.INFO)
        elif args.log_level == 'DEBUG':
            logging.basicConfig(level=logging.DEBUG)
        else:
            logging.basicConfig(level=logging.WARNING)
    else:
        # set logging level. WARNING is the default level
        logging.basicConfig(level=logging.WARNING)
    if args.hosts:
        host_li = args.hosts.split(',')
# legacy visitor kept for reference; main() walks the tree itself via os.walk
def myvisitor(extlist, dirname, names):
    global fileinfo
    logging.debug("Current directory: %s", dirname)
    for f in names:
        (b, ext) = os.path.splitext(f)
        logging.debug("Filename base: %s Ext: %s", b, ext)
        for x in extlist:
            m = re.match(x, ext)
            if m:
                fullpath = os.path.join(dirname, f)
                logging.debug("Fullpath: %s", fullpath)
                try:
                    hostname = get_hostname(f)
                    fileinfo = {'host': hostname}
                    process_file(fullpath)
                except OSError as err:
                    print("OS error: {0}".format(err))
                break

def myvisitor_2(fullpath, start_dir, filename):
    global fileinfo
    try:
        relative_path = "{}{}".format('.', fullpath.replace(start_dir, '', 1))
        hostname = get_hostname(relative_path)
        if not show_fullpath:
            path = relative_path
        else:
            path = fullpath
        (comp, id, host, pid) = get_pid(filename)
        if hostname == '':
            hostname = host
        fileinfo = {'host': hostname, 'path': path, 'comp': comp, 'id': id, 'pid': pid}
        process_file(fullpath)
    except OSError as err:
        print("OS error: {0}".format(err))
def main():
    if sys.version_info < (3, 0, 0):
        print("This script requires Python 3 or later")
        sys.exit(-1)
    global mywriter
    process_args()
    # write output to csv file
    with open(output_filename, 'w', newline='') as csvfile:
        mywriter = csv.writer(csvfile)
        mywriter.writerow(col_li)
        for i in dirlist:
            logging.debug("Processing: %s", i)
            for root, dirs, files in os.walk(i):
                for name in files:
                    logging.debug(os.path.join(root, name))
                    (b, ext) = os.path.splitext(name)
                    for x in extlist:
                        m = re.match(x, ext)
                        if m:
                            fullpath = os.path.join(root, name)
                            myvisitor_2(fullpath, i, name)
                for name in dirs:
                    logging.debug(os.path.join(root, name))

main()
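# Example invocation (script name is illustrative):
#   python xap_log_summary.py --start_dir /path/to/logs --start_date 2021-09-01 --hosts host1,host2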
import argparse
import logging
import os
import sys

start_dir = os.path.expanduser('~')
show_relpath = False
filter_li = []

class file_suffix_filter:
    # heap dump files
    # extlist = ['.hprof']
    def __init__(self, li):
        self.extlist = li
    def hasFileMatch(self):
        return True
    def hasDirectoryMatch(self):
        return False
    def isFileMatch(self, path, filename):
        (base, ext) = os.path.splitext(filename)
        return ext in self.extlist

class named_dir_filter:
    def __init__(self, named_dir_li):
        self.dirname_li = named_dir_li
    def hasFileMatch(self):
        return False
    def hasDirectoryMatch(self):
        return True
    def isDirectoryMatch(self, dirname):
        logging.debug("dirname is: %s", dirname)
        return dirname in self.dirname_li

class large_file_filter:
    def __init__(self, f_size):
        self.file_size = f_size
    def hasFileMatch(self):
        return True
    def hasDirectoryMatch(self):
        return False
    def isFileMatch(self, path, filename):
        fname = os.path.join(path, filename)
        if os.path.islink(fname):
            return False
        return os.path.getsize(fname) > self.file_size
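# A minimal sketch of an additional filter following the same informal interface
# (hasFileMatch/hasDirectoryMatch/isFileMatch). Hypothetical, not part of the
# original script: it matches files whose modification time is older than a cutoff.
class old_file_filter:
    def __init__(self, max_age_days):
        self.max_age_seconds = max_age_days * 24 * 60 * 60
    def hasFileMatch(self):
        return True
    def hasDirectoryMatch(self):
        return False
    def isFileMatch(self, path, filename):
        import time  # local import keeps the sketch self-contained
        fname = os.path.join(path, filename)
        if os.path.islink(fname):
            return False
        # a file counts as a match when its mtime is older than the cutoff
        return (time.time() - os.path.getmtime(fname)) > self.max_age_seconds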
# recursively visit the start directory and its children
def process():
    for root, dirs, files in os.walk(start_dir):
        rel_dir = os.path.relpath(root, start_dir)
        for name in files:
            for filter in filter_li:
                if filter.hasFileMatch() and filter.isFileMatch(root, name):
                    if show_relpath:
                        filename = os.path.join('.', rel_dir, name)
                    else:
                        filename = os.path.join(root, name)
                    print(filename)
        for dir in dirs:
            for filter in filter_li:
                if filter.hasDirectoryMatch() and filter.isDirectoryMatch(dir):
                    if show_relpath:
                        filename = os.path.join('.', rel_dir, dir)
                    else:
                        filename = os.path.join(root, dir)
                    print(filename)

def process_args():
    global start_dir, show_relpath, filter_li
    is_file_suffix_filter = True
    file_suffix = ['.hprof']
    is_named_dir_filter = True
    is_large_file_filter = True
    large_file_filter_size = 1_000_000_000
    named_dir = ['logs']
    parser = argparse.ArgumentParser(add_help=False)
    parser.add_argument("--start_dir", help="The root directory to begin processing. Default is the user's home directory.")
    parser.add_argument("--show_relpath", help="Output the relative path, otherwise show the full path. Default is False.")
    parser.add_argument("--log_level", choices=['CRITICAL', 'ERROR', 'WARNING', 'INFO', 'DEBUG'], help="logging level. Default is WARNING.")
    parser.add_argument("--file_suffix_filter", choices=['true', 'false'], help="Filter in files that match a suffix. Default is true.")
    parser.add_argument("--file_suffix", help="A list of file suffixes to be used with --file_suffix_filter, separated by commas. Default suffixes: '.hprof'.")
    parser.add_argument("--named_dir_filter", choices=['true', 'false'], help="Filter in directories based on a name. Default is true.")
    parser.add_argument("--named_dir", help="A list of directories used with --named_dir_filter, separated by commas. Default directories: 'logs'. Other suggestions: target,work,deploy")
    parser.add_argument("--large_file_filter", choices=['true', 'false'], help="Filter in files larger than a default size of {}. Default is true.".format(large_file_filter_size))
    parser.add_argument("--large_file_filter_size", help="Large file filter size.")
    parser.add_argument('-h', '--help', action='help', default=argparse.SUPPRESS, help="This program will recurse a directory and look for files to be cleaned up.")
    # process arguments
    args = parser.parse_args()
    if args.start_dir:
        start_dir = args.start_dir
    if args.show_relpath:
        show_relpath = args.show_relpath.lower() in ('true', 't')
    if args.file_suffix_filter:
        is_file_suffix_filter = args.file_suffix_filter.lower() in ('true', 't')
    if args.file_suffix:
        file_suffix = args.file_suffix.split(',')
    if args.named_dir_filter:
        is_named_dir_filter = args.named_dir_filter.lower() in ('true', 't')
    if args.named_dir:
        named_dir = args.named_dir.split(',')
    if args.large_file_filter:
        is_large_file_filter = args.large_file_filter.lower() in ('true', 't')
    if args.large_file_filter_size:
        large_file_filter_size = int(args.large_file_filter_size)
    # set values based on arguments
    if is_file_suffix_filter:
        filter_li.append(file_suffix_filter(file_suffix))
    if is_named_dir_filter:
        filter_li.append(named_dir_filter(named_dir))
    if is_large_file_filter:
        filter_li.append(large_file_filter(large_file_filter_size))
    if args.log_level:
        if args.log_level == 'CRITICAL':
            logging.basicConfig(level=logging.CRITICAL)
        elif args.log_level == 'ERROR':
            logging.basicConfig(level=logging.ERROR)
        elif args.log_level == 'INFO':
            logging.basicConfig(level=logging.INFO)
        elif args.log_level == 'DEBUG':
            logging.basicConfig(level=logging.DEBUG)
        else:
            logging.basicConfig(level=logging.WARNING)
    else:
        # set logging level. WARNING is the default level
        logging.basicConfig(level=logging.WARNING)

def main():
    if sys.version_info < (3, 0, 0):
        print("This script requires Python 3 or later")
        sys.exit(-1)
    process_args()
    process()

main()
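# Example invocation (script name is illustrative):
#   python find_cleanup_candidates.py --start_dir /var/tmp --named_dir logs,work --large_file_filter_size 500000000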
import argparse
import csv
from datetime import datetime
import logging
import sys

file = r'C:\Users\Dixson\tmp.csv'
before_dt = None
after_dt = None
col_no = 1

def process(fin):
    # note: output is always written to tmp.csv in the current directory;
    # choose an input path that does not collide with it
    with open('tmp.csv', 'w', newline='') as csvfile_out:
        mywriter = csv.writer(csvfile_out)
        with open(fin, newline='') as csvfile:
            reader = csv.reader(csvfile)
            for row in reader:
                value = row[col_no]
                dt = convert_dt(value)
                logging.debug("Value: {}, date: {} on column {}".format(value, dt, col_no))
                if dt is None:
                    # rows without a parseable date pass through unfiltered
                    mywriter.writerow(row)
                    continue
                if (before_dt is None or dt < before_dt) and (after_dt is None or dt > after_dt):
                    mywriter.writerow(row)

# example date: 2017-01-05 14:11:21
def convert_dt(s):
    try:
        return datetime.strptime(s, '%Y-%m-%d %H:%M:%S')
    except (ValueError, TypeError):
        return None
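# Illustrative:
#   convert_dt('2017-01-05 14:11:21')  ->  datetime(2017, 1, 5, 14, 11, 21)
#   convert_dt('not a date')           ->  None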
def process_args():
    global file, before_dt, after_dt, col_no
    parser = argparse.ArgumentParser(add_help=False)
    parser.add_argument("-f", "--file", help="the input file. If not provided, /dev/stdin is used.")
    parser.add_argument("--before", help='include dates before the provided date. E.g., --before "2017-01-05 14:11:21"')
    parser.add_argument("--after", help="include dates after the provided date.")
    parser.add_argument("--columnNumber", help="the column number that has the date field, beginning at 0.")
    parser.add_argument("--log_level", choices=['CRITICAL', 'ERROR', 'WARNING', 'INFO', 'DEBUG'], help="logging level")
    parser.add_argument('-h', '--help', action='help', default=argparse.SUPPRESS, help="This program parses a csv file using the date filter criteria.")
    args = parser.parse_args()
    if args.file:
        file = args.file
    else:
        # won't work on Windows
        file = '/dev/stdin'
    if args.before:
        before_dt = convert_dt(args.before)
    if args.after:
        after_dt = convert_dt(args.after)
    if args.columnNumber:
        col_no = int(args.columnNumber)
    if args.log_level:
        if args.log_level == 'CRITICAL':
            logging.basicConfig(level=logging.CRITICAL)
        elif args.log_level == 'ERROR':
            logging.basicConfig(level=logging.ERROR)
        elif args.log_level == 'INFO':
            logging.basicConfig(level=logging.INFO)
        elif args.log_level == 'DEBUG':
            logging.basicConfig(level=logging.DEBUG)
        else:
            logging.basicConfig(level=logging.WARNING)
    else:
        # set logging level. WARNING is the default level
        logging.basicConfig(level=logging.WARNING)

def main():
    if sys.version_info < (3, 0, 0):
        print("This script requires Python 3 or later")
        sys.exit(-1)
    process_args()
    process(file)

main()
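# Example invocation (script name is illustrative; reads /dev/stdin when --file is omitted):
#   python csv_date_filter.py --after "2017-01-01 00:00:00" --columnNumber 6 -f input.csv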
import argparse
import csv
import datetime
import logging
import os
import re
import sys

'''
This program parses a gc log file for stop-the-world phases and keywords and writes a summary to a csv file.
Assumes PrintGCDateStamps has been enabled.
Assumes PrintAdaptiveSizePolicy has been enabled.
young gc types - G1 Evacuation Pause, G1 Humongous Allocation, Metadata GC Threshold
mixed types - G1 Evacuation Pause
full gc types - Allocation Failure, System.gc()
'''
# list of columns
col_li = ['file name', 'line no.', 'host', 'pid', 'date time', 'process time', 'gc type/keyword', 'time', 'comment']
# list of special extensions to visit
# versioned logs will be in the format .1, .2, etc. This is checked elsewhere.
extlist = ['.current']
#extlist = ['.log', '.current', '.1', '.2']
# special patterns to search for
search_li = ['to-space', 'humongous']
#search_li = ['to-space', 'humongous', r'System.gc\(\)']
datefmt = r'(\d\d\d\d-\d\d-\d\dT\d\d:\d\d:\d\d\.\d\d\d[\+\-]\d\d\d\d): (\d+\.\d+): '
#          ^ date time                                                 ^ timestamp
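# Illustrative:
#   m = re.match(datefmt, '2017-09-01T16:12:51.133+0000: 134.345: [GC pause ...')
#   m.groups()  ->  ('2017-09-01T16:12:51.133+0000', '134.345')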
def gethomedir():
    return os.path.expanduser('~')

def gettimestamp():
    today = datetime.date.today()
    return today.strftime("%Y%b%d")

# globals
# list of directories to visit
dirlist = [r'E:\log']
output_filename = '{}{}gc-summary-{}.csv'.format(gethomedir(), os.path.sep, gettimestamp())
show_relative_path = False
enable_humongous = False
# a list of possible host names
host_li = []

# log files were collected and put in directories by hostname, separated by '.'
def get_hostname(dirpath):
    for h in host_li:
        if dirpath.find(h) > -1:
            return h
    return ''

# get the pid from the log file name
# use -Xloggc:/path/to/file/gc.%p.log, where %p tells the JVM to substitute the pid
def get_pid(filename):
    li = filename.split('pid')
    if len(li) == 1:
        return li[0]
    else:
        (pid, rest) = li[1].split('.', 1)
        logging.debug("pid: %s", pid)
        return pid
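# Illustrative: HotSpot expands %p to something like 'pid12345', so a log named
# 'gc.pid12345.log' yields:
#   get_pid('gc.pid12345.log')  ->  '12345'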
# not used; this is a deprecated version of the visitor
#def myvisitor(extlist, dirname, names):
#    global fileinfo
#    logging.debug("Current directory: %s", dirname)
#    for f in names:
#        (p, ext) = os.path.splitext(f)
#        logging.debug("%s %s", f, ext)
#        if ext in extlist:
#            fullpath = os.path.join(dirname, f)
#            logging.debug(fullpath)
#            try:
#                hostname = get_hostname(dirname)
#                pid = get_pid(f)
#                fileinfo = {'filename': f, 'host': hostname, 'pid': pid}
#                process_file(fullpath)
#            except OSError as err:
#                print("OS error: {0}".format(err))
def myvisitor_2(fullpath, f):
    global fileinfo
    try:
        hostname = get_hostname(f)
        pid = get_pid(f)
        fileinfo = {'filename': f, 'host': hostname, 'pid': pid}
        process_file(fullpath)
    except OSError as err:
        print("OS error: {0}".format(err))

def process_file(fullpath):
    # this section processes log messages that occupy a single line
    linenum = 0
    with open(fullpath, 'r') as f:
        date_time = ''
        process_time = ''
        # process line by line to get basic information
        for line in f:
            linenum += 1
            m = re.match(r'^' + datefmt, line)
            if m:
                # save current timestamp
                date_time = m.group(1)
                process_time = m.group(2)
            # check for keywords of interest
            process_search_pattern(line, linenum, date_time, process_time)
            if line.startswith('Java HotSpot(TM)') or line.startswith('Memory:') or line.startswith('CommandLine flags:'):
                process_jvminfo(line, linenum)
            elif not line.startswith(' '):
                # check for stw pauses that appear on one line
                process_remark_cleanup(line, linenum)
        # this section processes log messages that span multiple lines
        # read file object to string. When -XX:+PrintAdaptiveSizePolicy is used,
        # gc phases need a multi-line regex to handle
        # check for stw pauses that span multiple lines
        f.seek(0)
        text = f.read()
    # we are interested in activity that causes a stop-the-world pause and the duration of the gc
    # https://blogs.oracle.com/poonam/entry/understanding_g1_gc_logs
    # https://www.oracle.com/technetwork/articles/java/g1gc-1984535.html
    # process multi-line gc phases
    process_young_mixed(text)
    process_full(text)
###############################################################################
# methods that process multi-line messages
###############################################################################
def process_young_mixed(s):
    '''
    young generation and mixed collections share similar formats
    These gc log statements show up on multiple lines.
    Example:
    2017-09-01T16:12:51.133+0000: 134.345: [GC pause (Metadata GC Threshold) (young) (initial-mark)
    Desired survivor size 48234496 bytes, new threshold 15 (max 15)
    134.346: [G1Ergonomics (CSet Construction) start choosing CSet, _pending_cards: 0, predicted base time: 10.00 ms, remaining time: 990.00 ms, target pause time: 1000.00 ms]
    134.346: [G1Ergonomics (CSet Construction) add young regions to CSet, eden: 63 regions, survivors: 0 regions, predicted young region time: 4209.46 ms]
    134.346: [G1Ergonomics (CSet Construction) finish choosing CSet, eden: 63 regions, survivors: 0 regions, old: 0 regions, predicted pause time: 4219.46 ms, target pause time: 1000.00 ms]
    , 0.0325663 secs]
    '''
    date_time = ''
    process_time = 0.0
    young_mixed_type = ''  # young or mixed
    secondary_type = ''  # e.g., G1 Evacuation Pause, G1 Humongous Allocation, Metadata GC Threshold
    initial_mark = ''  # tertiary type, associated with G1 Humongous Allocation and Metadata GC Threshold
    gc_time = 0.0
    patternstr = datefmt + r'\[GC pause \(([ \w]*)\) \((young|mixed)\)( \([\w-]+\))?.+?, (\d+\.\d+) secs\]$'
    #                                    ^secondary   ^young/mixed    ^initial_mark     ^ elapsed time
    pattern = re.compile(patternstr, re.MULTILINE | re.DOTALL)
    for m in pattern.finditer(s):
        date_time = m.group(1)
        process_time = m.group(2)
        young_mixed_type = m.group(4)
        secondary_type = m.group(3)
        if m.group(5) is None:
            initial_mark = ''
        else:
            tmp = m.group(5)
            tmp = tmp.strip('() ')
            initial_mark = ' ' + tmp
        gc_time = m.group(6)
        mywriter.writerow([fileinfo['filename'], '', fileinfo['host'], fileinfo['pid'], date_time, process_time, 'GC pause - ' + young_mixed_type + ' ' + secondary_type + initial_mark, gc_time, ''])
# retained for reference but unused; process_young_mixed above handles mixed collections too
def process_mixed(s):
    '''
    2017-09-01T17:53:24.732+0000: 6167.945: [GC pause (G1 Evacuation Pause) (mixed)
    Desired survivor size 48234496 bytes, new threshold 1 (max 15)
    - age 1: 303167832 bytes, 303167832 total
    6167.945: [G1Ergonomics (CSet Construction) start choosing CSet, _pending_cards: 8728, predicted base time: 24.66 ms, remaining time: 975.34 ms, target pause time: 1000.00 ms]
    6167.945: [G1Ergonomics (CSet Construction) add young regions to CSet, eden: 105 regions, survivors: 74 regions, predicted young region time: 305.85 ms]
    6167.945: [G1Ergonomics (CSet Construction) finish adding old regions to CSet, reason: reclaimable percentage not over threshold, old: 19 regions, max: 359 regions, reclaimable: 751186712 bytes (5.00 %), threshold: 5.00 %]
    6167.945: [G1Ergonomics (CSet Construction) finish choosing CSet, eden: 105 regions, survivors: 74 regions, old: 19 regions, predicted pause time: 362.13 ms, target pause time: 1000.00 ms]
    6168.115: [G1Ergonomics (Mixed GCs) do not continue mixed GCs, reason: reclaimable percentage not over threshold, candidate old regions: 335 regions, reclaimable: 751186712 bytes (5.00 %), threshold: 5.00 %]
    , 0.1695338 secs]
    '''
    process_time = 0.0
    date_time = ''
    mixed_type = ''
    gc_time = 0.0
    # output similar to GC pause (young)
    patternstr = datefmt + r'\[GC pause \(([ \w]*)\) \(mixed\)( \([\w-]+\))?.+?, (\d+\.\d+) secs\]$'
    #                                    ^mixed_type
    pattern = re.compile(patternstr, re.MULTILINE | re.DOTALL)
    for m in pattern.finditer(s):
        date_time = m.group(1)
        process_time = m.group(2)
        mixed_type = m.group(3)
        gc_time = m.group(5)
        mywriter.writerow([fileinfo['filename'], '', fileinfo['host'], fileinfo['pid'], date_time, process_time, 'Mixed generation collection - ' + mixed_type, gc_time, ''])
def process_full(s):
    '''
    Full GC statements are also output on multiple lines.
    2018-07-30T11:39:47.643-0400: 174.007: [Full GC (Heap Inspection Initiated GC) 2018-07-30T11:39:47.643-0400: 174.007: [Class Histogram (before full gc):
    2018-07-25T11:59:08.922+0000: 1098967.077: [Full GC (System.gc()) 2018-07-25T11:59:08.927+0000: 1098967.081: [Class Histogram (before full gc):
    2018-07-21T12:11:41.060+0000: 387110.898: [Full GC (Allocation Failure) 2018-07-21T12:11:41.060+0000: 387110.898: [Class Histogram (before full gc):
    ...
    ..., real=6.79 secs]
    '''
    date_time = ''
    process_time = 0.0
    gc_time = 0.0
    gcfmt = r'\[Full GC \(([ \w\.\(\)]*)\) .+?, real=(\d+\.\d+) secs\]\s*$'
    #                     ^ full gc type
    patternstr = datefmt + gcfmt
    pattern = re.compile(patternstr, re.MULTILINE | re.DOTALL)
    for m in pattern.finditer(s):
        date_time = m.group(1)
        process_time = m.group(2)
        full_gc_type = m.group(3)
        gc_time = m.group(4)
        mywriter.writerow([fileinfo['filename'], '', fileinfo['host'], fileinfo['pid'], date_time, process_time, 'Full GC - ' + full_gc_type, gc_time, ''])

###############################################################################
# end methods that process multi-line messages
###############################################################################

###############################################################################
# methods that process a single line
###############################################################################
def process_jvminfo(s, linenum):
    s = s.strip()
    mywriter.writerow([fileinfo['filename'], linenum, fileinfo['host'], fileinfo['pid'], '', '', 'jvm info', '', s])
def process_remark_cleanup(s, linenum):
    '''
    These gc log statements show up on a single line.
    Example:
    2017-09-01T16:12:51.175+0000: 134.388: [GC remark 2017-09-01T16:12:51.175+0000: 134.388: [Finalize Marking, 0.0058528 secs] 2017-09-01T16:12:51.181+0000: 134.394: [GC ref-proc, 0.0001349 secs] 2017-09-01T16:12:51.181+0000: 134.394: [Unloading, 0.0032643 secs], 0.0100601 secs]
    44973.856: [GC cleanup 22G->22G(30G), 0.0100070 secs]
    [Times: user=0.08 sys=0.00, real=0.01 secs]
    '''
    gc_type = ''
    date_time = ''
    process_time = 0.0
    gc_time = 0.0
    m = re.match(r'^' + datefmt + r'\[GC remark .*, (\d+\.\d+) secs\]$', s)
    if m:
        gc_type = 'GC remark'
        date_time = m.group(1)
        process_time = m.group(2)
        gc_time = m.group(3)
    else:
        m = re.match(r'^' + datefmt + r'\[GC cleanup .+, (\d+\.\d+) secs\]$', s)
        if m:
            gc_type = 'GC cleanup'
            date_time = m.group(1)
            process_time = m.group(2)
            gc_time = m.group(3)
    if gc_type != '':
        # pad the row with an empty comment column to match col_li
        mywriter.writerow([fileinfo['filename'], linenum, fileinfo['host'], fileinfo['pid'], date_time, process_time, gc_type, gc_time, ''])
def process_search_pattern(s, linenum, date_time, process_time):
    '''
    Look for search strings of interest. If found, write to csv.
    '''
    patternstr = r'({})'.format('|'.join(search_li))
    m = re.search(patternstr, s, re.IGNORECASE)
    if m:
        search_pattern = m.group(1).lower()
        if search_pattern == 'humongous' and not enable_humongous:
            return
        s = s.strip()
        mywriter.writerow([fileinfo['filename'], linenum, fileinfo['host'], fileinfo['pid'], date_time, process_time, search_pattern, '', s])

###############################################################################
# end methods that process a single line
###############################################################################
def process_args():
    global dirlist, output_filename, enable_humongous, show_relative_path, host_li
    parser = argparse.ArgumentParser(add_help=False)
    parser.add_argument("--start_dir", help="the root directory to begin processing.")
    parser.add_argument("--output_dir", help="where the output file should be written to. By default the output file will be located in the user's home directory.")
    parser.add_argument("--log_level", choices=['CRITICAL', 'ERROR', 'WARNING', 'INFO', 'DEBUG'], help="logging level. Default is WARNING.")
    parser.add_argument("--enable_humongous", help='True enables inclusion of any log messages that have to do with humongous allocation. Default is False.')
    parser.add_argument("--show_relative_path", help="show relative path in filename column. true or false. Default is false.")
    parser.add_argument("--hosts", help="list of hosts, separated by commas.")
    parser.add_argument('-h', '--help', action='help', default=argparse.SUPPRESS, help=r'This program parses a gc log file and provides a summary in csv format. The following JVM options should be used to generate the log file: -Xloggc:/path/to/file/gc_%%p.log -XX:+PrintCommandLineFlags -XX:+PrintGC -XX:+PrintGCCause -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintGCDateStamps -XX:+PrintAdaptiveSizePolicy -XX:+PrintTenuringDistribution -XX:+PrintReferenceGC')
    args = parser.parse_args()
    if args.start_dir:
        dirlist = [args.start_dir]
    if args.output_dir:
        output_filename = args.output_dir + os.path.sep + "gc_log_summary-" + gettimestamp() + ".csv"
    if args.enable_humongous:
        if args.enable_humongous.lower() == 'true' or args.enable_humongous.lower() == 't':
            enable_humongous = True
    if args.log_level:
        if args.log_level == 'CRITICAL':
            logging.basicConfig(level=logging.CRITICAL)
        elif args.log_level == 'ERROR':
            logging.basicConfig(level=logging.ERROR)
        elif args.log_level == 'INFO':
            logging.basicConfig(level=logging.INFO)
        elif args.log_level == 'DEBUG':
            logging.basicConfig(level=logging.DEBUG)
        else:
            logging.basicConfig(level=logging.WARNING)
    else:
        # set logging level. WARNING is the default level
        logging.basicConfig(level=logging.WARNING)
    if args.show_relative_path and args.show_relative_path.lower() == 'true':
        show_relative_path = True
    if args.hosts:
        host_li = args.hosts.split(',')
def main():
    global mywriter
    if sys.version_info < (3, 0, 0):
        print("This script requires Python 3 or later")
        sys.exit(-1)
    process_args()
    # write output to csv file
    with open(output_filename, 'w', newline='') as csvfile:
        mywriter = csv.writer(csvfile)
        # write column headings
        mywriter.writerow(col_li)
        for dir in dirlist:
            logging.debug(dir)
            for root, dirs, files in os.walk(dir):
                for name in files:
                    logging.debug(os.path.join(root, name))
                    (b, extension) = os.path.splitext(name)
                    ext = extension.lstrip('.')
                    if extension in extlist or ext.isdigit():
                        fullpath = os.path.join(root, name)
                        if show_relative_path:
                            # add one for the path separator
                            index = len(dir) + 1
                            fname = fullpath[index:]
                            myvisitor_2(fullpath, fname)
                        else:
                            myvisitor_2(fullpath, name)
                for name in dirs:
                    logging.debug(os.path.join(root, name))

main()
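# Example invocation (script name is illustrative):
#   python gc_log_summary.py --start_dir /path/to/gc/logs --hosts host1,host2 --enable_humongous true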
import argparse
import csv
import datetime
import logging
import os
import re
import sys

'''
This program parses a gc log file for stop-the-world phases and keywords and writes a summary to a csv file.
young gc types - G1 Evacuation Pause, G1 Humongous Allocation, Metadata GC Threshold
mixed types - G1 Evacuation Pause
full gc types - Allocation Failure, System.gc()
'''
# list of columns
col_li = ['file name', 'line no.', 'host', 'pid', 'date time', 'process time', 'gc type/keyword', 'time', 'comment']
# list of extensions to visit
extlist = ['.current', '.0', '.1', '.2', '.3', '.4', '.5']
#extlist = ['.log', '.current', '.1', '.2']
# special patterns to search for
search_li = ['to-space', 'humongous']
#search_li = ['to-space', 'humongous', r'System.gc\(\)']
datefmt = r'^(\d+\.\d+): '
#           ^ timestamp
# variant for logs produced with PrintGCDateStamps enabled:
#datefmt = r'^(\d\d\d\d-\d\d-\d\dT\d\d:\d\d:\d\d\.\d\d\d[\+\-]\d\d\d\d): (\d+\.\d+): '
#            ^ date time                                                 ^ timestamp
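# Illustrative:
#   re.match(datefmt, '232610.071: [GC pause ...').group(1)  ->  '232610.071'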
def gethomedir():
    return os.path.expanduser('~')

def gettimestamp():
    today = datetime.date.today()
    return today.strftime("%Y%b%d")

# globals
# list of directories to visit
dirlist = [r'/tmp']
output_filename = '{}{}gc-summary-{}.csv'.format(gethomedir(), os.path.sep, gettimestamp())
show_relative_path = False
host_li = []

# log files were collected and put in directories by hostname, separated by '.'
def get_hostname(dirpath):
    for h in host_li:
        if dirpath.find(h) > -1:
            return h
    return ''

# use -Xloggc:/path/to/file/gc.%p.log, where %p tells the JVM to substitute the pid
def get_pid(filename):
    li = filename.split('pid')
    if len(li) == 1:
        return li[0]
    else:
        (pid, rest) = li[1].split('.', 1)
        logging.debug("pid: %s", pid)
        return pid
# legacy visitor kept for reference; main() walks the tree itself via os.walk
def myvisitor(extlist, dirname, names):
    global fileinfo
    logging.debug("Current directory: %s", dirname)
    for f in names:
        (p, ext) = os.path.splitext(f)
        logging.debug("%s %s", f, ext)
        if ext in extlist:
            fullpath = os.path.join(dirname, f)
            logging.debug(fullpath)
            try:
                hostname = get_hostname(dirname)
                pid = get_pid(f)
                fileinfo = {'filename': f, 'host': hostname, 'pid': pid}
                process_file(fullpath)
            except OSError as err:
                print("OS error: {0}".format(err))

def myvisitor_2(fullpath, f):
    global fileinfo
    try:
        hostname = get_hostname(f)
        pid = get_pid(f)
        fileinfo = {'filename': f, 'host': hostname, 'pid': pid}
        process_file(fullpath)
    except OSError as err:
        print("OS error: {0}".format(err))
def process_file(fullpath):
    linenum = 0
    with open(fullpath, 'r') as f:
        date_time = ''
        process_time = ''
        # process line by line to get basic information
        for line in f:
            linenum += 1
            m = re.match(datefmt, line)
            if m:
                # save the current timestamp; group(0) is the full matched prefix, e.g. '232610.071: '
                date_time = m.group(0)
                process_time = m.group(1)
            # check for keywords of interest
            #process_search_pattern(line, linenum, date_time, process_time)
            if line.startswith('Java HotSpot(TM)') or line.startswith('Memory:') or line.startswith('CommandLine flags:'):
                process_jvminfo(line, linenum)
            elif not line.startswith(' '):
                # check for stw pauses that appear on one line
                process_remark_cleanup(line, linenum)
        # read file object to string. When -XX:+PrintAdaptiveSizePolicy is used,
        # gc phases need a multi-line regex to handle
        # check for stw pauses that span multiple lines
        f.seek(0)
        text = f.read()
    # we are interested in activity that causes a stop-the-world pause and the duration of the gc
    # https://blogs.oracle.com/poonam/entry/understanding_g1_gc_logs
    # https://www.oracle.com/technetwork/articles/java/g1gc-1984535.html
    # process multi-line gc phases
    process_young(text)
    process_mixed(text)
    process_full(text)

def process_jvminfo(s, linenum):
    s = s.strip()
    mywriter.writerow([fileinfo['filename'], linenum, fileinfo['host'], fileinfo['pid'], '', '', 'jvm info', '', s])
def process_young(s):
    '''
    232610.071: [GC pause (G1 Evacuation Pause) (young)
    Desired survivor size 1090519040 bytes, new threshold 15 (max 15)
    - age 1: 2294896 bytes, 2294896 total
    - age 2: 1768760 bytes, 4063656 total
    - age 3: 2228888 bytes, 6292544 total
    - age 4: 4939064 bytes, 11231608 total
    - age 5: 4320224 bytes, 15551832 total
    - age 6: 2211832 bytes, 17763664 total
    - age 7: 594464 bytes, 18358128 total
    - age 8: 1539128 bytes, 19897256 total
    - age 9: 3044240 bytes, 22941496 total
    - age 10: 2794640 bytes, 25736136 total
    - age 11: 3209632 bytes, 28945768 total
    - age 12: 2267952 bytes, 31213720 total
    - age 13: 2402216 bytes, 33615936 total
    - age 14: 2345184 bytes, 35961120 total
    - age 15: 2231848 bytes, 38192968 total
    232610.071: [G1Ergonomics (CSet Construction) start choosing CSet, _pending_cards: 13138, predicted base time: 78.16 ms, remaining time: 121.84 ms, target pause time: 200.00 ms]
    232610.071: [G1Ergonomics (CSet Construction) add young regions to CSet, eden: 1035 regions, survivors: 4 regions, predicted young region time: 11.03 ms]
    232610.071: [G1Ergonomics (CSet Construction) finish choosing CSet, eden: 1035 regions, survivors: 4 regions, old: 0 regions, predicted pause time: 89.19 ms, target pause time: 200.00 ms]
    , 0.1156739 secs]
    '''
    logging.debug("In process_young")
    date_time = ''
    process_time = 0.0
    young_type = ''
    initial_mark = ''
    gc_time = 0.0
    patternstr = datefmt + r'\[GC pause \(([ \w]*)\) \(young\).+?, (\d+\.\d+) secs\]$'
    #                                    ^type                    ^ elapsed time
    # variant that also captures an initial-mark marker:
    #patternstr = datefmt + r'\[GC pause \(([ \w]*)\) \(young\)( \([\w-]+\))?.+?, (\d+\.\d+) secs\]$'
    #                                    ^type                 ^initial_mark    ^ elapsed time
    pattern = re.compile(patternstr, re.MULTILINE | re.DOTALL)
    for m in pattern.finditer(s):
        process_time = m.group(1)
        young_type = m.group(2)
        gc_time = m.group(3)
        mywriter.writerow([fileinfo['filename'], '', fileinfo['host'], fileinfo['pid'], date_time, process_time, 'Young generation collection - ' + young_type + initial_mark, gc_time, ''])
def process_mixed(s):
    '''
    257167.069: [GC pause (G1 Evacuation Pause) (mixed)
    Desired survivor size 117440512 bytes, new threshold 15 (max 15)
    - age 1: 169008 bytes, 169008 total
    - age 2: 5032 bytes, 174040 total
    - age 3: 2712288 bytes, 2886328 total
    - age 4: 820208 bytes, 3706536 total
    - age 5: 916704 bytes, 4623240 total
    - age 6: 3246680 bytes, 7869920 total
    - age 7: 852856 bytes, 8722776 total
    - age 8: 605648 bytes, 9328424 total
    - age 9: 983264 bytes, 10311688 total
    - age 10: 1685120 bytes, 11996808 total
    - age 11: 692152 bytes, 12688960 total
    - age 12: 2147224 bytes, 14836184 total
    - age 13: 1511072 bytes, 16347256 total
    - age 14: 1832744 bytes, 18180000 total
    - age 15: 1066168 bytes, 19246168 total
    257167.069: [G1Ergonomics (CSet Construction) start choosing CSet, _pending_cards: 70042, predicted base time: 71.62 ms, remaining time: 128.38 ms, target pause time: 200.00 ms]
    257167.069: [G1Ergonomics (CSet Construction) add young regions to CSet, eden: 109 regions, survivors: 3 regions, predicted young region time: 6.64 ms]
    257167.069: [G1Ergonomics (CSet Construction) finish adding old regions to CSet, reason: predicted time is too high, predicted time: 3.29 ms, remaining time: 0.00 ms, old: 79 regions, min: 79 regions]
    257167.069: [G1Ergonomics (CSet Construction) added expensive regions to CSet, reason: old CSet region num not reached min, old: 79 regions, expensive: 29 regions, min: 79 regions, remaining time: 0.00 ms]
    257167.069: [G1Ergonomics (CSet Construction) finish choosing CSet, eden: 109 regions, survivors: 3 regions, old: 79 regions, predicted pause time: 285.70 ms, target pause time: 200.00 ms]
    257167.236: [G1Ergonomics (Mixed GCs) continue mixed GCs, reason: candidate old regions available, candidate old regions: 344 regions, reclaimable: 2334497912 bytes (6.21 %), threshold: 5.00 %]
    , 0.1677699 secs]
    '''
    process_time = 0.0
    date_time = ''
    mixed_type = ''
    gc_time = 0.0
    # output similar to GC pause (young)
    patternstr = datefmt + r'\[GC pause \(([ \w]*)\) \(mixed\).+?, (\d+\.\d+) secs\]$'
    #                                    ^mixed_type
    # variant that also captures an initial-mark marker:
    #patternstr = datefmt + r'\[GC pause \(([ \w]*)\) \(mixed\)( \([\w-]+\))?.+?, (\d+\.\d+) secs\]$'
    pattern = re.compile(patternstr, re.MULTILINE | re.DOTALL)
    for m in pattern.finditer(s):
        process_time = m.group(1)
        mixed_type = m.group(2)
        gc_time = m.group(3)
        mywriter.writerow([fileinfo['filename'], '', fileinfo['host'], fileinfo['pid'], date_time, process_time, 'Mixed generation collection - ' + mixed_type, gc_time, ''])
def process_full(s):
    '''
    422052.838: [Full GC (System.gc()) 16G->10G(35G), 34.1545090 secs]
    '''
    date_time = ''
    process_time = 0.0
    gc_time = 0.0
    gcfmt = r'\[Full GC \(([ \w\.\(\)]*)\) .+?, (\d+\.\d+) secs\]$'
    #                     ^ full gc type
    #gcfmt = r'\[Full GC \(([ \w\.\(\)]*)\) .+?, real=(\d+\.\d+) secs\]\s*$'
    patternstr = datefmt + gcfmt
    pattern = re.compile(patternstr, re.MULTILINE | re.DOTALL)
    for m in pattern.finditer(s):
        process_time = m.group(1)
        full_gc_type = m.group(2)
        gc_time = m.group(3)
        mywriter.writerow([fileinfo['filename'], '', fileinfo['host'], fileinfo['pid'], date_time, process_time, 'Full GC - ' + full_gc_type, gc_time, ''])
def process_remark_cleanup(s, linenum):
    '''
    These gc log statements show up on a single line.
    Example:
    706.065: [GC cleanup 220M->218M(512M), 0.0021548 secs]
    706.035: [GC remark, 0.0278976 secs]
    108684.812: [GC remark 108684.812: [Finalize Marking, 0.0018014 secs] 108684.814: [GC ref-proc, 0.0089392 secs] 108684.823: [Unloading, 0.0317085 secs], 0.0672140 secs]
    '''
    gc_type = ''
    date_time = ''
    process_time = 0.0
    gc_time = 0.0
    m = re.match(datefmt + r'\[GC remark.+(\d+\.\d+) secs\]$', s)
    if m:
        gc_type = 'GC remark'
        process_time = m.group(1)
        gc_time = m.group(2)
    else:
        m = re.match(datefmt + r'\[GC cleanup .+, (\d+\.\d+) secs\]$', s)
        if m:
            gc_type = 'GC cleanup'
            process_time = m.group(1)
            gc_time = m.group(2)
    if gc_type != '':
        # pad the row with an empty comment column to match col_li
        mywriter.writerow([fileinfo['filename'], linenum, fileinfo['host'], fileinfo['pid'], date_time, process_time, gc_type, gc_time, ''])
def process_search_pattern(s, linenum, date_time, process_time):
    '''
    Look for search strings of interest. If found, write to csv.
    '''
    patternstr = r'({})'.format('|'.join(search_li))
    m = re.search(patternstr, s, re.IGNORECASE)
    if m:
        search_pattern = m.group(1).lower()
        s = s.strip()
        mywriter.writerow([fileinfo['filename'], linenum, fileinfo['host'], fileinfo['pid'], date_time, process_time, search_pattern, '', s])
def process_args():
    global dirlist, output_filename, show_relative_path, host_li
    parser = argparse.ArgumentParser(add_help=False)
    parser.add_argument("--start_dir", help="the root directory to begin processing.")
    parser.add_argument("--output_dir", help="where the output file should be written to. By default the output file will be located in the user's home directory.")
    parser.add_argument("--log_level", choices=['CRITICAL', 'ERROR', 'WARNING', 'INFO', 'DEBUG'], help="logging level. Default is WARNING.")
    parser.add_argument("--show_relative_path", help="show relative path in filename column. true or false. Default is false.")
    parser.add_argument("--hosts", help="list of hosts, separated by commas.")
    parser.add_argument('-h', '--help', action='help', default=argparse.SUPPRESS, help=r'This program parses a gc log file and provides a summary in csv format. The following JVM options should be used to generate the log file: -Xloggc:/path/to/file/gc_%%p.log -XX:+PrintCommandLineFlags -XX:+PrintGC -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintAdaptiveSizePolicy -XX:+PrintTenuringDistribution -XX:-PrintReferenceGC')
    args = parser.parse_args()
    if args.start_dir:
        dirlist = [args.start_dir]
        output_filename = args.start_dir + os.path.sep + "gc_log_summary-" + gettimestamp() + ".csv"
    if args.output_dir:
        output_filename = args.output_dir + os.path.sep + "gc_log_summary-" + gettimestamp() + ".csv"
    if args.log_level:
        if args.log_level == 'CRITICAL':
            logging.basicConfig(level=logging.CRITICAL)
        elif args.log_level == 'ERROR':
            logging.basicConfig(level=logging.ERROR)
        elif args.log_level == 'INFO':
            logging.basicConfig(level=logging.INFO)
        elif args.log_level == 'DEBUG':
            logging.basicConfig(level=logging.DEBUG)
        else:
            logging.basicConfig(level=logging.WARNING)
    else:
        # set logging level. WARNING is the default level
        logging.basicConfig(level=logging.WARNING)
    if args.show_relative_path and args.show_relative_path.lower() == 'true':
        show_relative_path = True
    if args.hosts:
        host_li = args.hosts.split(',')
def main():
    global mywriter
    if sys.version_info < (3, 0, 0):
        print("This script requires Python 3 or later")
        sys.exit(-1)
    process_args()
    # write output to csv file
    with open(output_filename, 'w', newline='') as csvfile:
        mywriter = csv.writer(csvfile)
        # write column headings
        mywriter.writerow(col_li)
        for dir in dirlist:
            logging.debug(dir)
            for root, dirs, files in os.walk(dir):
                for name in files:
                    logging.debug(os.path.join(root, name))
                    (b, ext) = os.path.splitext(name)
                    # extlist entries are treated as regex patterns
                    for x in extlist:
                        m = re.match(x, ext)
                        if m:
                            fullpath = os.path.join(root, name)
                            if show_relative_path:
                                # add one for the path separator
                                index = len(dir) + 1
                                fname = fullpath[index:]
                                myvisitor_2(fullpath, fname)
                            else:
                                myvisitor_2(fullpath, name)
                for name in dirs:
                    logging.debug(os.path.join(root, name))

main()
import os
import csv
import re
import logging
import argparse
import datetime

# list of columns; the final 'comment' column holds the raw log line where applicable
col_li = ['file name', 'line no.', 'host', 'pid', 'process time', 'gc type/keyword', 'time', 'size before gc', 'size after gc', 'total heap size', 'comment']
# list of directories to visit
dirlist = [r'E:\log']
show_relative_path = False

def gethomedir():
    return os.path.expanduser('~')

def gettimestamp():
    today = datetime.date.today()
    return today.strftime("%Y%b%d")

output_filename = '{}{}gc-summary-{}.csv'.format(gethomedir(), os.path.sep, gettimestamp())
# list of extensions to visit
extlist = ['.log']
# special patterns to search for
search_li = ['to-space', 'humongous', r'System.gc\(\)']

# log files were collected and put in directories by hostname, separated by '.'
def get_hostname(dirpath):
    (head, tail) = os.path.split(dirpath)
    if tail.find('.') > -1:
        (hostname, rest) = tail.split('.', 1)
        logging.debug("hostname: %s", hostname)
        return hostname
    else:
        return ''

# use -Xloggc:/path/to/file/gc.%p.log, where %p tells the JVM to substitute the pid
def get_pid(filename):
    li = filename.split('pid')
    if len(li) == 1:
        return li[0]
    else:
        (pid, rest) = li[1].split('.', 1)
        logging.debug("pid: %s", pid)
        return pid
def myvisitor(extlist, dirname, names):
    global fileinfo
    logging.debug("Current directory: %s", dirname)
    for f in names:
        (p, ext) = os.path.splitext(f)
        logging.debug("%s %s", f, ext)
        if ext in extlist:
            fullpath = os.path.join(dirname, f)
            logging.debug(fullpath)
            try:
                hostname = get_hostname(dirname)
                pid = get_pid(f)
                fileinfo = {'filename': f, 'host': hostname, 'pid': pid}
                process_file(fullpath)
            except OSError as err:
                print("OS error: {0}".format(err))

def process_jvminfo(s, linenum):
    s = s.strip()
    mywriter.writerow([fileinfo['filename'], linenum, fileinfo['host'], fileinfo['pid'], '', 'jvm info', '', '', '', '', s])
def process_file(fullpath):
    linenum = 0
    with open(fullpath, 'r') as f:
        # process line by line to get basic information
        for line in f:
            linenum += 1
            # check for keywords of interest
            process_search_pattern(line, linenum)
            if line.startswith('Java HotSpot(TM)') or line.startswith('Memory:') or line.startswith('CommandLine flags:'):
                process_jvminfo(line, linenum)
            elif not line.startswith(' '):
                process_remark_cleanup_fullgc(line, linenum)
        # read file object to string. When -XX:+PrintAdaptiveSizePolicy is used,
        # gc phases need a multi-line regex to handle
        # check for stw pauses that span multiple lines
        f.seek(0)
        text = f.read()
    # we are interested in activity that causes a stop-the-world pause and the duration of the gc
    # https://blogs.oracle.com/poonam/entry/understanding_g1_gc_logs
    # process multi-line gc phases
    process_young(text)
    process_mixed(text)
def process_young(s):
    '''
    These gc log statements show up on multiple lines.
    Example:
    54614.619: [GC pause (young)
    Desired survivor size 109051904 bytes, new threshold 16 (max 25)
    - age 1: 9991736 bytes, 9991736 total
    54614.620: [G1Ergonomics (CSet Construction) start choosing CSet, _pending_cards: 4184, predicted base time: 28.58 ms, remaining time: 971.42 ms, target pause time: 1000.00 ms]
    54614.620: [G1Ergonomics (CSet Construction) add young regions to CSet, eden: 199 regions, survivors: 4 regions, predicted young region time: 939.32 ms]
    54614.620: [G1Ergonomics (CSet Construction) finish choosing CSet, eden: 199 regions, survivors: 4 regions, old: 0 regions, predicted pause time: 967.90 ms, target pause time: 1000.00 ms]
    54614.644: [SoftReference, 878 refs, 0.0006080 secs]54614.645: [WeakReference, 1371 refs, 0.0003980 secs]54614.645: [FinalReference, 6591 refs, 0.0029020 secs]54614.648: [PhantomReference, 5 refs, 106 refs, 0.0019450 secs]54614.650: [JNI Weak Reference, 0.0090930 secs], 0.0433140 secs]
    '''
    process_time = 0.0
    gc_time = 0.0
    pattern = re.compile(r'^(\d*\.\d*): \[GC pause [ \w\(\)]* \(young\)(.+?), (\d*\.\d*) secs\]$', re.MULTILINE | re.DOTALL)
    # multi-line search
    for m in pattern.finditer(s):
        process_time = m.group(1)
        gc_time = m.group(3)
        mywriter.writerow([fileinfo['filename'], '', fileinfo['host'], fileinfo['pid'], process_time, 'Young generation collection', gc_time, '', '', '', ''])

def process_mixed(s):
    process_time = 0.0
    gc_time = 0.0
    # output similar to GC pause (young)
    pattern = re.compile(r'^(\d*\.\d*): \[GC pause \(mixed\)(.+?), (\d*\.\d*) secs\]$', re.MULTILINE | re.DOTALL)
    for m in pattern.finditer(s):
        process_time = m.group(1)
        gc_time = m.group(3)
        mywriter.writerow([fileinfo['filename'], '', fileinfo['host'], fileinfo['pid'], process_time, 'Mixed generation collection', gc_time, '', '', '', ''])
def process_remark_cleanup_fullgc(s, linenum): | |
''' | |
These gc log statements show up on a single line. | |
Example: | |
44973.752: [GC remark 44973.753: [GC ref-proc44973.753: [SoftReference, 3741 refs, 0.0031090 secs]44973.756: [WeakReference, 6937 refs, 0.0069930 secs]44973.763: [FinalReference, 2459 refs, 0.0038880 secs]44973.767: [PhantomReference, 28 refs, 1275 refs, 0.0029950 secs]44973.770: [JNI Weak Reference, 0.0621620 secs], 0.0803160 secs], 0.1021600 secs] | |
[Times: user=0.30 sys=0.00, real=0.11 secs] | |
44973.856: [GC cleanup 22G->22G(30G), 0.0100070 secs] | |
[Times: user=0.08 sys=0.00, real=0.01 secs] | |
151413.747: [Full GC151419.349: [SoftReference, 490 refs, 0.0000980 secs]151419.349: [WeakReference, 5036 refs, 0.0004770 secs]151419.349: [FinalReference, 10 refs, 0.0000230 secs]151419.349: [PhantomReference, 129 refs, 346 refs, 0.0000520 secs]151419.349: [JNI Weak Reference, 0.0025470 secs] 19G->19G(30G), 14.2256960 secs] | |
''' | |
gc_type = '' | |
process_time = 0.0 | |
gc_time = 0.0 | |
gc_size_before = '' | |
gc_size_after = '' | |
total_heap_size = '' | |
m = re.match(r'^(\d*\.\d*): \[GC remark \d*\.\d*: (.+), (\d*\.\d*) secs\]$', s) | |
if m: | |
gc_type = 'GC remark' | |
process_time = m.group(1) | |
gc_time = m.group(3) | |
else: | |
m = re.match(r'^(\d*\.\d*): \[GC cleanup (.+), (\d*\.\d*) secs\]$', s) | |
if m: | |
gc_type = 'GC cleanup' | |
process_time = m.group(1) | |
gc_time = m.group(3) | |
else: | |
m = re.match(r'^(\d*\.\d*): \[Full GC(.+) (\d+[MG])->(\d*[MG])\((\d*[MG])\), (\d*\.\d*) secs\]$', s) | |
if m: | |
gc_type = 'Full GC' | |
process_time = m.group(1) | |
gc_size_before = m.group(3) | |
gc_size_after = m.group(4) | |
total_heap_size = m.group(5) | |
gc_time = m.group(6) | |
if gc_type != '': | |
mywriter.writerow([fileinfo['filename'], linenum, fileinfo['host'], fileinfo['pid'], process_time, gc_type, gc_time, gc_size_before, gc_size_after, total_heap_size]) | |
def process_search_pattern(s, linenum): | |
''' | |
Look for search strings of interest. If found write to csv. | |
''' | |
for search_pattern in search_li: | |
if re.search(search_pattern, s, re.IGNORECASE): | |
s = s.strip() | |
mywriter.writerow([fileinfo['filename'], linenum, fileinfo['host'], fileinfo['pid'], '', search_pattern, '', '', '', '', s]) | |
break | |
def process_args(): | |
global dirlist, output_filename, host_li | |
parser = argparse.ArgumentParser(add_help=False) | |
parser.add_argument("--start_dir", help="the root directory to begin processing") | |
parser.add_argument("--output_dir", help="where the output file should be written to. By default the output file will be located in a user's home directory.") | |
parser.add_argument("--log_level", choices=['CRITICAL', 'ERROR', 'WARNING', 'INFO', 'DEBUG'], help="logging level") | |
parser.add_argument("--hosts", help="list of hosts, separated by commas") | |
parser.add_argument('-h', '--help', action='help', default=argparse.SUPPRESS, help="This program parses a gc log file and provides a summary in csv format. The following JVM options should be used to generate the log file: -Xloggc:/path/to/file/gc_%%p.log -XX:+PrintCommandLineFlags -XX:+PrintGC -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintAdaptiveSizePolicy -XX:+PrintTenuringDistribution -XX:+PrintReferenceGC") | |
args = parser.parse_args() | |
if args.start_dir: | |
dirlist = [args.start_dir] | |
if args.output_dir: | |
output_filename = args.output_dir + os.path.sep + "gc_log_summary-" + gettimestamp() + ".csv" | |
else: | |
# fall back to the user's home directory (home_dir, defined above), as the other scripts do | |
output_filename = home_dir + os.path.sep + "gc_log_summary-" + gettimestamp() + ".csv" | |
if args.log_level: | |
if args.log_level == 'CRITICAL': | |
logging.basicConfig(level=logging.CRITICAL) | |
elif args.log_level == 'ERROR': | |
logging.basicConfig(level=logging.ERROR) | |
elif args.log_level == 'INFO': | |
logging.basicConfig(level=logging.INFO) | |
elif args.log_level == 'DEBUG': | |
logging.basicConfig(level=logging.DEBUG) | |
else: | |
logging.basicConfig(level=logging.WARNING) | |
else: | |
# set logging level. WARNING is default level | |
logging.basicConfig(level=logging.WARNING) | |
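# the if/elif chain above could also be written in one line, assuming the | |
# choices stay aligned with logging's own level names: | |
#   logging.basicConfig(level=getattr(logging, args.log_level)) | |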
if args.hosts: | |
host_li = args.hosts.split(',') | |
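# a hypothetical invocation of this script (the script name is illustrative): | |
#   python3 gc-summary.py --start_dir /path/to/gclogs --output_dir /tmp --hosts hosta,hostb | |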
def myvisitor_2(fullpath, f): | |
global fileinfo | |
try: | |
hostname = get_hostname(f) | |
pid = get_pid(f) | |
fileinfo = {'filename': f, 'host': hostname, 'pid': pid} | |
process_file(fullpath) | |
except OSError as err: | |
print("OS error: {0}".format(err)) | |
def main(): | |
global mywriter | |
process_args() | |
# write output to csv file | |
with open(output_filename, 'w', newline='') as csvfile: | |
#with open(output_filename, 'wb') as csvfile: | |
mywriter = csv.writer(csvfile) | |
# write column headings | |
mywriter.writerow(col_li) | |
for dir in dirlist: | |
logging.debug(dir) | |
for root, dirs, files in os.walk(dir): | |
for name in files: | |
logging.debug(os.path.join(root, name)) | |
(b, ext) = os.path.splitext(name) | |
for x in extlist: | |
m = re.match(x, ext) | |
if m: | |
fullpath = os.path.join(root, name) | |
if show_relative_path == True: | |
# add one for path separator | |
index = len(dir) + 1 | |
fname = fullpath[index:] | |
myvisitor_2(fullpath, fname) | |
else: | |
myvisitor_2(fullpath, name) | |
for name in dirs: | |
logging.debug(os.path.join(root, name)) | |
#os.path.walk(dir, myvisitor, extlist) | |
main() |
import re | |
import sys | |
# regex for the timestamp prefix of a gc log line, for example: | |
#   2022-02-22T14:22:29.770-0600: 3.287: | |
# group 1: ISO-8601 date/time with milliseconds and timezone offset | |
# group 2: seconds elapsed since the JVM started | |
datefmt = r'^(\d\d\d\d-\d\d-\d\dT\d\d:\d\d:\d\d\.\d\d\d[\+\-]\d\d\d\d): (\d+\.\d+): ' | |
filename = '' | |
# check python version | |
def check_version(): | |
if sys.version_info < (3,0,0): | |
print("Please use a version of Python > 3") | |
sys.exit(-1) | |
if len(sys.argv) < 2: | |
print("No filename specified.") | |
print("Usage: {} <filename>".format(sys.argv[0])) | |
sys.exit(-1) | |
def process_args(): | |
global filename | |
filename = sys.argv[1] | |
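# a hypothetical invocation; the script prints csv to stdout, so redirect it: | |
#   python3 heap-summary.py gc.pid12345.log > heap-summary.csv | |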
def process(): | |
with open(filename, encoding="latin-1") as f: | |
# the number of lines that have heap size information | |
count = 0 | |
# the total number of lines processed | |
linecount = 1 | |
# date and time stamp | |
date_time = '' | |
# number of seconds elapsed since the process started | |
process_time = '' | |
print('count, filename, line_number, date_time, process_time, begin_eden, begin_max_eden, end_eden, end_max_eden, begin_survivor, end_survivor, begin_heap, begin_max_heap, end_heap, end_max_heap') | |
for line in f: | |
line = line.strip() | |
#print(line) | |
m = re.match(datefmt, line) | |
if m: | |
# save current timestamp | |
date_time = m.group(1) | |
process_time = m.group(2) | |
# match heap information in following formats | |
# [Eden: 9632.0M(9632.0M)->0.0B(9624.0M) Survivors: 192.0M->200.0M Heap: 11.4G(16.0G)->2074.8M(16.0G)] | |
# [Eden: 4704.0M(9624.0M)->0.0B(9824.0M) Survivors: 200.0M->0.0B Heap: 6786.9M(16.0G)->931.6M(16.0G)], [Metaspace: 61553K->61499K(1105920K)] | |
# note: inside [...] every character is an alternative, so no '|' is needed (it would match a literal '|') | |
edenstr = r'\s*\[Eden: (\d+\.\d[BKMG])\((\d+\.\d[BKMG])\)->(\d+\.\d[BKMG])\((\d+\.\d[BKMG])\) ' | |
survivorstr = r'Survivors: (\d+\.\d[BKMG])->(\d+\.\d[BKMG]) ' | |
heapstr = r'Heap: (\d+\.\d[BKMG])\((\d+\.\d[BKMG])\)->(\d+\.\d[BKMG])\((\d+\.\d[BKMG])\)\].*' | |
patternstr = edenstr + survivorstr + heapstr | |
m = re.match(patternstr, line) | |
if m: | |
begin_eden = m.group(1) | |
begin_max_eden = m.group(2) | |
end_eden = m.group(3) | |
end_max_eden = m.group(4) | |
begin_survivor = m.group(5) | |
end_survivor = m.group(6) | |
begin_heap = m.group(7) | |
begin_max_heap = m.group(8) | |
end_heap = m.group(9) | |
end_max_heap = m.group(10) | |
count += 1 | |
print('{},{},{},{},{},{},{},{},{},{},{},{},{},{},{}'.format( | |
count, filename, linecount, date_time, process_time, begin_eden, | |
begin_max_eden, end_eden, end_max_eden, begin_survivor, end_survivor, | |
begin_heap, begin_max_heap, end_heap, end_max_heap)) | |
# match heap information in following formats | |
# 2022-02-22T14:22:29.770-0600: 3.287: [GC cleanup 18M->18M(3072M), 0.0059295 secs] | |
# 2022-02-22T14:22:34.301-0600: 7.817: [GC pause (Metadata GC Threshold) (young) (initial-mark) 123M->23M(3072M), 0.1070516 secs] | |
# 2022-02-22T15:27:01.100-0600: 3829.383: [GC pause (G1 Evacuation Pause) (young) 9827M->6775M(11G), 0.1417604 secs] | |
# 2022-02-22T16:42:59.750-0600: 8433.267: [GC pause (G1 Humongous Allocation) (young) (initial-mark) 1683M->1433M(3072M), 0.0867971 secs] | |
young = r'\[GC [\w \(\)-]+ (\d+[BKMG])->(\d+[BKMG])\((\d+[BKMG])\), \d+\.\d+ secs\]' | |
patternstr = datefmt + young | |
m = re.match(patternstr, line) | |
if m: | |
# first 2 group matches are used by datefmt | |
begin_heap = m.group(3) | |
end_heap = m.group(4) | |
end_max_heap = m.group(5) | |
print('{},{},{},{},{},{},{},{},{},{},{},{},{},{},{}'.format( | |
count, filename, linecount, date_time, process_time, '', | |
'', '', '', '', '', | |
begin_heap, '', end_heap, end_max_heap)) | |
count += 1 | |
linecount += 1 | |
def main(): | |
check_version() | |
process_args() | |
process() | |
main() |
teams = {'NY': 'Giants', 'Dallas': 'Cowboys', 'Green Bay': 'Packers'} | |
for k, v in teams.items(): | |
print("%s => %s" % (k, v)) | |
# sort, then print | |
keys = sorted(teams.keys()) | |
for k in keys: | |
print('%s => %s' % (k, teams[k])) | |
# alternatively | |
for key in sorted(teams): | |
print('%s => %s' % (key, teams[key])) |
import argparse | |
import csv | |
import datetime | |
import logging | |
import os | |
import subprocess | |
import sys | |
import traceback | |
# globals | |
dirlist = [] | |
output_filename = '' | |
# only run jar tvf on extensions of .jar | |
ext_li = [ ".jar"] | |
# only process the following file types in jar tvf output | |
filetype_ext_li = [ '.class', '.jar'] | |
filename_prefix = 'jar_checker_summary' | |
col_heading_li = ['artifact', 'size', 'content'] | |
home_dir = os.path.expanduser('~') | |
def gettimestamp(): | |
today = datetime.date.today() | |
return today.strftime("%Y%b%d") | |
def capture_process_output(filename): | |
cmd = 'jar tvf {0}'.format(filename) | |
result = subprocess.run(cmd, shell=True, check=True, capture_output=True, text=True) | |
output = result.stdout | |
logging.debug("result output: %s", output) | |
return output | |
def myvisitor(fullpath): | |
logging.debug("fullpath: %s", fullpath) | |
try: | |
filename, file_extension = os.path.splitext(fullpath) | |
if file_extension in ext_li: | |
logging.debug("fullpath: %s", fullpath) | |
jar_output = capture_process_output(fullpath) | |
for line in jar_output.split('\n'): | |
logging.debug(">>>>line: %s", line) | |
# only process lines with output | |
if line: | |
line_li = line.split() | |
logging.debug(r'........line_li: <%s>', ','.join(line_li)) | |
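# a 'jar tvf' line normally looks like (columns: size, day, month, date, time, tz, year, name): | |
#   1234 Wed Mar 01 12:00:00 EST 2017 com/example/Foo.class | |
# which is why token 0 is the size and token 7 is the entry name | |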
size = line_li[0] | |
content = line_li[7] | |
content_filename, content_file_extension = os.path.splitext(content) | |
if content_file_extension in filetype_ext_li: | |
mywriter.writerow([fullpath, size, content]) | |
except Exception as err: | |
logging.warning("Error caught while visiting {}".format(fullpath)) | |
logging.warning("Error: {0}".format(err)) | |
traceback.print_exc() | |
def process_args(): | |
global dirlist, output_filename | |
parser = argparse.ArgumentParser(add_help=False) | |
parser.add_argument("--start_dir", help="the root directory to begin processing. Multiple paths should be separated with a comma ','") | |
parser.add_argument("--output_dir", help="where the output file should be written to.") | |
parser.add_argument("--log_level", choices=['CRITICAL', 'ERROR', 'WARNING', 'INFO', 'DEBUG'], help="logging level. Default is WARNING.") | |
parser.add_argument('-h', '--help', action='help', default=argparse.SUPPRESS, help="This program searches jar files and outputs information about the content in csv format.") | |
args = parser.parse_args() | |
if args.start_dir: | |
dirlist = args.start_dir.split(',') | |
if args.output_dir: | |
output_filename = args.output_dir + os.path.sep + filename_prefix + "-" + gettimestamp() + ".csv" | |
else: | |
output_filename = home_dir + os.path.sep + filename_prefix + '-' + gettimestamp() + ".csv" | |
if args.log_level: | |
if args.log_level == 'CRITICAL': | |
logging.basicConfig(level=logging.CRITICAL) | |
elif args.log_level == 'ERROR': | |
logging.basicConfig(level=logging.ERROR) | |
elif args.log_level == 'INFO': | |
logging.basicConfig(level=logging.INFO) | |
elif args.log_level == 'DEBUG': | |
logging.basicConfig(level=logging.DEBUG) | |
else: | |
logging.basicConfig(level=logging.WARNING) | |
else: | |
# set logging level. WARNING is default level | |
logging.basicConfig(level=logging.WARNING) | |
logging.debug("dirlist: %s", dirlist) | |
def main(): | |
if sys.version_info < (3,7,0): | |
print("Please use a version of Python > 3.7") | |
sys.exit(-1) | |
process_args() | |
global mywriter | |
with open(output_filename, 'w', newline='') as csvfile: | |
mywriter = csv.writer(csvfile) | |
mywriter.writerow(col_heading_li) | |
for root in dirlist: | |
logging.debug("Processing: %s", root) | |
for currentpath, dirs, files in os.walk(root): | |
for name in files: | |
fullpath = os.path.join(currentpath, name) | |
logging.debug("root_dir: %s, currentpath: %s, fullpath: %s", root, currentpath, fullpath) | |
myvisitor(fullpath) | |
main() |
import argparse | |
import csv | |
import logging | |
import os | |
import re | |
import sys | |
import urllib.request | |
########## This program is used to search for urls in pdf files. | |
########## The pdf files should be downloaded to a local directory. | |
########## This program will test the urls for broken links. | |
########## global variables | |
start_dir = '' | |
output_dir = '' | |
ext_list = ['pdf'] | |
# key: url, value: urlInfo | |
links = {} | |
########## end global variables | |
class UrlInfo: | |
def __init__(self, url, hostname, files, count, responseCode, valid): | |
self.url = url | |
self.hostname = hostname | |
self.files = files | |
self.count = count | |
self.responseCode = responseCode | |
self.valid = valid | |
def openFileHelper(filename): | |
s = '' | |
with open(filename, 'rb') as fopen: | |
bytes = fopen.read() | |
# workaround to handle pdf files as they are binary format | |
s = bytes.decode('latin-1') | |
return s | |
# takes the filename of the file to search | |
def searchInFile(filename): | |
pattern = r'(http|https)://([a-zA-Z0-9\.#/%=_?-]*)' | |
# special characters | |
# # anchor | |
# % escape | |
# ? query string | |
# other special characters (not used by us): | |
# &, ~ (home directory location), + (plus sign) | |
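# re.findall with two groups returns (scheme, rest) tuples; a hypothetical match: | |
#   'https://example.com/docs/page#top' -> ('https', 'example.com/docs/page#top') | |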
text = openFileHelper(filename) | |
li = re.findall(pattern, text) | |
for item in li: | |
logging.debug('item is: {0}'.format(item)) | |
url = item[0] + '://' + item[1] | |
# get hostname name | |
hostname = '' | |
m = re.match(r'^([a-zA-Z0-9\.-]*)', item[1]) | |
if m: | |
hostname = m.group(1) | |
logging.debug('url is: {0}'.format(url)) | |
if url not in links: | |
urlInfo = UrlInfo(url, hostname, [filename], 1, 0, False) | |
links[url] = urlInfo | |
else: | |
urlInfo = links[url] | |
urlInfo.count += 1 | |
if filename not in urlInfo.files: | |
urlInfo.files.append(filename) | |
def testLinks(): | |
print("testing links...") | |
key = '' | |
for key, value in links.items(): | |
try: | |
# a timeout keeps one unresponsive host from hanging the whole run | |
responseCode = urllib.request.urlopen(key, timeout=10).getcode() | |
value.valid = True | |
value.responseCode = responseCode | |
except Exception as err: | |
logging.warning("Url: {0}, Error: {1}".format(key, err)) | |
#traceback.print_exc() | |
if isinstance(err, urllib.error.HTTPError): | |
value.responseCode = err.code | |
def outputLinks(): | |
keys = list(links.keys()) | |
keys.sort() | |
output_filename = os.path.sep.join([output_dir, 'linkchecker.csv']) | |
with open(output_filename, 'w', newline='') as csvfile: | |
mywriter = csv.writer(csvfile) | |
# header | |
mywriter.writerow(['url', 'hostname', 'in files', 'response code', 'valid', 'occurrences']) | |
for key in keys: | |
value = links[key] | |
mywriter.writerow([key, value.hostname, ','.join(value.files), value.responseCode, value.valid, value.count]) | |
def process_args(): | |
global start_dir, output_dir, ext_list | |
parser = argparse.ArgumentParser(add_help=False) | |
parser.add_argument("--start_dir", help="the root directory to begin processing.", required=True) | |
parser.add_argument("--output_dir", help="where the output file should be written to. If not specified it will be the same as start_dir.") | |
parser.add_argument("--ext_list", help="the list of file extensions to search in separated with commas. Default is pdf.") | |
parser.add_argument("--log_level", choices=['CRITICAL', 'ERROR', 'WARNING', 'INFO', 'DEBUG'], help="logging level. Default is WARNING.") | |
parser.add_argument('-h', '--help', action='help', default=argparse.SUPPRESS, help="This program is used to check files on disk for valid urls.") | |
args = parser.parse_args() | |
if args.start_dir: | |
start_dir = args.start_dir | |
if args.output_dir: | |
output_dir = args.output_dir | |
else: | |
output_dir = start_dir | |
if args.ext_list: | |
ext_list = args.ext_list.split(',') | |
if args.log_level: | |
if args.log_level == 'CRITICAL': | |
logging.basicConfig(level=logging.CRITICAL) | |
elif args.log_level == 'ERROR': | |
logging.basicConfig(level=logging.ERROR) | |
elif args.log_level == 'INFO': | |
logging.basicConfig(level=logging.INFO) | |
elif args.log_level == 'DEBUG': | |
logging.basicConfig(level=logging.DEBUG) | |
else: | |
logging.basicConfig(level=logging.WARNING) | |
else: | |
# set logging level. WARNING is default level | |
logging.basicConfig(level=logging.WARNING) | |
logging.debug("start_dir is: " + start_dir) | |
logging.debug("ext_list is: " + ",".join(ext_list)) | |
def process(): | |
logging.debug("in process(), start_dir is: " + start_dir) | |
for root, dirs, files in os.walk(start_dir): | |
for name in files: | |
(base, extension) = os.path.splitext(name) | |
logging.debug("file name is: " + name) | |
logging.debug("base file name is: " + base) | |
if extension.startswith('.'): | |
ext = extension.lstrip('.') | |
ext_match = False | |
if ext_list: | |
if ext in ext_list: | |
ext_match = True | |
else: | |
ext_match = True | |
if ext_match: | |
input_filename = os.path.join(root, name) | |
searchInFile(input_filename) | |
testLinks() | |
outputLinks() | |
def main(): | |
if sys.version_info < (3,0,0): | |
print("Please use a version of Python > 3") | |
sys.exit(-1) | |
process_args() | |
process() | |
main() |
import os | |
import subprocess | |
import sys | |
import argparse | |
start_dir = os.path.expanduser('~') | |
output_dir = start_dir | |
heap_summary_cmd = '/home/dixson/work/tools/py/heap-summary.py' | |
def process_args(): | |
global start_dir, output_dir | |
parser = argparse.ArgumentParser(add_help=False) | |
parser.add_argument("--start_dir", help="the root directory to begin processing.") | |
parser.add_argument("--output_dir", help="where the output file should be written to. If this is not set, this defaults to the start_dir") | |
parser.add_argument('-h', '--help', action='help', default=argparse.SUPPRESS, help="This program will parse a set of gc log files in a configured directory.") | |
args = parser.parse_args() | |
if args.start_dir: | |
start_dir = args.start_dir | |
if args.output_dir: | |
output_dir = args.output_dir | |
else: | |
output_dir = start_dir | |
def process(): | |
for root, dirs, files in os.walk(start_dir): | |
for name in files: | |
(base, extension) = os.path.splitext(name) | |
if extension.startswith('.'): | |
ext = extension.lstrip('.') | |
if ext.isdigit() or ext == 'current' or ext == 'log': | |
input_filename = os.path.join(root, name) | |
output_filename = input_filename + '.csv' | |
print(input_filename) | |
print(output_filename) | |
with open(output_filename, "w") as outfile: | |
subprocess.run(['python3', heap_summary_cmd, input_filename], stdout=outfile) | |
def main(): | |
if sys.version_info < (3,0,0): | |
print("Please use a version of Python > 3") | |
sys.exit(-1) | |
process_args() | |
process() | |
main() |
import argparse | |
import logging | |
import re | |
import shutil | |
import sys | |
multiplespaceregex = r'[\s]+' | |
filename = '' | |
search_text_file = '' | |
replacement_text_file = '' | |
# replace any white space characters with a regular expression for white space | |
def replaceWhiteSpace(s): | |
whitespacefound = False | |
searchstr = '' | |
for ch in s: | |
logging.debug(ch) | |
m = re.match(r'[\s]', ch) | |
if m: | |
logging.debug('I found whitespace') | |
if whitespacefound == False: | |
whitespacefound = True | |
else: | |
if whitespacefound == True: | |
searchstr += multiplespaceregex | |
searchstr += ch | |
whitespacefound = False | |
if whitespacefound == True: | |
searchstr += multiplespaceregex | |
return searchstr | |
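# a quick example of the transformation (any run of whitespace collapses to [\s]+): | |
#   replaceWhiteSpace('foo  bar\n') -> r'foo[\s]+bar[\s]+' | |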
def searchInFile(filename, searchstr): | |
text = openFileHelper(filename) | |
pattern = re.compile(searchstr) | |
m = pattern.search(text) | |
if m: | |
return True | |
else: | |
return False | |
def process_args(): | |
global filename, search_text_file, replacement_text_file | |
parser = argparse.ArgumentParser(add_help=False) | |
parser.add_argument("--filename", help="the file to search.") | |
parser.add_argument("--search_text_file", help="the text block to search and replace for.") | |
parser.add_argument("--replacement_text_file", help="the replacement text block.") | |
parser.add_argument("--log_level", choices=['CRITICAL', 'ERROR', 'WARNING', 'INFO', 'DEBUG'], help="logging level. Default is WARNING.") | |
parser.add_argument('-h', '--help', action='help', default=argparse.SUPPRESS, help="This program helps to replace search replace block text.") | |
args = parser.parse_args() | |
if args.filename: | |
filename = args.filename | |
if args.search_text_file: | |
search_text_file = args.search_text_file | |
if args.replacement_text_file: | |
replacement_text_file = args.replacement_text_file | |
if args.log_level: | |
if args.log_level == 'CRITICAL': | |
logging.basicConfig(level=logging.CRITICAL) | |
elif args.log_level == 'ERROR': | |
logging.basicConfig(level=logging.ERROR) | |
elif args.log_level == 'INFO': | |
logging.basicConfig(level=logging.INFO) | |
elif args.log_level == 'DEBUG': | |
logging.basicConfig(level=logging.DEBUG) | |
else: | |
logging.basicConfig(level=logging.WARNING) | |
else: | |
# set logging level. WARNING is default level | |
logging.basicConfig(level=logging.WARNING) | |
def openFileHelper(filename): | |
# read the entire file into a single string | |
with open(filename, 'r') as f: | |
return f.read() | |
def main(): | |
global filename, search_text_file, replacement_text_file | |
if sys.version_info < (3,0,0): | |
print("Please use a version of Python > 3") | |
sys.exit(-1) | |
process_args() | |
original_text = openFileHelper(filename) | |
search_text = openFileHelper(search_text_file) | |
replacement_text = openFileHelper(replacement_text_file) | |
#searchstr = replaceWhiteSpace(search_text) | |
searchstr = search_text | |
logging.debug("searchstr..........") | |
logging.debug(searchstr) | |
found = searchInFile(filename, searchstr) | |
if found: | |
# copy file | |
dst = filename + '~' | |
shutil.copy(filename, dst) | |
pattern = re.compile(searchstr) | |
logging.debug("replacement text..........") | |
logging.debug(replacement_text) | |
replaced_text = pattern.sub(replacement_text, original_text) | |
logging.debug("replaced text..........") | |
logging.debug(replaced_text) | |
f = open(filename, "w") | |
n = f.write(replaced_text) | |
f.close() | |
main() |
import argparse | |
import logging | |
import os | |
import re | |
import shutil | |
import sys | |
########## This program replaces find . -exec sed 's/a/b/g' {} \; because certain characters like backslash were too difficult to handle using bash | |
########## global variables | |
start_dir = '' | |
search_regex_file = '' | |
replacement_text_file = '' | |
ext_list = [] | |
########## end global variables | |
def openFileHelper(filename): | |
# read the entire file into a single string | |
with open(filename, 'r') as f: | |
return f.read() | |
# takes the filename of the file to search | |
# pattern is the regex pattern to search for | |
def searchInFile(filename, pattern): | |
text = openFileHelper(filename) | |
#pattern = re.compile(searchregex) | |
m = pattern.search(text) | |
if m: | |
return True | |
else: | |
return False | |
def process_args(): | |
global start_dir, search_regex_file, replacement_text_file, ext_list | |
parser = argparse.ArgumentParser(add_help=False) | |
parser.add_argument("--start_dir", help="the root directory to begin processing.", required=True) | |
parser.add_argument("--search_regex_file", help="the file containing the regex to search for. The file should contain a single line and trailing whitespace will be stripped.", required=True) | |
parser.add_argument("--replacement_text_file", help="the file containing the replacement string. The file should contain a single line and trailing whitespace will be stripped.", required=True) | |
parser.add_argument("--ext_list", help="the list of file extensions to search in separated with commas.") | |
parser.add_argument("--log_level", choices=['CRITICAL', 'ERROR', 'WARNING', 'INFO', 'DEBUG'], help="logging level. Default is WARNING.") | |
parser.add_argument('-h', '--help', action='help', default=argparse.SUPPRESS, help="This program helps to search and replace text.") | |
args = parser.parse_args() | |
if args.start_dir: | |
start_dir = args.start_dir | |
if args.search_regex_file: | |
search_regex_file = args.search_regex_file | |
if args.replacement_text_file: | |
replacement_text_file = args.replacement_text_file | |
if args.ext_list: | |
ext_list = args.ext_list.split(',') | |
if args.log_level: | |
if args.log_level == 'CRITICAL': | |
logging.basicConfig(level=logging.CRITICAL) | |
elif args.log_level == 'ERROR': | |
logging.basicConfig(level=logging.ERROR) | |
elif args.log_level == 'INFO': | |
logging.basicConfig(level=logging.INFO) | |
elif args.log_level == 'DEBUG': | |
logging.basicConfig(level=logging.DEBUG) | |
else: | |
logging.basicConfig(level=logging.WARNING) | |
else: | |
# set logging level. WARNING is default level | |
logging.basicConfig(level=logging.WARNING) | |
logging.debug("start_dir is: " + start_dir) | |
logging.debug("search_regex_file is: " + search_regex_file) | |
logging.debug("replacement_text_file is: " + replacement_text_file) | |
logging.debug("ext_list is: " + ",".join(ext_list)) | |
def process(pattern): | |
for root, dirs, files in os.walk(start_dir): | |
for name in files: | |
(base, extension) = os.path.splitext(name) | |
if extension.startswith('.'): | |
ext = extension.lstrip('.') | |
ext_match = False | |
if ext_list: | |
if ext in ext_list: | |
ext_match = True | |
else: | |
ext_match = True | |
if ext_match: | |
input_filename = os.path.join(root, name) | |
found = searchInFile(input_filename, pattern) | |
if found: | |
# create backup copy | |
backup_filename = input_filename + '~' | |
shutil.copy(input_filename, backup_filename) | |
original_text = openFileHelper(input_filename) | |
# replace text | |
replaced_text = pattern.sub(replacement_text, original_text) | |
# save to original file | |
f = open(input_filename, "w") | |
n = f.write(replaced_text) | |
f.close() | |
def main(): | |
global search_regex, replacement_text | |
if sys.version_info < (3,0,0): | |
print("Please use a version of Python > 3") | |
sys.exit(-1) | |
process_args() | |
search_regex = openFileHelper(search_regex_file).rstrip() | |
replacement_text = openFileHelper(replacement_text_file).rstrip() | |
logging.debug("search_regex is: " + search_regex) | |
logging.debug("replacement_text is: " + replacement_text) | |
pattern = re.compile(search_regex) | |
process(pattern) | |
main() |
# very simple, no need to download web framework and deploy app, simply navigate to directory. directory serves as document root | |
python3 -m http.server 8888 |
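# newer pythons can also pin the interface and the document root (--directory needs 3.7+): | |
python3 -m http.server 8888 --bind 127.0.0.1 --directory /path/to/docroot |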
import argparse | |
import csv | |
import logging | |
import os | |
import sys | |
import re | |
import traceback | |
''' | |
This code parses a Java thread dump txt file and writes it to a csv file | |
for easier analysis. | |
It creates two csv files: a .csv file and a -summary.csv file, | |
named by stripping the .txt suffix from the original thread dump file name and appending those suffixes. | |
Thread dumps generated with a long listing contain additional fields, | |
so this program attempts to parse with a long listing strategy and then a simple listing strategy. | |
It is normal to see some errors, since the strategies that do not match the format will fail. | |
''' | |
# comma is the csv field separator, so use a different character to separate sub-fields | |
SUB_SEPARATOR = '|' | |
title = '' | |
jni_global_references = '' | |
heap = '' | |
start_dir = r'C:\Users\Dixson\Downloads\support\logs\test' | |
home_dir = os.path.expanduser('~') | |
output_dir = '' | |
print_runnable = True | |
print_thread_count = False | |
print_other_thread_summary = False | |
class Substate: | |
def __init__(self, msg, objectid, otherClassName): | |
self.msg = msg | |
self.objectid = objectid | |
self.otherClassName = otherClassName | |
# strategy | |
# Enhancement JDK-8200720 allows for additional fields | |
class EnhancedLongListingStrategy(object): | |
def __init__(self): | |
self.name = 'EnhancedLongListingStrategy' | |
self.col_li = ['name', 'number', 'type', 'priority', 'os_priority', 'cpu', 'elapsed', 'tid', 'nid', 'status', 'state', 'substate', 'address', 'stack'] | |
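# a typical header line this strategy expects (JDK-8200720 adds the cpu= and elapsed= fields), e.g.: | |
#   "main" #1 prio=5 os_prio=0 cpu=123.45ms elapsed=67.89s tid=0x00007f9124000000 nid=0x1cafd5 waiting on condition [0x00007f9124001000] | |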
def process_threadprop(self, s): | |
# replace with underscores for easier parsing | |
s = s.replace('waiting on condition', 'waiting_on_condition') | |
s = s.replace('in Object.wait()', 'in_Object.wait()') | |
s = s.replace('waiting for monitor entry', 'waiting_for_monitor_entry') | |
#logging.debug("s: {}".format(s)) | |
thread_name = '' | |
threadprop = {} | |
# extract thread name | |
m = re.match(r'"(.*)"(.*)$', s) | |
if m: | |
thread_name = m.group(1) | |
substring = m.group(2) | |
if (s.find('daemon') > -1): | |
# general case, most threads 'labelled' daemon | |
li = substring.split() | |
thread_no = li[0].lstrip('#') | |
thread_type = li[1] | |
thread_priority = li[2] | |
thread_ospriority = li[3] | |
thread_cpu = li[4] | |
thread_elapsed = li[5] | |
thread_tid = li[6] | |
thread_nid = li[7] | |
thread_status = li[8] | |
# some thread dumps show status with address, eg. sleeping[0x00007f297b44f000] | |
if len(li) >= 10: | |
thread_address = li[9] | |
else: | |
thread_address = '' | |
threadprop = {'name': thread_name, 'number': thread_no, 'type': thread_type, 'priority': thread_priority, 'os_priority': thread_ospriority,'cpu': thread_cpu, 'elapsed': thread_elapsed, 'tid': thread_tid, 'nid': thread_nid, 'status': thread_status, 'address': thread_address} | |
else: | |
# threads not labelled 'daemon' | |
logging.debug('substring {}'.format(substring)) | |
m = re.match(r' #(\d+) (.*)$', substring) | |
if m: | |
thread_no = m.group(1) | |
substring = m.group(2) | |
li = substring.split() | |
thread_priority = li[0] | |
thread_ospriority = li[1] | |
thread_cpu = li[2] | |
thread_elapsed = li[3] | |
thread_tid = li[4] | |
thread_nid = li[5] | |
thread_status = li[6] | |
# some thread dumps show status with address, eg. sleeping[0x00007f297b44f000] | |
if len(li) >= 8: | |
thread_address = li[7] | |
else: | |
thread_address = '' | |
threadprop = {'name': thread_name, 'number': thread_no, 'priority': thread_priority, 'os_priority': thread_ospriority, 'cpu': thread_cpu, 'elapsed':thread_elapsed, 'tid': thread_tid, 'nid': thread_nid, 'status': thread_status, 'address': thread_address} | |
else: | |
# jvm threads - only display basic information | |
# "G1 Conc#0" os_prio=0 cpu=1453.41ms elapsed=52307.25s tid=0x00007f912406ded0 nid=0x1cafd5 runnable | |
li = substring.split() | |
thread_ospriority = li[0] | |
thread_cpu = li[1] | |
thread_elapsed = li[2] | |
thread_tid = li[3] | |
thread_nid = li[4] | |
thread_status = li[5] | |
threadprop = {'name' : thread_name, 'os_priority' : thread_ospriority, 'cpu': thread_cpu, 'elapsed' : thread_elapsed, 'tid' : thread_tid, 'nid' : thread_nid, 'status' : thread_status} | |
return threadprop | |
# generated with jstack -l | |
class LongListingStrategy(object): | |
def __init__(self): | |
self.name = 'LongListingStrategy' | |
self.col_li = ['name', 'number', 'type', 'priority', 'os_priority', 'tid', 'nid', 'status', 'state', 'substate', 'address', 'stack'] | |
def process_threadprop(self, s): | |
# replace with underscores for easier parsing | |
s = s.replace('waiting on condition', 'waiting_on_condition') | |
s = s.replace('in Object.wait()', 'in_Object.wait()') | |
s = s.replace('waiting for monitor entry', 'waiting_for_monitor_entry') | |
#logging.debug("s: {}".format(s)) | |
thread_name = '' | |
threadprop = {} | |
# extract thread name | |
m = re.match(r'"(.*)"(.*)$', s) | |
if m: | |
thread_name = m.group(1) | |
substring = m.group(2) | |
# general case, most threads 'labelled' daemon | |
if (s.find('daemon') > -1): | |
li = substring.split() | |
thread_no = li[0].lstrip('#') | |
thread_type = li[1] | |
thread_priority = li[2] | |
thread_ospriority = li[3] | |
thread_tid = li[4] | |
thread_nid = li[5] | |
thread_status = li[6] | |
# some thread dumps show status with address, eg. sleeping[0x00007f297b44f000] | |
if len(li) >= 8: | |
thread_address = li[7] | |
else: | |
thread_address = '' | |
threadprop = {'name': thread_name, 'number': thread_no, 'type': thread_type, 'priority': thread_priority, 'os_priority': thread_ospriority, 'tid': thread_tid, 'nid': thread_nid, 'status': thread_status, 'address': thread_address} | |
else: | |
#"RMI Reaper" #14 prio=5 os_prio=0 tid=0x00007f2bd1d3f800 nid=0x2161 in Object.wait() [0x00007f2106550000] | |
#"main" #1 prio=5 os_prio=0 tid=0x00007f2bd000b800 nid=0x20ab waiting on condition [0x00007f2bd5f79000] | |
#"main" #1 prio=5 os_prio=0 tid=0x00007f79c000d800 nid=0x9091 sleeping[0x00007f79c8305000] | |
#"GS-swiftJmsSenderContainer-1" #205 prio=5 os_prio=0 tid=0x00007f684645a000 nid=0x6156 sleeping[0x00007f6735dea000] | |
m = re.match(r' #(\d+) (.*)$', substring) | |
if m: | |
thread_no = m.group(1) | |
substring = m.group(2) | |
li = substring.split() | |
thread_priority = li[0] | |
thread_ospriority = li[1] | |
thread_tid = li[2] | |
thread_nid = li[3] | |
thread_status = li[4] | |
# some thread dumps show status with address, eg. sleeping[0x00007f297b44f000] | |
if len(li) >= 6: | |
thread_address = li[5] | |
else: | |
thread_address = '' | |
threadprop = {'name': thread_name, 'number': thread_no, 'priority': thread_priority, 'os_priority': thread_ospriority, 'tid': thread_tid, 'nid': thread_nid, 'status': thread_status, 'address': thread_address} | |
else: | |
# jvm threads only display basic information | |
li = substring.split() | |
thread_ospriority = li[0] | |
thread_tid = li[1] | |
thread_nid = li[2] | |
thread_status = li[3] | |
threadprop = {'name' : thread_name, 'os_priority' : thread_ospriority, 'tid' : thread_tid, 'nid' : thread_nid, 'status' : thread_status} | |
return threadprop | |
# generated with jstack; missing thread number and os_priority | |
class SimpleListingStrategy(object): | |
def __init__(self): | |
self.name = 'SimpleListingStrategy' | |
self.col_li = ['name', 'type', 'priority', 'tid', 'nid', 'status', 'state', 'substate', 'address', 'stack', 'locked_ownable_synchronizers'] | |
def process_threadprop(self, s): | |
# replace with underscores for easier parsing | |
s = s.replace('waiting on condition', 'waiting_on_condition') | |
s = s.replace('in Object.wait()', 'in_Object.wait()') | |
s = s.replace('waiting for monitor entry', 'waiting_for_monitor_entry') | |
thread_name = '' | |
threadprop = {} | |
# extract thread name | |
m = re.match(r'"(.*)"(.*)$', s) | |
if m: | |
thread_name = m.group(1) | |
substring = m.group(2) | |
# general case, most threads 'labelled' daemon | |
if (s.find('daemon') > -1): | |
li = substring.split() | |
thread_type = li[0] | |
thread_priority = li[1] | |
thread_tid = li[2] | |
thread_nid = li[3] | |
thread_status = li[4] | |
# some thread dumps show status with address, eg. sleeping[0x00007f297b44f000] | |
if len(li) >= 6: | |
thread_address = li[5] | |
else: | |
thread_address = '' | |
threadprop = {'name': thread_name, 'type': thread_type, 'priority': thread_priority, 'tid': thread_tid, 'nid': thread_nid, 'status': thread_status, 'address': thread_address} | |
else: | |
li = substring.split() | |
if len(li) > 3: | |
thread_priority = li[0] | |
thread_tid = li[1] | |
thread_nid = li[2] | |
thread_status = li[3] | |
# some thread dumps show status with address, eg. sleeping[0x00007f297b44f000] | |
if len(li) >= 5: | |
thread_address = li[4] | |
else: | |
thread_address = '' | |
threadprop = {'name': thread_name, 'priority': thread_priority, 'tid': thread_tid, 'nid': thread_nid, 'status': thread_status, 'address': thread_address} | |
else: | |
#"GS-GSPingManager:com.gigaspaces.internal.lrmi.stubs.LRMISpaceImpl:1632991357520" Id=721 TIMED_WAITING | |
thread_id = li[0] | |
thread_status = li[1] | |
threadprop = {'name': thread_name, 'tid' : thread_id, 'status' : thread_status} | |
return threadprop | |
# end strategy | |
# an indented line containing java.lang.Thread.State is usually the first line of the block | |
def process_state(li): | |
if( len(li) > 0): | |
#logging.debug(li[0]) | |
m = re.match(r'^\s+java\.lang\.Thread\.State: (.*)$', li[0]) | |
if m: | |
return m.group(1) | |
else: | |
return '' | |
else: | |
return '' | |
# a stack trace may have additional information I call substate | |
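# example substate lines as they appear indented under a stack frame: | |
#   - locked <0x00000000c0a8f9d8> (a java.lang.Object) | |
#   - waiting on <0x00000000c0a8f9d8> (a java.lang.Object) | |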
def process_substate(li): | |
#logging.debug("In process substate") | |
#logging.debug("li is: " + ''.join(li)) | |
substateObj = None | |
substate_li = [] | |
substateObj_li = [] | |
for s in li: | |
s = s.strip() | |
logging.debug("s is: '" + s + "'") | |
if( s.startswith('-')): | |
substate_li.append(s) | |
m = re.match(r'-(.*)<(.*)> \(a (.*)\)', s) | |
if m: | |
msg = m.group(1).strip() | |
objectid = m.group(2) | |
classname = m.group(3) | |
logging.debug("match found") | |
substateObj = Substate(msg, objectid, classname) | |
substateObj_li.append(substateObj) | |
return (SUB_SEPARATOR.join(substate_li), substateObj_li) | |
def process_stack(li): | |
stack_li = [] | |
for s in li: | |
s = s.strip() | |
stack_li.append(s) | |
#logging.debug("begin>>>>> %s" % SUB_SEPARATOR.join(stack_li)) | |
#logging.debug("end>>>>>>>") | |
return SUB_SEPARATOR.join(stack_li) | |
def process_heap(li): | |
heap_li = [] | |
for s in li: | |
s = s.strip() | |
heap_li.append(s) | |
s = SUB_SEPARATOR.join(heap_li) | |
return s.replace(',', '\'') | |
# the information in this block occurs below the stack trace | |
def process_locked_ownable_sync(block_li): | |
#logging.debug("block_li in locked_ownable_synchronizers: {}".format(block_li)) | |
if not block_li: | |
return '' | |
length = len(block_li) | |
for n in range(0, length): | |
s = block_li[n] | |
if s.find('Locked ownable synchronizers:') > -1 : | |
# return value in next line | |
if n + 1 < length: | |
return block_li[n + 1].strip().lstrip('-') | |
return '' | |
def process_block(strategy, block_li, nextblock_li, threadprop_by_name): | |
global title, jni_global_references, heap | |
logging.debug("BEGIN BLOCK") | |
logging.debug(block_li) | |
logging.debug("END BLOCK") | |
s = block_li[0] | |
if (s.startswith('"')): | |
# thread name found | |
threadprop = strategy.process_threadprop(s) | |
threadprop['state'] = process_state(block_li[1:]) | |
threadprop['block'] = block_li[1:] | |
# there can be more than 1 thread referenced | |
(substate, substateObj) = process_substate(block_li[1:]) | |
threadprop['substate'] = substate | |
threadprop['substateObj'] = substateObj | |
threadprop['stack'] = process_stack(block_li[1:]) | |
threadprop['locked_ownable_synchronizers'] = process_locked_ownable_sync(nextblock_li) | |
threadprop_by_name[threadprop['name']] = threadprop | |
elif (s.startswith('Full thread dump')): | |
title = s | |
elif (s.startswith('JNI global references') or s.startswith('JNI global refs')): | |
jni_global_references = s | |
elif (s == 'Heap'): | |
heap = process_heap(block_li[1:]) | |
else: | |
logging.debug('Skipping block that starts with line: {}'.format(s)) | |
return threadprop_by_name | |
# print substate in another format for easy viewing | |
# print thread name, id, status, object id, classname | |
def print_substate(threadprop_by_name, mywriter): | |
mywriter.writerow(['substate (redux)', 'thread', 'tid', 'msg', 'other_oid', 'other_classname (e.g, locked/waiting on)']) | |
for k in threadprop_by_name.keys(): | |
thread_name = k | |
#logging.debug(threadprop_by_name[k]) | |
threadprop = threadprop_by_name[k] | |
tid = threadprop['tid'].split('=')[1] | |
if 'substateObj' in threadprop: | |
substatusObj_li = threadprop_by_name[k]['substateObj'] | |
for substatusObj in substatusObj_li: | |
mywriter.writerow(['',thread_name, tid, substatusObj.msg, substatusObj.objectid, substatusObj.otherClassName ]) | |
def print_runnable_stack(threadprop_by_name, mywriter): | |
mywriter.writerow(['runnable', 'thread (in state RUNNABLE)', 'stack']) | |
for k in threadprop_by_name.keys(): | |
thread_name = k | |
threadprop = threadprop_by_name[k] | |
state = threadprop['state'] | |
if state == 'RUNNABLE': | |
# re-format original stack trace | |
block = [line.strip() for line in threadprop['block'][1:]] | |
block_s = "\n".join(block) | |
mywriter.writerow(['',thread_name,block_s]) | |
def count_occurrences(threadprop_by_name, field, mywriter, column_name): | |
logging.debug("field is: " + field); | |
values = [] | |
count_dict = {} | |
for k in threadprop_by_name.keys(): | |
value = threadprop_by_name[k][field] | |
values.append(value) | |
for item in values: | |
if item in count_dict: | |
count = count_dict[item] | |
count += 1 | |
count_dict[item] = count | |
else: | |
count_dict[item] = 1 | |
mywriter.writerow([column_name, 'value', 'count']) | |
# to sort by count (descending) instead of by key: | |
#   for key, value in sorted(count_dict.items(), key=lambda kv: kv[1], reverse=True): | |
#       mywriter.writerow(['', key[:160], value]) | |
sorted_keys = sorted(count_dict.keys()) | |
for key in sorted_keys: | |
value = count_dict[key] | |
s = key[:160] | |
if not s: | |
s = "EMPTY" | |
mywriter.writerow(['', s, value]) | |
def print_threads(strategy, threadprop_by_name, mywriter): | |
mywriter.writerow(strategy.col_li) | |
mywriter.writerow(['Title', title]) | |
mywriter.writerow(['JNI global references', jni_global_references]) | |
if heap: | |
mywriter.writerow(['Heap', heap]) | |
mywriter.writerow([]) | |
mywriter.writerow(['** Begin threads **']) | |
keys = sorted(threadprop_by_name.keys()) | |
#keys.sort() | |
for k in keys: | |
#logging.debug('%s => %s' % (k, threadprop_by_name[k])) | |
li = [] | |
threadprop_dict = threadprop_by_name[k] | |
for col in strategy.col_li: | |
if col in threadprop_dict: | |
s = threadprop_dict[col] | |
else: | |
s = '' | |
s = s if s is not None else '' | |
li.append(s) | |
mywriter.writerow(li) | |
def write_csv(strategy, threadprop_by_name, filename): | |
# write output to csv file | |
# output compilation of thread properties | |
filename_woext, file_extension = os.path.splitext(filename) | |
output_filename = os.path.join(output_dir, filename_woext + '.csv') | |
with open(output_filename, 'w', newline='') as csvfile: | |
mywriter = csv.writer(csvfile) | |
print_threads(strategy, threadprop_by_name, mywriter) | |
# output summary | |
output_filename = os.path.join(output_dir, filename_woext + '-summary.csv') | |
with open(output_filename, 'w', newline='') as csvfile: | |
mywriter = csv.writer(csvfile) | |
if print_thread_count == True: | |
count_occurrences(threadprop_by_name, 'status', mywriter, 'status') | |
count_occurrences(threadprop_by_name, 'state', mywriter, 'state') | |
count_occurrences(threadprop_by_name, 'substate', mywriter, 'linked to') | |
# an application with many threads in a certain section of code may indicate a problem | |
count_occurrences(threadprop_by_name, 'stack', mywriter, 'stack (first few lines of)') | |
if print_other_thread_summary == True: | |
print_substate(threadprop_by_name, mywriter) | |
if print_runnable == True: | |
print_runnable_stack(threadprop_by_name, mywriter) | |
def process_file(fullpathname, filename): | |
line_number = 0 | |
f = open(fullpathname) | |
# allblock_li is all the thread text sections saved to a list | |
allblock_li = [] | |
# current_block_li is a text section containing information for a single thread | |
current_block_li = [] | |
# k thread name -> v dictionary with key (column heading or property name), value pairs for that thread | |
threadprop_by_name = {} | |
firsttime = True | |
for line in f: | |
line_number += 1 | |
s = line.rstrip() | |
#logging.debug(">> %d: %s" % (line_number, s)) | |
# lines beginning with white space | |
m = re.match(r'^(\s)+(.*)$', s) | |
# separate lines in file into sections, ie, block | |
# save for future processing | |
# need to be able to look ahead into block and next block | |
if( not m): | |
# new block found | |
if( firsttime == False ): | |
#threadprop_by_name = process_block(strategy, current_block_li, threadprop_by_name) | |
allblock_li.append(current_block_li) | |
else: | |
firsttime = False | |
# reset current_block_li | |
current_block_li = [s] | |
else: | |
current_block_li.append(s) | |
allblock_li.append(current_block_li) | |
f.close() | |
#threadprop_by_name = process_block(strategy, current_block_li, threadprop_by_name) | |
# initialize strategies | |
strategy_li = [EnhancedLongListingStrategy(), LongListingStrategy(), SimpleListingStrategy()] | |
# try each strategy until one processes cleanly | |
for strategy in strategy_li: | |
try: | |
threadprop_by_name = {} | |
length = len(allblock_li) | |
for n in range(0, length): | |
#for block in allblock_li: | |
block = allblock_li[n] | |
if (n+1 >= length): | |
nextblock = None | |
else: | |
nextblock = allblock_li[n+1] | |
threadprop_by_name = process_block(strategy, block, nextblock, threadprop_by_name) | |
write_csv(strategy, threadprop_by_name, filename) | |
# if this succeeds, no need to try next strategy | |
break | |
except Exception as err: | |
logging.warning("Error caught while parsing {} using strategy {}".format(filename, strategy.name)) | |
logging.warning("Error: {0}".format(err)) | |
traceback.print_exc() | |
def process_args(): | |
global start_dir, output_dir, print_runnable, print_thread_count, print_other_thread_summary | |
parser = argparse.ArgumentParser(add_help=False) | |
parser.add_argument("--start_dir", help="the root directory to begin processing") | |
parser.add_argument("--output_dir", help="where the output file should be written to") | |
parser.add_argument("--print_runnable", help="print the stack traces of the runnable threads. Default is true") | |
parser.add_argument("--print_thread_count", help="print a summary of thread counts by class. Default is false") | |
parser.add_argument("--print_other_thread_summary", help="print a summary of the referenced threads. Default is false") | |
parser.add_argument("--log_level", choices=['CRITICAL', 'ERROR', 'WARNING', 'INFO', 'DEBUG'], help="logging level") | |
parser.add_argument('-h', '--help', action='help', default=argparse.SUPPRESS, help="This program parses a set of thread dump files generated with jstack or kill -3.") | |
args = parser.parse_args() | |
if args.start_dir: | |
start_dir = args.start_dir | |
if args.output_dir: | |
output_dir = args.output_dir + os.path.sep | |
else: | |
output_dir = home_dir + os.path.sep | |
if args.print_runnable: | |
if args.print_runnable.lower() == 'false' or args.print_runnable.lower() == 'f': | |
print_runnable = False | |
if args.print_thread_count: | |
if args.print_thread_count.lower() == 'true' or args.print_thread_count.lower() == 't': | |
print_thread_count = True | |
if args.print_other_thread_summary: | |
if args.print_other_thread_summary.lower() == 'true' or args.print_other_thread_summary.lower() == 't': | |
print_other_thread_summary = True | |
if args.log_level: | |
if args.log_level == 'CRITICAL': | |
logging.basicConfig(level=logging.CRITICAL) | |
elif args.log_level == 'ERROR': | |
logging.basicConfig(level=logging.ERROR) | |
elif args.log_level == 'INFO': | |
logging.basicConfig(level=logging.INFO) | |
elif args.log_level == 'DEBUG': | |
logging.basicConfig(level=logging.DEBUG) | |
else: | |
logging.basicConfig(level=logging.WARNING) | |
else: | |
# set logging level. WARNING is default level | |
logging.basicConfig(level=logging.WARNING) | |
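# a hypothetical invocation of this parser (the script name is illustrative): | |
#   python3 thread-dump-summary.py --start_dir ./dumps --output_dir /tmp --print_thread_count true | |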
def main(): | |
if sys.version_info < (3,0,0): | |
print("Please use a version of Python > 3") | |
sys.exit(-1) | |
process_args() | |
for start, dirs, files in os.walk(start_dir): | |
for name in files: | |
if name.endswith('txt') or name.endswith('tdump'): | |
process_file(os.path.join(start, name), name) | |
main() |