Skip to content

Instantly share code, notes, and snippets.

@imanabu
Last active May 16, 2024 23:53
Show Gist options
  • Save imanabu/d3844e5e1f75f80b70f4a9605590be0d to your computer and use it in GitHub Desktop.
Save imanabu/d3844e5e1f75f80b70f4a9605590be0d to your computer and use it in GitHub Desktop.
Neptune Slow Query Parser and Most Offensive Query Discovery Jupyter Notebook Example
# %% [markdown]
# # Neptune Slow Query Log Analysis
# %%
# Path to the exported Neptune slow-query log.
# Each line is expected to be "<timestamp>\t<json payload>".
log_file_name = "slowquery.txt"
# %%
import json
def read_file_lines(filename):
    """Read *filename* and return a list of its lines.

    Trailing whitespace (including the newline) is stripped from each line,
    matching what the downstream tab-split parser expects.
    """
    with open(filename, 'r') as f:
        return [raw.rstrip() for raw in f]
def parse(log_lines=None):
    """Parse slow-query log lines into [{"timestamp": str, "data": dict|None}].

    Each line has the shape "<timestamp>\t<json>".  Malformed JSON (or a
    line with no tab at all) is reported and stored with data=None rather
    than aborting the whole parse.

    Args:
        log_lines: iterable of raw log lines.  Defaults to the module-level
            ``lines`` for backward compatibility with the original script.

    Returns:
        List of {"timestamp": ..., "data": ...} dicts, one per input line.
    """
    if log_lines is None:
        # Backward-compatible fallback to the global loaded in the main code.
        log_lines = lines
    items = []
    for line in log_lines:
        # Split only on the FIRST tab: the JSON payload itself may contain tabs.
        parts = line.split("\t", 1)
        timestamp = parts[0]
        json_data = None
        if len(parts) > 1:
            try:
                json_data = json.loads(parts[1])
            except json.JSONDecodeError:
                # Keep going on malformed lines; downstream code tolerates None.
                print("Error: Invalid JSON format")
        else:
            # No tab separator at all -> no JSON payload on this line.
            print("Error: Invalid JSON format")
        items.append({"timestamp": timestamp, "data": json_data})
    return items
def extract_query(x):
    """Return the query text from one parsed log item, or None.

    Tolerates items whose "data" is None (produced by parse() for
    malformed JSON lines) instead of raising AttributeError.
    """
    data = x.get("data")
    if not data:
        return None
    query_stats = data.get("queryStats")
    if query_stats:
        return query_stats.get("query")
    return None
def extract_fingerprint(x):
    """Return the query fingerprint from one parsed log item, or None.

    Tolerates items whose "data" is None (produced by parse() for
    malformed JSON lines) instead of raising AttributeError.
    """
    data = x.get("data")
    if not data:
        return None
    query_stats = data.get("queryStats")
    if query_stats:
        return query_stats.get("queryFingerprint")
    return None
def extract_pattern(items, item):
    """Count how many entries in *items* share *item*'s query fingerprint."""
    target = extract_fingerprint(item)
    return sum(1 for candidate in items if extract_fingerprint(candidate) == target)
def high_mem(items):
    """Find and report the query with the highest approximate memory usage.

    Scans parsed log items for memoryStats.approximateUsedMemoryBytes,
    prints the worst offender's query text, and reports how many items
    share its fingerprint.  Prints a notice and returns early when no
    item carries memory statistics (the original code would crash with a
    KeyError on an empty placeholder dict in that case).
    """
    highest = 0
    highest_item = None
    for item in items:
        data = item.get("data")
        if data is None:
            continue  # malformed log line; parse() stored data=None
        mem = data.get("memoryStats")
        if mem is None:
            continue
        usage = mem.get("approximateUsedMemoryBytes")
        if usage is not None and usage > highest:
            highest = usage
            highest_item = item
    if highest_item is None:
        # Empty input, or no item had memory statistics.
        print("No memory statistics found in the log.")
        return
    print("This is the offending query.")
    print(f"It used up {highest/1024/1024} Mb")
    print(extract_query(highest_item))
    total = len(items)
    count = extract_pattern(items, highest_item)
    print(f"{count} out of {total} has the same query pattern.")
# --- MAIN CODE --------------------------------------------------------
# Load the raw slow-query log, parse every line into {timestamp, data}
# records, then report the single query with the highest approximate
# memory usage and how often its fingerprint recurs.
lines = read_file_lines(log_file_name)
size = len(lines)
print(f"{size} long query lines loaded.")
items = parse()
high_mem(items)
# %%
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment