Skip to content

Instantly share code, notes, and snippets.

@imanabu
Last active May 16, 2024 23:53
Show Gist options
  • Save imanabu/d3844e5e1f75f80b70f4a9605590be0d to your computer and use it in GitHub Desktop.
Save imanabu/d3844e5e1f75f80b70f4a9605590be0d to your computer and use it in GitHub Desktop.
Neptune Slow Query Parser and Most Offensive Query Discovery Jupyter Notebook Example
# %% [markdown]
# # Neptune Slow Query Log Analysis
# %%
# Path to the exported Neptune slow-query log.
# Each line is expected to be "<timestamp>\t<json payload>".
log_file_name = "slowquery.txt"
# %%
import json
def read_file_lines(filename):
    """Read *filename* and return a list of its lines.

    Trailing whitespace (including the newline) is stripped from each line,
    matching what the downstream tab-split parser expects.
    """
    with open(filename, 'r') as f:
        return [raw.rstrip() for raw in f]
def parse(log_lines=None):
    """Parse slow-query log lines into [{"timestamp": str, "data": dict|None}].

    Each line has the shape "<timestamp>\t<json>".  Malformed JSON (or a
    line with no tab at all) is reported and stored with data=None rather
    than aborting the whole parse.

    Args:
        log_lines: iterable of raw log lines.  Defaults to the module-level
            ``lines`` for backward compatibility with the original script.

    Returns:
        List of {"timestamp": ..., "data": ...} dicts, one per input line.
    """
    if log_lines is None:
        # Backward-compatible fallback to the global loaded in the main code.
        log_lines = lines
    items = []
    for line in log_lines:
        # Split only on the FIRST tab: the JSON payload itself may contain tabs.
        parts = line.split("\t", 1)
        timestamp = parts[0]
        json_data = None
        if len(parts) > 1:
            try:
                json_data = json.loads(parts[1])
            except json.JSONDecodeError:
                # Keep going on malformed lines; downstream code tolerates None.
                print("Error: Invalid JSON format")
        else:
            # No tab separator at all -> no JSON payload on this line.
            print("Error: Invalid JSON format")
        items.append({"timestamp": timestamp, "data": json_data})
    return items
def extract_query(x):
    """Return the query text from one parsed log item, or None.

    Tolerates items whose "data" is None (produced by parse() for
    malformed JSON lines) instead of raising AttributeError.
    """
    data = x.get("data")
    if not data:
        return None
    query_stats = data.get("queryStats")
    if query_stats:
        return query_stats.get("query")
    return None
def extract_fingerprint(x):
    """Return the query fingerprint from one parsed log item, or None.

    Tolerates items whose "data" is None (produced by parse() for
    malformed JSON lines) instead of raising AttributeError.
    """
    data = x.get("data")
    if not data:
        return None
    query_stats = data.get("queryStats")
    if query_stats:
        return query_stats.get("queryFingerprint")
    return None
def extract_pattern(items, item):
    """Count how many entries in *items* share *item*'s query fingerprint."""
    target = extract_fingerprint(item)
    return sum(1 for candidate in items if extract_fingerprint(candidate) == target)
def high_mem(items):
    """Find and report the query with the highest approximate memory usage.

    Scans parsed log items for memoryStats.approximateUsedMemoryBytes,
    prints the worst offender's query text, and reports how many items
    share its fingerprint.  Prints a notice and returns early when no
    item carries memory statistics (the original code would crash with a
    KeyError on an empty placeholder dict in that case).
    """
    highest = 0
    highest_item = None
    for item in items:
        data = item.get("data")
        if data is None:
            continue  # malformed log line; parse() stored data=None
        mem = data.get("memoryStats")
        if mem is None:
            continue
        usage = mem.get("approximateUsedMemoryBytes")
        if usage is not None and usage > highest:
            highest = usage
            highest_item = item
    if highest_item is None:
        # Empty input, or no item had memory statistics.
        print("No memory statistics found in the log.")
        return
    print("This is the offending query.")
    print(f"It used up {highest/1024/1024} Mb")
    print(extract_query(highest_item))
    total = len(items)
    count = extract_pattern(items, highest_item)
    print(f"{count} out of {total} has the same query pattern.")
# --- MAIN CODE --------------------------------------------------------
# Load the raw slow-query log, parse every line into {timestamp, data}
# records, then report the single query with the highest approximate
# memory usage and how often its fingerprint recurs.
lines = read_file_lines(log_file_name)
size = len(lines)
print(f"{size} long query lines loaded.")
items = parse()
high_mem(items)
# %%
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment