Created
May 1, 2024 18:38
-
-
Save esoergel/8f6ed01396e4b09cd920f3f0e8216641 to your computer and use it in GitHub Desktop.
Script for interpreting elasticsearch profile JSON
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
""" | |
Script for interpreting elasticsearch profile JSON. It traverses the profile | |
data per shard, extracting all leaf nodes that took more than 1ms to process. | |
These are then (roughly) joined across shards and aggregated. The hope is that | |
this will clearly highlight the worst offenders in the query. | |
Usage: | |
$ ./interpret_es_profile.py es_profile_3_main_2024-05-01T16.15.57.json | |
Overall time per shard | |
Avg: 10.2687 - Min: 0.0203 - Med: 10.0286 - Max: 28.2535 - StDev: 9.1360 - count 96 | |
Leaf nodes over 1ms | |
9.7365 s: TermInSetQuery - _uid:case#********************* _uid:case#************************ [...] | |
Avg: 9.7365 - Min: 0.0064 - Med: 9.4433 - Max: 27.4775 - StDev: 8.8363 - count 96 | |
0.2373 s: MultiTermQueryConstantScoreWrapper - case_properties.value:mylastname~1 | |
Avg: 0.2373 - Min: 0.0014 - Med: 0.2454 - Max: 0.5847 - StDev: 0.1786 - count 93 | |
0.2225 s: TermQuery - case_properties.value.phonetic:X1011 | |
Avg: 0.2225 - Min: 0.0014 - Med: 0.0572 - Max: 4.2817 - StDev: 0.5291 - count 94 | |
0.0595 s: MultiTermQueryConstantScoreWrapper - case_properties.value:myfirstname~1 | |
Avg: 0.0595 - Min: 0.0010 - Med: 0.0533 - Max: 0.1843 - StDev: 0.0494 - count 84 | |
""" | |
import argparse | |
import json | |
from dataclasses import dataclass, field | |
from textwrap import shorten | |
from typing import List | |
import numpy | |
@dataclass() | |
class TimeArray: | |
"""Stores a list of evaluation times per shard, in nanoseconds""" | |
times: List[int] = field(default_factory=list) | |
def append(self, time): | |
self.times.append(time) | |
@property | |
def average(self): | |
return sum(self.times) / len(self.times) | |
def __str__(self): | |
return " - ".join(f"{name}: {val / 10**9:>7.4f}" for name, val in [ | |
("Avg", self.average), | |
("Min", min(self.times)), | |
("Med", numpy.median(self.times)), | |
("Max", max(self.times)), | |
("StDev", numpy.std(self.times)), | |
]) + f" - count {len(self.times)}" | |
def __repr__(self): | |
return f"TimeArray(avg={self.average / 10**9})" | |
def parse_profile(profile_json): | |
return [shard['searches'][0]['query'][0] | |
for shard in profile_json['shards']] | |
def show_speed_per_shard(shards): | |
time = TimeArray([shard['time_in_nanos'] for shard in shards]) | |
print("\nOverall time per shard") | |
print(f" {time}") | |
def find_leaf_nodes(shard, threshold=None): | |
# Ignore nodes under a millisecond | |
if shard['time_in_nanos'] > 10**6: | |
if 'children' in shard: | |
for child in shard['children']: | |
yield from find_leaf_nodes(child, threshold) | |
else: | |
# It's a leaf node | |
yield shard | |
def worst_leaf_nodes(shards): | |
times_by_query = {} | |
for shard in shards: | |
for leaf_node in find_leaf_nodes(shard): | |
key = (leaf_node['type'], leaf_node['description']) | |
if key not in times_by_query: | |
times_by_query[key] = TimeArray() | |
times_by_query[key].append(leaf_node['time_in_nanos']) | |
return times_by_query | |
def show_worst_leaf_nodes(shards): | |
times = sorted(worst_leaf_nodes(shards).items(), | |
key=lambda x: x[1].average, reverse=True) | |
print("\nLeaf nodes over 1ms") | |
for (qtype, description), time in times: | |
description = shorten(description, 150) | |
print(f"{time.average / 10**9:<7.4f}s: {qtype} - {description}") | |
print(f" {time}") | |
def main(): | |
parser = argparse.ArgumentParser(description='Process ES profile output.') | |
parser.add_argument("profile_json", type=argparse.FileType("r")) | |
args = parser.parse_args() | |
profile_json = json.load(args.profile_json) | |
shards = parse_profile(profile_json) | |
show_speed_per_shard(shards) | |
show_worst_leaf_nodes(shards) | |
if __name__ == "__main__": | |
main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment