Skip to content

Instantly share code, notes, and snippets.

@anarazel
Created September 12, 2023 19:16
Show Gist options
  • Save anarazel/72389c9d9531dd06d2230a8c5ec5babd to your computer and use it in GitHub Desktop.
Save anarazel/72389c9d9531dd06d2230a8c5ec5babd to your computer and use it in GitHub Desktop.
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sys
def quantplot(df, percentile_limit=0.001, n=10, semilog=False, interval='1s'):
    """Plot the distribution of ``elapsed_us`` over time as shaded quantile bands.

    The rows of *df* are grouped into time bins of width *interval* (using
    the ``time_us`` column), and for every bin ``2*n + 1`` evenly spaced
    quantiles of ``elapsed_us`` are computed.  The area between the middle
    quantile and each other quantile is filled with a transparent colour, so
    regions covered by many quantiles end up darker.  The middle quantile
    and the per-bin mean are drawn as lines on top.

    Args:
        df: DataFrame with a timedelta column ``time_us`` and a numeric
            column ``elapsed_us``.
        percentile_limit: Distance of the lowest/highest quantile from 0
            and 1 respectively.
        n: Number of quantile bands on each side of the middle quantile.
        semilog: If true, use a logarithmic y axis.
        interval: Resampling bin width, passed to ``DataFrame.resample``.

    Returns:
        A ``(fig, ax, ranges)`` tuple, where *ranges* is the list of objects
        returned by the ``fill_between`` calls (lower bands first).

    Raises:
        ValueError: If *n* is smaller than 1.
    """
    if n < 1:
        raise ValueError("n must be at least 1")

    # 2n+1 quantiles: indexes 0..n-1 lie below the middle one (index n),
    # indexes n+1..2n lie above it.
    quantiles = np.linspace(0 + percentile_limit, 1 - percentile_limit,
                            num=n * 2 + 1)
    # With n overlapping bands per side, 1/n makes the innermost band
    # (covered by all of them) approach full opacity.
    alpha = 1 / n

    df_rs = df.resample(interval, on='time_us')
    x_source = df_rs['elapsed_us']

    # quantile() generates a multi-index with an index "column" for each
    # computed quantile. Unstack moves those to result set columns.
    #
    # XXX: Columns are looked up by their float quantile value below.
    df_q = x_source.quantile(quantiles).unstack()

    # Use the total elapsed seconds of each bin. The ".seconds" attribute is
    # only the seconds *component* of the timedelta, which wraps around after
    # a day and drops the fractional part of sub-second bins.
    # XXX, should perhaps be determined on the caller level?
    y = df_q.index.total_seconds().values

    # XXX, add optional smoothing?

    fig, ax = plt.subplots()

    if semilog:
        ax.semilogy()

    median = df_q[quantiles[n]]

    # Plot "area" between 50% quantile and the "lower" quantiles. By
    # overlapping multiple transparent areas the more common quantiles become
    # darker. Kept separate from the "higher" quantiles so a different color
    # could be used.
    ranges = []
    for i in range(0, n):
        ranges.append(ax.fill_between(y, df_q[quantiles[i]], median,
                                      alpha=alpha, color='g', edgecolor=None))

    # Same as above, but for "higher quantiles". The range has to include
    # index 2n, otherwise the highest quantile is never drawn.
    for i in range(n + 1, n * 2 + 1):
        ranges.append(ax.fill_between(y, median, df_q[quantiles[i]],
                                      alpha=alpha, color='g', edgecolor=None))

    # Plot median quantile as a line.
    ax.plot(y, median, color='g', label='median quantile')

    # Also add mean as a line
    ax.plot(y, x_source.mean(), color='b', label='mean')

    ax.set_xlabel('time in s')
    ax.set_ylabel('duration in us')
    fig.legend()

    return fig, ax, ranges
def main():
    """Plot one quantile figure for every log file named on the command line.

    Each file is read as a space separated table with the columns
    client, tx, elapsed_us, script_no, srctime_s and srctime_us
    (presumably pgbench's per-transaction log format - confirm against the
    producer of the files).  Exits with status 1 if no file was given.
    """
    if len(sys.argv) < 2:
        print("pass file(s) as args", file=sys.stderr)
        sys.exit(1)

    usec_per_s = 1_000_000

    for fname in sys.argv[1:]:
        df = pd.read_csv(fname,
                         sep=' ',
                         names=['client', 'tx', 'elapsed_us', 'script_no',
                                'srctime_s', 'srctime_us'],
                         usecols=['elapsed_us', 'srctime_s', 'srctime_us'],
                         engine='c')

        # Without any rows there is no first timestamp to look up below
        # (df.loc[0, ...] would raise KeyError) and nothing to plot.
        if df.empty:
            print(f"{fname}: no data, skipping", file=sys.stderr)
            continue

        # combine time-in-seconds with the microseconds column
        df['time_us'] = df['srctime_s'] * usec_per_s + df['srctime_us']

        # make time relative to start
        start = df.loc[0, 'time_us']
        df['time_us'] = (df['time_us'] - start).astype('timedelta64[us]')

        fig, ax, ranges = quantplot(df)
        fig.set_figwidth(15)
        fig.set_figheight(4)
        fig.show()

    # Block until all figure windows have been closed.
    plt.show(block=True)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment