Skip to content

Instantly share code, notes, and snippets.

@stolen
Created January 18, 2018 16:08
Show Gist options
  • Save stolen/9a28ed9403c724541b0ee5fcd822613e to your computer and use it in GitHub Desktop.
Save stolen/9a28ed9403c724541b0ee5fcd822613e to your computer and use it in GitHub Desktop.
top processes by scheduled time
-module(timetop).
-export([top/2]).
%% @doc Return the `Count' processes that spent the most time scheduled
%% during a `Duration'-millisecond sampling window.
%%
%% The calling process is temporarily raised to `high' priority so it can
%% keep up with the flood of incoming trace messages. The previous
%% priority is restored in an `after' clause so it survives even if
%% sampling crashes (the original restored it only on the happy path).
top(Duration, Count) ->
OldPrio = erlang:process_flag(priority, high),
try
    Result = scheduled_time_top(Duration),
    lists:sublist(Result, Count)
after
    erlang:process_flag(priority, OldPrio)
end.
%% Build the process top according to scheduled time.
%% Traces `running' events for all pre-existing processes for `Duration'
%% milliseconds and returns [{Pid, Percentage}] sorted descending by
%% fraction of the window the process spent scheduled in.
%% For simplicity, we ignore newly started processes
scheduled_time_top(Duration) when is_integer(Duration) ->
% Start collecting trace data and remember when we did that.
% NOTE(review): `nano_seconds' is a legacy alias of `nanosecond' —
% confirm it is still accepted on the target OTP release.
erlang:trace(existing, true, [running,monotonic_timestamp]),
StartTime = erlang:monotonic_time(nano_seconds),
% Remember each process's current status so we can guess whether a
% process with no trace events at all was running the whole time
% (see running_time_percentage/5). Our own process is excluded.
Pids = erlang:processes(),
DefaultStates = [{P, S} || P <- Pids, P =/= self(), {status, S} <- [process_info(P, status)]],
% Set trace finish timer
TRef = erlang:start_timer(Duration, self(), stop_tracing),
% Collect events until the timer fires; EndTime is still unknown here,
% hence `undefined' (no upper bound on accepted timestamps yet).
RevEvents0 = acc_sched_trace_events(StartTime, undefined, TRef, []),
% Finish tracing, remember the time
EndTime = erlang:monotonic_time(nano_seconds),
erlang:trace(existing, false, [running,monotonic_timestamp]),
% Collect remaining events: trace messages already in flight may still
% arrive for a short while after tracing is switched off.
FinTRef = erlang:start_timer(50, self(), stop_collecting),
RevEvents1 = acc_sched_trace_events(StartTime, EndTime, FinTRef, []),
% Concatenate both (reverse-ordered) batches back into arrival order
AllEvents = lists:reverse(RevEvents0, lists:reverse(RevEvents1)),
% For every traced process still alive, calculate its running time percentage
RunPercentages = [{P, running_time_percentage(P, AllEvents, StartTime, EndTime, DefState)} || {P, DefState} <- DefaultStates, erlang:is_process_alive(P)],
% Sort the result, busiest processes first
SortedRunPercentages = lists:sort(fun({_, P1}, {_, P2}) -> P1 >= P2 end, RunPercentages),
% Trim percentages to two decimal places for short printing by default
TrimmedSortedRunPercentages = [{P, round(R * 100) / 100} || {P, R} <- SortedRunPercentages],
% Drop any extra trace messages still sitting in our mailbox
drop_sched_trace_events(),
TrimmedSortedRunPercentages.
%% Compute the percentage of the [StartTime, EndTime] window that `Pid'
%% spent scheduled in, from its in/out trace events. A process with no
%% events at all is assumed to have kept its pre-trace status
%% (`DefState') for the entire window: always running or never running.
running_time_percentage(Pid, AllEvents, StartTime, EndTime, DefState) ->
InWindow = fun(Ts) -> (StartTime =< Ts) andalso (Ts =< EndTime) end,
OwnEvents = [{Ts, Ev} || {Ts, EvPid, Ev} <- AllEvents, EvPid == Pid, InWindow(Ts)],
case {OwnEvents, DefState} of
    {[], running} ->
        % Never rescheduled and was running when we started sampling:
        % assume it occupied a scheduler the whole time.
        100.0;
    {[], _} ->
        % Never scheduled during the window at all.
        0.0;
    {_, _} ->
        Window = EndTime - StartTime,
        caclculate_running_time(StartTime, EndTime, lists:sort(OwnEvents)) * 100.0 / Window
end.
%% Drain {trace_ts, Pid, Event, _, Timestamp} messages from the mailbox
%% into Acc (most recent first) until the timer `TRef' fires. Our own
%% scheduling events and events outside [StartTime, EndTime] are
%% discarded; EndTime = `undefined' means "no upper bound yet" (tracing
%% is still active).
acc_sched_trace_events(StartTime, EndTime, TRef, Acc) ->
Me = self(),
receive
    {timeout, TRef, _} ->
        % Timer fired: hand back everything collected so far.
        Acc;
    {trace_ts, Me, _, _, _} ->
        % Never account our own scheduling events.
        acc_sched_trace_events(StartTime, EndTime, TRef, Acc);
    {trace_ts, Pid, Event, _, Ts} when StartTime =< Ts, EndTime == undefined; StartTime =< Ts, Ts =< EndTime ->
        acc_sched_trace_events(StartTime, EndTime, TRef, [{Ts, Pid, Event} | Acc]);
    {trace_ts, _, _, _, _} ->
        % Out-of-window event: skip it.
        acc_sched_trace_events(StartTime, EndTime, TRef, Acc)
end.
%% Flush every trace_ts message still sitting in the mailbox, returning
%% `ok' once none remain. Messages of any other shape are left in place.
drop_sched_trace_events() ->
receive
    {trace_ts, _, _, _, _} ->
        % Discard and keep draining.
        drop_sched_trace_events()
after 0 ->
    % Mailbox holds no more trace messages.
    ok
end.
%% Sum the time a process spent scheduled in, given its chronologically
%% sorted {Timestamp, in | out} events within [StartTime, EndTime].
%% The misspelled name is kept as-is: running_time_percentage/5 calls
%% the function under this exact name.
caclculate_running_time(_StartTime, _EndTime, []) ->
    % No events left: nothing more to charge.
    0;
caclculate_running_time(_StartTime, EndTime, [{LastIn, in}]) ->
    % Scheduled in and never out again: ran until the window closed.
    EndTime - LastIn;
caclculate_running_time(StartTime, EndTime, [{FirstOut, out} | Rest]) ->
    % First event is an `out': the process must have been running since
    % the window opened, so charge StartTime..FirstOut directly.
    (FirstOut - StartTime) + caclculate_running_time(FirstOut, EndTime, Rest);
caclculate_running_time(_StartTime, EndTime, [{In, in}, {Out, out} | Rest]) ->
    % A complete in/out pair: charge it and continue after the `out'.
    (Out - In) + caclculate_running_time(Out, EndTime, Rest).
@stolen
Copy link
Author

stolen commented Nov 14, 2018

This simple tool allows you to see which processes spend most time on scheduler.
It could be useful when you see a strange scheduler utilization or just have no idea why everything is slow.

Usage:

> timetop:top(200, 10). % top 10 processes by scheduled time, data collected for 200 milliseconds

How to choose collection time:

  • 200 milliseconds is a good start. In most cases it gives something useful and is fast enough (a few seconds to process data).
  • Smaller values are OK, but you may miss something.
  • Use greater values at your own risk — on highly loaded systems with many cores you may wait for result A LOT, consuming A LOT of memory for incoming events.

It could be useful to run the timetop multiple times to see if something is changing right now.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment