Created
January 18, 2018 16:08
-
-
Save stolen/9a28ed9403c724541b0ee5fcd822613e to your computer and use it in GitHub Desktop.
top processes by scheduled time
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-module(timetop). | |
-export([top/2]). | |
top(Duration, Count) -> | |
OldPrio = erlang:process_flag(priority, high), | |
Result = scheduled_time_top(Duration), | |
erlang:process_flag(priority, OldPrio), | |
lists:sublist(Result, Count). | |
%% Build process top according to scheduled time | |
%% For simplicity, we ignore newly started processes | |
scheduled_time_top(Duration) when is_integer(Duration) -> | |
% Start collecting trace data and remember when we did that | |
erlang:trace(existing, true, [running,monotonic_timestamp]), | |
StartTime = erlang:monotonic_time(nano_seconds), | |
% Remember intermediate process states so we can guess if process was running all the time | |
Pids = erlang:processes(), | |
DefaultStates = [{P, S} || P <- Pids, P =/= self(), {status, S} <- [process_info(P, status)]], | |
% Set trace finish timer | |
TRef = erlang:start_timer(Duration, self(), stop_tracing), | |
% Collect events until timer fires | |
RevEvents0 = acc_sched_trace_events(StartTime, undefined, TRef, []), | |
% Finish tracing, remember the time | |
EndTime = erlang:monotonic_time(nano_seconds), | |
erlang:trace(existing, false, [running,monotonic_timestamp]), | |
% Collect remaining events | |
FinTRef = erlang:start_timer(50, self(), stop_collecting), | |
RevEvents1 = acc_sched_trace_events(StartTime, EndTime, FinTRef, []), | |
% Concatenate all events and put them in order | |
AllEvents = lists:reverse(RevEvents0, lists:reverse(RevEvents1)), | |
% For every traced process, calculate its runnint time percentage | |
RunPercentages = [{P, running_time_percentage(P, AllEvents, StartTime, EndTime, DefState)} || {P, DefState} <- DefaultStates, erlang:is_process_alive(P)], | |
% Sort the result | |
SortedRunPercentages = lists:sort(fun({_, P1}, {_, P2}) -> P1 >= P2 end, RunPercentages), | |
% Trim percentages for short printing by default | |
TrimmedSortedRunPercentages = [{P, round(R * 100) / 100} || {P, R} <- SortedRunPercentages], | |
% Drop any extra trace messages | |
drop_sched_trace_events(), | |
TrimmedSortedRunPercentages. | |
running_time_percentage(Pid, AllEvents, StartTime, EndTime, DefState) -> | |
PidEvents = [{Timestamp, Ev} || {Timestamp, EvPid, Ev} <- AllEvents, EvPid == Pid, StartTime =< Timestamp, Timestamp =< EndTime], | |
case PidEvents of | |
[] when DefState == running -> | |
100.0; | |
[] -> | |
0.0; | |
Events -> | |
caclculate_running_time(StartTime, EndTime, lists:sort(Events)) * 100.0 / (EndTime - StartTime) | |
end. | |
acc_sched_trace_events(StartTime, EndTime, TRef, Acc) -> | |
Self = self(), | |
receive | |
{timeout, TRef, _} -> | |
Acc; | |
{trace_ts, Pid, Ev, _, Timestamp} when Pid /= Self, StartTime =< Timestamp, (EndTime == undefined orelse Timestamp =< EndTime) -> | |
acc_sched_trace_events(StartTime, EndTime, TRef, [{Timestamp, Pid, Ev}|Acc]); | |
{trace_ts, _, _, _, _} -> | |
acc_sched_trace_events(StartTime, EndTime, TRef, Acc) | |
end. | |
drop_sched_trace_events() -> | |
receive | |
{trace_ts, _, _, _, _} -> drop_sched_trace_events() | |
after 0 -> ok | |
end. | |
caclculate_running_time(_StartTime, _EndTime, []) -> | |
0; | |
caclculate_running_time(_StartTime, EndTime, [{SingleInTime, in}]) -> | |
EndTime - SingleInTime; | |
caclculate_running_time(StartTime, EndTime, [{OutTime, out} | Events]) -> | |
% If we see a process out, assume it was initially in | |
caclculate_running_time(StartTime, EndTime, [{StartTime, in}, {OutTime, out} | Events]); | |
caclculate_running_time(_StartTime, EndTime, [{InTime, in}, {OutTime, out} | Events]) -> | |
(OutTime - InTime) + caclculate_running_time(OutTime, EndTime, Events). |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This simple tool allows you to see which processes spend most time on scheduler.
It could be useful when you see a strange scheduler utilization or just have no idea why is everything slow.
Usage:
How to choose collection time:
It could be useful to run the timetop multiple times to see if something is changing right now.