Created
March 23, 2017 07:28
-
-
Save nitinbhojwani/3363884780c4e375c12e8cff28d977a5 to your computer and use it in GitHub Desktop.
Monitor individual process metrics - nmonitor usage and shell script.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# nmonitor.sh - sample the CPU, memory, disk and network usage of a single
# process once per second and write the samples to a temporary file.
#
# Usage: bash nmonitor.sh <pid> [time_to_monitor_in_seconds]
#   <pid>   numeric id of a running process
#   [time]  number of one-second samples to take (default: 600)
#
# The script relies on bash features ([[ ]], associative arrays), so it must
# be run with bash rather than a plain POSIX sh.

# The duration is optional: accept one or two arguments.
if [ "$#" -lt 1 ] || [ "$#" -gt 2 ]; then
  echo "usage error: bash nmonitor.sh <pid> [time_to_monitor_in_seconds]" >&2
  exit 1
fi

pid=$1
# Fall back to 600 seconds when no (or an empty) duration is given.
time=${2:-600}

# Shared pattern for "non-negative integer"; used for both pid and time.
pid_re='^[0-9]+$'

if ! [[ $pid =~ $pid_re ]]; then
  echo "error: pid must be a number" >&2
  exit 1
fi

echo "Seems valid pid. Let's check in the list of ps..."
# ps exits non-zero and prints nothing when the pid does not exist, so its
# status and output are enough; no need to grep through the listing.
if ! comm_name=$(ps -p "$pid" -o comm=) || [[ -z $comm_name ]]; then
  echo "error: no process with this pid exists" >&2
  exit 1
fi
echo "process name: $comm_name"

if ! [[ $time =~ $pid_re ]]; then
  echo "error: time must be number of seconds" >&2
  exit 1
fi

# Create the results file. It is deliberately left in place when the script
# ends, because it is the script's output.
echo "starting the monitoring now."
if ! temp_file=$(mktemp); then
  echo "error: could not create a temporary file" >&2
  exit 1
fi
echo "Generating a temporary file to write results: $temp_file"
echo "Time(s) CPU(%) MEMORY(%) Disk_Read(B/s) Disk_Write(B/s) Disk_Read_Including_Cache(B/s) Disk_Write_Including_Cache(B/s) Network-In(B/s) Network-Out(B/s)" >> "$temp_file"
#######################################
# Read the cumulative receive/transmit byte counters of one network
# interface from /proc/<pid>/net/dev.
# Globals:
#   IN   (written) received bytes
#   OUT  (written) transmitted bytes
#   TIME (written) epoch seconds at which the counters were read
#   NMONITOR_IFACE (read, optional) default interface name
# Arguments:
#   $1 - pid whose /proc/<pid>/net/dev is read
#   $2 - optional interface name; defaults to $NMONITOR_IFACE, then eth0
# Returns:
#   0 on success. 1 when the file or the interface cannot be read; in that
#   case IN/OUT/TIME keep their previous values (0 if they were unset), so
#   callers computing deltas never see an empty operand.
#######################################
get_traffic() {
  local iface=${2:-${NMONITOR_IFACE:-eth0}}
  local dev_file="/proc/$1/net/dev"
  local counters rx tx

  IN=${IN:-0}; OUT=${OUT:-0}; TIME=${TIME:-0}
  [[ -r $dev_file ]] || return 1

  # Split each line at the first colon instead of relying on whitespace:
  # the kernel may print "eth0:12345" without a space, and an exact name
  # comparison avoids matching e.g. "veth0" when looking for "eth0".
  # After the colon, field 1 is received bytes and field 9 transmitted bytes.
  counters=$(awk -v ifc="$iface" '
    {
      line = $0
      sub(/^[ \t]+/, "", line)
      colon = index(line, ":")
      if (colon == 0 || substr(line, 1, colon - 1) != ifc) next
      if (split(substr(line, colon + 1), field, " ") >= 9) {
        print field[1], field[9]
        exit
      }
    }' "$dev_file" 2>/dev/null) || return 1

  read -r rx tx <<<"$counters"
  [[ $rx =~ ^[0-9]+$ && $tx =~ ^[0-9]+$ ]] || return 1

  IN=$rx; OUT=$tx
  # date is used instead of awk's strftime(), which only gawk provides.
  TIME=$(date +%s)
}
#######################################
# Read the cumulative I/O counters of a process from /proc/<pid>/io.
# Globals:
#   DISK_READ             (written) read_bytes  - bytes fetched from storage
#   DISK_WRITE            (written) write_bytes - bytes sent to storage
#   DISK_WITH_CACHE_READ  (written) rchar - bytes read, cache included
#   DISK_WITH_CACHE_WRITE (written) wchar - bytes written, cache included
# Arguments:
#   $1 - pid whose /proc/<pid>/io is read
# Returns:
#   0 on success. 1 when the file is unreadable or incomplete; in that case
#   the globals keep their previous values (0 if they were unset), so
#   callers computing deltas never see an empty operand.
#######################################
get_diskio() {
  local io_file="/proc/$1/io"
  local number_re='^[0-9]+$'
  local key value
  local rchar="" wchar="" read_bytes="" write_bytes=""

  DISK_READ=${DISK_READ:-0}
  DISK_WRITE=${DISK_WRITE:-0}
  DISK_WITH_CACHE_READ=${DISK_WITH_CACHE_READ:-0}
  DISK_WITH_CACHE_WRITE=${DISK_WITH_CACHE_WRITE:-0}
  [[ -r $io_file ]] || return 1

  # Select counters by name rather than by line position, so the result does
  # not depend on the order in which the kernel lists the fields.
  while IFS=': ' read -r key value; do
    case "$key" in
      rchar) rchar=$value ;;
      wchar) wchar=$value ;;
      read_bytes) read_bytes=$value ;;
      write_bytes) write_bytes=$value ;;
    esac
  done 2>/dev/null < "$io_file"

  # Only publish a complete, numeric set of counters.
  [[ $rchar =~ $number_re && $wchar =~ $number_re &&
     $read_bytes =~ $number_re && $write_bytes =~ $number_re ]] || return 1

  DISK_READ=$read_bytes
  DISK_WRITE=$write_bytes
  DISK_WITH_CACHE_READ=$rchar
  DISK_WITH_CACHE_WRITE=$wchar
}
#######################################
# Append the summary rows (Average, Sum, Min, Max) to the results file and
# print the whole file to stdout.
# Globals:
#   temp_file (read) path of the results file
#   average_list, sum_list, min_list, max_list (read) associative arrays
#     keyed by metric name
# Arguments: none
# Outputs:   the complete results file on stdout
#######################################
write_final_output() {
  local metric
  local avg_row="Average" sum_row="Sum" min_row="Min" max_row="Max"

  # Column order matches the header line written at the top of the file.
  for metric in cpu memory disk_read disk_write \
    disk_read_with_cache disk_write_with_cache network_in network_out; do
    avg_row+=" ${average_list[$metric]}"
    sum_row+=" ${sum_list[$metric]}"
    min_row+=" ${min_list[$metric]}"
    max_row+=" ${max_list[$metric]}"
  done

  # One quoted redirection: works for paths containing spaces and opens the
  # file once. The leading empty line separates samples from the summary.
  printf '%s\n' "" "$avg_row" "$sum_row" "$min_row" "$max_row" >> "$temp_file"
  cat -- "$temp_file"
}
#######################################
# SIGINT (Ctrl-C) handler: announce the interruption, emit the statistics
# gathered so far through write_final_output, then terminate the script.
# Arguments: none
# Outputs:   status messages on stdout, followed by whatever
#            write_final_output prints
# Returns:   does not return; exits with status 2
#######################################
trap_ctrlc() {
  printf '\n%s\n\n%s\n' \
    "Ctrl-C caught...performing clean up" \
    "Writing Final Results..."
  write_final_output
  exit 2
}
# Cumulative network counters, refreshed by get_traffic.
IN=0; OUT=0; TIME=0
# Cumulative disk counters, refreshed by get_diskio.
DISK_READ=0; DISK_WITH_CACHE_READ=0; DISK_WRITE=0; DISK_WITH_CACHE_WRITE=0

# Metric names, in the column order of the results file.
declare -a metrics=( "cpu" "memory" "disk_read" "disk_write" "disk_read_with_cache" "disk_write_with_cache" "network_in" "network_out" )

# Per-metric state: latest sample and running max/min/average/sum.
declare -A current_list=() max_list=() min_list=() average_list=() sum_list=()
for metric in "${metrics[@]}"; do
  current_list[$metric]=0
  max_list[$metric]=0
  min_list[$metric]=0
  average_list[$metric]=0
  sum_list[$metric]=0
done

# Take a first reading so the first sample is a delta, not a lifetime total.
get_traffic "$pid"
get_diskio "$pid"

# Print the summary collected so far when the user presses Ctrl-C.
trap trap_ctrlc INT

# A sample is usable only if it is a plain (optionally signed) decimal.
sample_re='^-?[0-9]*[.]?[0-9]+$'

for (( i = 1; i <= time; i++ )); do
  # Locate the %CPU / %MEM columns from top's own header line instead of
  # assuming fixed positions; if the header is not recognised, fall back to
  # the 7th and 8th fields, which is what this script historically read.
  # NOTE(review): with top's stock column layout those two fields are SHR
  # and S, so the fallback is only meaningful for customised layouts.
  cpu_now=""; mem_now=""
  read -r cpu_now mem_now < <(
    top -b -p "$pid" -n 1 | awk -v pid="$pid" '
      $1 == "PID" {
        for (c = 1; c <= NF; c++) {
          if ($c == "%CPU") cpu_col = c
          if ($c == "%MEM") mem_col = c
        }
      }
      $1 == pid {
        if (!cpu_col) cpu_col = 7
        if (!mem_col) mem_col = 8
        print $cpu_col, $mem_col
        exit
      }'
  )

  # Remember the previous counters, then refresh them.
  _IN=$IN; _OUT=$OUT
  get_traffic "$pid"
  _DISK_READ=$DISK_READ; _DISK_WITH_CACHE_READ=$DISK_WITH_CACHE_READ
  _DISK_WRITE=$DISK_WRITE; _DISK_WITH_CACHE_WRITE=$DISK_WITH_CACHE_WRITE
  get_diskio "$pid"

  # ${var:-0} keeps the arithmetic valid even if a counter could not be
  # read (for example because the process has exited).
  current_list["cpu"]=$cpu_now
  current_list["memory"]=$mem_now
  current_list["disk_read"]=$(( ${DISK_READ:-0} - ${_DISK_READ:-0} ))
  current_list["disk_write"]=$(( ${DISK_WRITE:-0} - ${_DISK_WRITE:-0} ))
  current_list["disk_read_with_cache"]=$(( ${DISK_WITH_CACHE_READ:-0} - ${_DISK_WITH_CACHE_READ:-0} ))
  current_list["disk_write_with_cache"]=$(( ${DISK_WITH_CACHE_WRITE:-0} - ${_DISK_WITH_CACHE_WRITE:-0} ))
  current_list["network_in"]=$(( ${IN:-0} - ${_IN:-0} ))
  current_list["network_out"]=$(( ${OUT:-0} - ${_OUT:-0} ))

  for metric in "${metrics[@]}"; do
    # Accept a decimal comma (locale dependent) and replace anything that is
    # not a number by 0 so that bc never receives garbage.
    sample=${current_list[$metric]/,/.}
    [[ $sample =~ $sample_re ]] || sample=0
    current_list[$metric]=$sample

    sum_list[$metric]=$(bc -l <<<"${sum_list[$metric]} + $sample")
    # Compare numerically through bc: inside [[ ]] the < and > operators
    # compare strings, which orders "9" after "10".
    if (( i == 1 )) || (( $(bc -l <<<"$sample < ${min_list[$metric]}") )); then
      min_list[$metric]=$sample
    fi
    if (( i == 1 )) || (( $(bc -l <<<"$sample > ${max_list[$metric]}") )); then
      max_list[$metric]=$sample
    fi
    average_list[$metric]=$(bc -l <<<"${sum_list[$metric]} / $i")
  done

  curr_time=$(date +"%d-%m-%Y-%H:%M:%S")
  echo "$curr_time ${current_list["cpu"]} ${current_list["memory"]} ${current_list["disk_read"]} ${current_list["disk_write"]} ${current_list["disk_read_with_cache"]} ${current_list["disk_write_with_cache"]} ${current_list["network_in"]} ${current_list["network_out"]}" >> "$temp_file"
  sleep 1
done

write_final_output
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Currently the attached script nmonitor.sh collects the following data for a single process once per second:
CPU %
Memory %
Disk Read (B/s) - actual bytes read from storage
Disk Write (B/s) - actual bytes written to storage
Disk Read Caused (B/s) - bytes the process asked to read; they may not all have been read from storage.
Disk Write Caused (B/s) - bytes the process asked to write; they may not all have been written to storage.
Network In (B/s)
Network Out (B/s)
# Doc for Disk Read and Write: | |
# https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/filesystems/proc.txt?id=HEAD#l1387 | |
Steps to use the script:
Turn Irix mode off in the top utility so that CPU % is reported relative to the overall CPU resource available.
To do this:
open top,
press Shift + I to turn off Irix mode,
press Shift + W to save the setting,
press Ctrl + C to exit from top
top
Shift + I
Shift + W
Ctrl + C
Execute nmonitor.sh with the process id and the time in seconds for which monitoring needs to be done. Run it with bash, not sh: the script uses bash-only features such as [[ ]] and associative arrays.
bash nmonitor.sh <pid> <time_in_seconds>
* nmonitor.sh attached below
Note:
The script can be stopped at any time with Ctrl+C.
Output is written to a file created in the /tmp/ directory. Its path is shown in the output of the script.
So far the script has been tested and used only while running in the foreground, so one terminal tab needs to be kept open for it. It can also be executed in the background.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment