Last active
February 27, 2018 10:49
-
-
Save yoe/1e0f11effb9ac9ec15cd1f119299ca30 to your computer and use it in GitHub Desktop.
Script that reads the Grid Engine accounting file and parses qstat output into Prometheus metrics.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl -w
use strict;
use warnings;
use Scalar::Util qw(looks_like_number);

# Open the Grid Engine accounting log for reading. The bareword handle FH is
# kept on purpose: the main loop further down reads from it and seeks on it
# by that name. Abort at start-up when the file cannot be opened, because
# every job_* counter is derived from this file.
open(FH, '<', '/opt/sge/default/common/accounting')
    or die "cannot open /opt/sge/default/common/accounting: $!";
# Column names of one accounting record, in file order. The main loop assigns
# the colon-separated values of each line to these names by position.
my @fields = qw(
    qname hostname group owner job_name job_number account priority
    submission_time start_time end_time failed exit_status
    ru_wallclock ru_utime ru_stime ru_maxrss ru_ixrss ru_ismrss ru_idrss
    ru_isrss ru_minflt ru_majflt ru_nswap ru_inblock ru_oublock ru_msgsnd
    ru_msgrcv ru_nsignals ru_nvcsw ru_nivcsw
    project department granted_pe slots task_number cpu mem io category
    iow pe_taskid maxvmem arid ar_sub_time
);

# Metric name => { label string => accumulated value }.
my %results = (
    job_count => {},    # number of jobs
    job_time  => {},    # total time between start and end time
    job_delay => {},    # total time between submission and start time
    job_slots => {},    # number of used slots
    job_cpu   => {},    # total CPU time used
);

# Environment for the qstat invocation made by the main loop.
$ENV{SGE_ROOT} = "/opt/sge";
$ENV{SGE_CELL} = "default";
# Prepend the SGE binary directory. An unset or empty PATH is left out so the
# result never ends in ':' (an empty PATH entry means the current directory).
$ENV{PATH} = join ':', "$ENV{SGE_ROOT}/bin/lx-amd64",
    grep { defined($_) && length($_) } $ENV{PATH};

# Text emitted on the "# HELP" line of each metric.
my %helps = (
    job_count => "The total number of finished gridengine jobs",
    job_time  => "The total time between start and end time of jobs",
    job_delay => "The total time between submission and start time of jobs",
    job_slots => "The number of slots used for jobs",
    job_cpu   => "The CPU time in seconds used for all jobs",
    slots     => "The number of known slots",
    waiting   => "The number of jobs waiting to be scheduled",
);

# Prometheus metric type emitted on the "# TYPE" line of each metric.
my %types = (
    job_count => "counter",
    job_time  => "counter",
    job_delay => "counter",
    job_slots => "counter",
    job_cpu   => "counter",
    slots     => "gauge",
    waiting   => "gauge",
);
# add_val($hash, $label, $val)
#
# Add the magnitude of $val to the running total stored in $hash->{$label}.
#   $hash  - hash reference holding one total per label string
#   $label - key to accumulate under; the empty string is a valid key
#   $val   - amount to add; the call does nothing unless looks_like_number()
#            accepts it (so undef and non-numeric text are ignored)
# The absolute value is added, so a negative $val still increases the total.
# The ($$$) prototype is kept so existing call sites parse exactly as before.
sub add_val($$$) {
    my ($hash, $label, $val) = @_;

    return if !looks_like_number($val);

    # Start a new total at zero the first time a label is seen.
    $hash->{$label} = 0 unless exists $hash->{$label};
    $hash->{$label} += abs($val);
}
# Main collection loop; it never returns. Each cycle:
#   1. reads the records appended to the accounting file since the previous
#      cycle and adds them to the job_* totals, which therefore keep growing
#      for the lifetime of the process;
#   2. rebuilds the slots and waiting gauges from a fresh "qstat -F -u *" run;
#   3. writes every metric to a temporary file and renames it over the
#      published .prom file;
#   4. sleeps 40 seconds.
for (;;) {
    # Offset of the first byte not yet consumed as a complete record.
    my $record_start = tell(FH);

    while (defined(my $line = <FH>)) {
        # The file is appended to while it is being read. A line without a
        # trailing newline is a record still being written: rewind to its
        # start so it is parsed, complete, on a later cycle.
        if ($line !~ /\n\z/) {
            seek(FH, $record_start, 0);
            last;
        }
        $record_start = tell(FH);

        next if $line =~ /^#/;
        chomp $line;

        # Map the colon-separated columns onto their names by position;
        # columns missing from a short line stay undefined.
        my %rec;
        @rec{@fields} = split /:/, $line;

        # Label set shared by all per-job metrics. Values are interpolated
        # as-is, without escaping.
        my $label = "qname=\"$rec{qname}\",hostname=\"$rec{hostname}\","
                  . "group=\"$rec{group}\",owner=\"$rec{owner}\","
                  . "priority=\"$rec{priority}\",exit_status=\"$rec{exit_status}\","
                  . "department=\"$rec{department}\",project=\"$rec{project}\","
                  . "pe=\"$rec{granted_pe}\"";

        add_val($results{job_count}, $label, 1);
        add_val($results{job_time},  $label, ($rec{end_time} - $rec{start_time}));
        add_val($results{job_delay}, $label, ($rec{start_time} - $rec{submission_time}));
        add_val($results{job_slots}, $label, $rec{slots});
        add_val($results{job_cpu},   $label, $rec{cpu});
    }

    # Gauges describe the current state only, so they start empty each cycle.
    $results{slots}   = {};
    $results{waiting} = {};

    # List-form pipe open: qstat is executed directly, without a shell, and
    # '*' reaches it as a literal argument.
    if (open(my $qstat, '-|', 'qstat', '-F', '-u', '*')) {
        # Report an explicit 0 when nothing is queued, instead of dropping
        # the series from the output.
        $results{waiting}{""} = 0;

        while (defined(my $line = <$qstat>)) {
            # Lines carrying "<queue>@<host> ... /<n>/<m>": the last two
            # slash-separated numbers are taken as used and total slots.
            if ($line =~ /(.*)\@([-a-z0-9]+).*\/([0-9]+)\/([0-9]+)/) {
                my ($qname, $host, $slots_used, $total_slots) = ($1, $2, $3, $4);
                my $avail = $total_slots - $slots_used;
                add_val($results{slots}, "qname=\"$qname\",hostname=\"$host\",state=\"used\"", $slots_used);
                add_val($results{slots}, "qname=\"$qname\",hostname=\"$host\",state=\"available\"", $avail);
            }
            # Job lines whose state column is a three-character field ending
            # in "qw" (one leading character, which may be a space).
            if ($line =~ / .qw /) {
                add_val($results{waiting}, "", 1);
            }
        }
        close($qstat)
            or warn "qstat failed: " . ($! ? $! : "exit status " . ($? >> 8)) . "\n";
    }
    else {
        warn "cannot run qstat: $!\n";
    }

    # Write next to the published file rather than in /tmp: rename() only
    # works within one filesystem, and a fixed name in a world-writable
    # directory can be pre-created by another user. The ".tmp" suffix keeps
    # the partial file from matching "*.prom".
    my $prom_file = '/var/lib/prometheus/node-exporter/sge.prom';
    my $tmp_file  = "$prom_file.tmp";

    open(my $out, '>', $tmp_file) or die "cannot write $tmp_file: $!";
    for my $metric (sort keys %results) {
        print {$out} "# TYPE gridengine_$metric $types{$metric}\n";
        print {$out} "# HELP gridengine_$metric $helps{$metric}\n";
        my $series = $results{$metric};
        for my $labels (sort keys %$series) {
            if (length($labels) > 0) {
                print {$out} "gridengine_$metric\{$labels\} $series->{$labels}\n";
            }
            else {
                # An empty label string means a sample without labels.
                print {$out} "gridengine_$metric $series->{$labels}\n";
            }
        }
    }

    # Publish only a fully written file; buffered write errors show up at
    # close time.
    if (!close($out)) {
        warn "cannot finish writing $tmp_file: $!\n";
    }
    elsif (!rename($tmp_file, $prom_file)) {
        warn "cannot rename $tmp_file to $prom_file: $!\n";
    }

    sleep 40;

    # Seek to the current position: this moves nothing but clears the
    # handle's end-of-file condition, so the next cycle sees appended data.
    seek(FH, 0, 1);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment