Skip to content

Instantly share code, notes, and snippets.

@kcha
Last active August 29, 2015 13:58
Show Gist options
  • Save kcha/10421887 to your computer and use it in GitHub Desktop.
Save kcha/10421887 to your computer and use it in GitHub Desktop.
#!/usr/bin/perl
# KH: This is modified from the original lastjoboutput script.
#
# Shows the output of the last N jobs that finished running on the cluster
# with an exit status OTHER THAN ZERO.
# Convenience function to avoid having to find the most recent job output file
use strict;
use warnings;
use Getopt::Long;
use Time::HiRes qw(time);
# GLOBALS
# Folder with pbs output files. A PBS_OUTPUT value already present in the
# environment wins; otherwise fall back to ~/pbs-output.
$ENV{PBS_OUTPUT} ||= "$ENV{HOME}/pbs-output";

# ARGUMENTS
my $sHelp      = 0;    # -help : print the usage text and stop
my $nFileCount = 1;    # -n    : how many job output files to report
my $nSortByID  = 0;    # -i    : order by job ID instead of modification time
my $bySummary  = 0;    # -s    : print a summary table instead of file contents
my $showAll    = 0;    # -all  : report jobs whatever their exit status

# GetOptions warns on STDERR and returns false when it meets an unknown or
# malformed option. Stop in that case instead of quietly running with a
# partially parsed command line.
GetOptions(
    "help!" => \$sHelp,
    "i!"    => \$nSortByID,
    "n:i"   => \$nFileCount,
    "s!"    => \$bySummary,
    "all!"  => \$showAll,
) or die "Invalid options; run '$0 -help' for usage\n";

# -all implies the summary format and reports at least 10 jobs.
if ($showAll) {
    $nFileCount = 10 if $nFileCount < 10;
    $bySummary  = 1;
}

# "n:i" takes an optional value, so a bare -n leaves the count at 0, and a
# negative count is accepted by the parser. Neither can select any job, so
# reject them here with a message that names the real problem. Skipped when
# help was requested so that -help always works.
if (!$sHelp && $nFileCount < 1) {
    die "-n expects a positive integer (got $nFileCount)\n";
}
# PRINT HELP
# Name the script by the part of $0 after the last '/', or by $0 itself when
# it contains no '/'.
my $sScriptName = ($0 =~ /^.*\/(.+$)/) ? $1 : $0;
if ($sHelp) {
    # Help was asked for explicitly, so it is ordinary output: write it to
    # STDOUT (where it can be piped to a pager) and exit successfully rather
    # than reporting it as an error through die.
    print <<HELP;
$sScriptName [ -n <integer> ] [ -i ] [ -s ] [ -all ] [ -help ]
Shows the content of the pbs output file for the most recently finished job(s)
that had a non-zero exit status.
Options:
-n <integer>
Show output of the last n files, instead of just the last one
-i
Use job ID rather than file modification time to select most recent jobs
-help
This help message
-s
Summary format. Show summary table of failed jobs.
-all
Shortcut for lastjoboutput and -s option (e.g. list most recent job(s) in
summary format)
HELP
    exit 0;
}
# START
my $tic = time();    # start of the run, used for the elapsed-time report

# Collect the plain files found in the pbs output folder.
opendir my($dir), $ENV{PBS_OUTPUT} or die "Can't open $ENV{PBS_OUTPUT} : $!\n";
my @asFiles = grep { -f "$ENV{PBS_OUTPUT}/$_" } readdir $dir;
closedir $dir;

# Sort the output files by job ID or modification date.
# Each file's sort key is computed once up front, so the comparator is a plain
# numeric comparison with no string eval and no stat call per comparison.
if ($nSortByID) {
    # Highest job ID first.
    my %nJobIDOf;
    for my $sFile (@asFiles) {
        (my $sPrefix = $sFile) =~ s/\..*//;    # text before the first dot
        if ($sPrefix =~ /^\d+$/) {
            # <ID>.<anything> : the leading number is the job ID
            $nJobIDOf{$sFile} = $sPrefix;
        }
        elsif ($sFile =~ /\.o(\d+)$/) {
            # <name>.o<ID> : the form the summary table parses job IDs from
            $nJobIDOf{$sFile} = $1;
        }
        else {
            # No recognisable ID: treat as 0 so it sorts after real IDs
            $nJobIDOf{$sFile} = 0;
        }
    }
    @asFiles = sort { $nJobIDOf{$b} <=> $nJobIDOf{$a} } @asFiles;
}
else {
    # Smallest age first, i.e. most recently modified file first.
    my %nAgeOf;
    for my $sFile (@asFiles) {
        my $nAge = -M "$ENV{PBS_OUTPUT}/$sFile";
        # -M yields undef if the file vanished after readdir; count that as 0
        $nAgeOf{$sFile} = defined $nAge ? $nAge : 0;
    }
    @asFiles = sort { $nAgeOf{$a} <=> $nAgeOf{$b} } @asFiles;
}
# Find the files with non-zero exit status
# Walk @asFiles in its sorted order and select jobs until $nFileCount of them
# have been picked or the files run out.
my @exitstatus = ();    # status of every examined file, same index as @asFiles
$nFileCount = scalar(@asFiles) if (scalar(@asFiles) < $nFileCount);
die "No job output files found\n" unless ($nFileCount);
my $i = 0;              # number of jobs selected so far
my $n = 0;              # index in @asFiles of the file being examined
my @failedJobs = ();    # indices in @asFiles of the selected jobs
while ($n < @asFiles && $i < $nFileCount) {
    # Fetch the last line of the file with tail. The list form of the pipe
    # open hands the path to tail as a single argument without going through
    # a shell, so quotes, '$' or backquotes in a file name cannot alter the
    # command.
    my $status = '';
    if (open(my $tail, '-|', 'tail', '-n', '1', "$ENV{PBS_OUTPUT}/$asFiles[$n]")) {
        my $sLastLine = <$tail>;
        $status = $sLastLine if defined $sLastLine;
        close $tail;
    }

    if ($status =~ /Exit status\s+:\s+(\d+)/) {
        my $nExit = $1;
        push @exitstatus, $nExit;
        # Select failed jobs only, unless -all asked for every job
        if ($showAll || $nExit != 0) {
            push @failedJobs, $n;
            $i++;
        }
    }
    else {
        # No exit status on the last line (or the file could not be read):
        # report the job with an unknown status rather than dropping it.
        push @exitstatus, "unk";
        push @failedJobs, $n;
        $i++;
    }
    $n++;
}
# Print the sorted list of job outputs
# Jobs are printed in the reverse of the order in which they were selected.
# Each file is opened first (three-argument open, lexical handle) and its
# modification time is read from the open handle, so a file that has vanished
# produces the "Can't open" error instead of an undefined timestamp.
if (!$bySummary) {
    # Full format: a timestamp banner, the file name, then the whole file.
    for my $f (reverse @failedJobs) {
        my $sPath = "$ENV{PBS_OUTPUT}/$asFiles[$f]";
        open(my $out, '<', $sPath) or die "Can't open $sPath: $!\n";
        my $time = localtime((stat $out)[9]);
        print "\n## $time #################################################################\n";
        print "==> $asFiles[$f]\n";
        while (my $sLine = <$out>) {
            print $sLine;
        }
        close $out;
        print "\n";
    }
}
else {
    # Summary format: one table row per job.
    print "\n====================================================================================\n";
    printf("%-10s | %-19s | %-4s | %-20s | %-62s\n", 'Job', 'Time', 'Exit', 'Command', 'Resources');
    print "------------------------------------------------------------------------------------\n";
    for my $f (reverse @failedJobs) {
        my $sPath = "$ENV{PBS_OUTPUT}/$asFiles[$f]";
        open(my $out, '<', $sPath) or die "Can't open $sPath: $!\n";
        my $time = localtime((stat $out)[9]);
        $time = substr($time, 0, -5);    # remove year

        # Pick the fields out by their label rather than by their position
        # among the matching lines, so a missing header line cannot shift one
        # field into another's column. The first occurrence of each wins.
        my ($cmd, $rsc);
        while (my $sLine = <$out>) {
            if (!defined $cmd && $sLine =~ /Run command\s+:\s+(.*)/) {
                $cmd = $1;
            }
            if (!defined $rsc && $sLine =~ /Resources used\s+:\s+([^\n]*)/) {
                $rsc = $1;
            }
            last if defined $cmd && defined $rsc;
        }
        close $out;
        $cmd = '' unless defined $cmd;
        $rsc = '' unless defined $rsc;

        # Job ID is the number after ".o" at the end of the file name; the
        # column is left empty when the name does not have that form.
        my $jobid = ($asFiles[$f] =~ /\.o(\d+)$/) ? $1 : '';

        printf("%-10s | %-19s | %-4s | %-20s | %-62s\n",
            $jobid, $time, $exitstatus[$f], $cmd, $rsc);
    }
}
# End time
# Report the elapsed wall-clock time on STDERR so it stays out of the job
# listing written to STDOUT. time() is imported from Time::HiRes at the top of
# the file and returns fractional seconds, so the two decimals printed here
# are meaningful (the builtin time() only has one-second resolution).
my $run_time = time() - $tic;
printf STDERR "\nTook %0.2f seconds\n", $run_time;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment