Last active
August 29, 2015 13:58
-
-
Save kcha/10421887 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl
# KH: This is modified from the original lastjoboutput script.
#
# Shows the output of the last N jobs that finished running on the cluster
# with an exit status OTHER THAN ZERO.
# Convenience script to avoid having to find the most recent job output file
# by hand.
use strict;
use warnings;
use Getopt::Long;

# GLOBALS
# Folder with pbs output files. A PBS_OUTPUT value already present in the
# environment takes precedence over the default under the home directory.
$ENV{PBS_OUTPUT} ||= "$ENV{HOME}/pbs-output";

# ARGUMENTS
my $sHelp      = 0;    # -help : print usage and stop
my $nFileCount = 1;    # -n    : number of job output files to report
my $nSortByID  = 0;    # -i    : order files by job ID instead of mtime
my $bySummary  = 0;    # -s    : print a summary table instead of file contents
my $showAll    = 0;    # -all  : report jobs regardless of their exit status

# GetOptions returns false when it meets an unknown or malformed option.
# Stop in that case instead of silently running with options the user did
# not intend.
GetOptions(
    "help!" => \$sHelp,
    "i!"    => \$nSortByID,
    "n:i"   => \$nFileCount,
    "s!"    => \$bySummary,
    "all!"  => \$showAll,
) or die "Invalid command line; run with -help for the list of options\n";

# -all always uses the summary table and reports at least 10 jobs.
if ($showAll) {
    $nFileCount = 10 if $nFileCount < 10;
    $bySummary  = 1;
}
# PRINT HELP
# Name the script was invoked as, without any leading directory part.
my $sScriptName = ($0 =~ /^.*\/(.+$)/) ? $1 : $0;
if ($sHelp) {
    # The usage text goes through die, so it is written to STDERR and the
    # script stops with a non-zero exit status.
    die <<"HELP";

$sScriptName [ -n <integer> ] [ -i ] [ -s ] [ -all ] [ -help ]

   Shows the content of the pbs output file for the most recently finished
   job(s) that had a non-zero exit status. Output files are read from the
   folder named by the PBS_OUTPUT environment variable (default: ~/pbs-output).

   Options:
    -n <integer>
      Show output of the last n files, instead of just the last one
    -i
      Use job ID rather than file modification time to select most recent jobs
    -s
      Summary format. Show summary table of failed jobs.
    -all
      List the most recent jobs in summary format regardless of their exit
      status (implies -s and shows at least 10 jobs unless -n asks for more)
    -help
      This help message

HELP
}
# START
# The elapsed time is reported with two decimals at the end of the script,
# which is only meaningful with sub-second resolution. Importing
# Time::HiRes::time (core module) replaces the whole-second built-in for
# every time() call compiled from this point on.
use Time::HiRes qw(time);
my $tic = time();
# Sort the output files by job ID or modification date

# Return the numeric job ID encoded in an output file name.
# Two layouts are recognised: "<name>.o<id>" (the layout the summary table
# parses) and a name that starts with the ID, such as "<id>.<extension>".
# Names matching neither layout get 0 so the numeric sort stays warning-free.
sub _job_id_of {
    my ($name) = @_;
    return $1 if $name =~ /\.o(\d+)$/;
    return $1 if $name =~ /^(\d+)/;
    return 0;
}

opendir my $dir, $ENV{PBS_OUTPUT} or die "Can't open $ENV{PBS_OUTPUT} : $!\n";

# Compute each file's sort key exactly once, instead of re-evaluating it
# (previously through a string eval) on every comparison.
my @keyed;
for my $name (readdir $dir) {
    next unless -f "$ENV{PBS_OUTPUT}/$name";    # plain files only
    # "-M _" reuses the stat data just fetched by -f for the same file.
    my $key = $nSortByID ? _job_id_of($name) : -M _;
    push @keyed, [ $name, $key ];
}
closedir $dir;

if ($nSortByID) {
    # Highest job ID first.
    @keyed = sort { $b->[1] <=> $a->[1] } @keyed;
}
else {
    # Smallest age first, i.e. most recently modified first.
    @keyed = sort { $a->[1] <=> $b->[1] } @keyed;
}
my @asFiles = map { $_->[0] } @keyed;
# Find the files with non-zero exit status

# Return the last line of the file at $path, or '' when the file is empty or
# cannot be read (a warning is printed in the latter case).
# Only the final 4 KiB are scanned, so large output files are cheap to
# inspect; a last line longer than that is returned truncated at its start.
sub _last_line_of {
    my ($path) = @_;
    my $fh;
    unless (open $fh, '<', $path) {
        warn "Can't read $path: $!\n";
        return '';
    }
    my $size   = (-s $fh) || 0;
    my $window = 4096;
    seek $fh, $size - $window, 0 if $size > $window;    # 0 = from file start
    my $last = '';
    while (my $line = <$fh>) {
        $last = $line;
    }
    close $fh;
    return $last;
}

die "No job output files found\n" unless @asFiles;
die "The -n option needs a positive number of files\n" if $nFileCount < 1;
$nFileCount = scalar(@asFiles) if scalar(@asFiles) < $nFileCount;

my @exitstatus = ();    # exit status of every inspected file, indexed like @asFiles
my @failedJobs = ();    # indices into @asFiles of the jobs to report
my $i = 0;              # number of jobs selected so far
my $n = 0;              # index of the file currently being inspected
while ($n < @asFiles && $i < $nFileCount) {
    # The exit status is expected on the last line of the output file. It is
    # read directly rather than through `tail`, so no shell is involved and
    # unusual file names cannot alter a command line.
    my $status = _last_line_of("$ENV{PBS_OUTPUT}/$asFiles[$n]");
    if ($status =~ /Exit status\s+:\s+(-?\d+)/) {
        # A leading minus sign is accepted so negative codes are reported
        # as numbers instead of falling through to "unk".
        my $code = $1;
        push @exitstatus, $code;
        if ($showAll || $code != 0) {
            push @failedJobs, $n;
            $i++;
        }
    }
    else {
        # No recognisable exit status line: report the job with an unknown
        # status rather than hiding it.
        push @exitstatus, "unk";
        push @failedJobs, $n;
        $i++;
    }
    $n++;
}
# Print the sorted list of job outputs
# Jobs were collected most recent first; they are printed in reverse so the
# most recent one ends up at the bottom of the terminal.
my @printOrder = reverse @failedJobs;
if (!$bySummary) {
    # Full format: a dated banner, the file name, then the whole file.
    for my $f (@printOrder) {
        my $path = "$ENV{PBS_OUTPUT}/$asFiles[$f]";
        # Three-argument open with a lexical handle: the file name is never
        # interpreted as an open mode or a pipe.
        open my $out, '<', $path or die "Can't open $path: $!\n";
        my $time = localtime((stat $out)[9]);    # modification time
        print "\n## $time #################################################################\n";
        print "==> $asFiles[$f]\n";
        while (my $line = <$out>) {
            print $line;
        }
        close $out;
        print "\n";
    }
}
else {
    # Summary format: one table row per job.
    my $rowFormat = "%-10s | %-19s | %-4s | %-20s | %-62s\n";
    print "\n====================================================================================\n";
    printf $rowFormat, 'Job', 'Time', 'Exit', 'Command', 'Resources';
    print "------------------------------------------------------------------------------------\n";
    for my $f (@printOrder) {
        my $path = "$ENV{PBS_OUTPUT}/$asFiles[$f]";
        open my $out, '<', $path or die "Can't open $path: $!\n";
        my $time = localtime((stat $out)[9]);    # modification time
        $time = substr($time, 0, -5);            # remove year

        # Take the first "Run command" and "Resources used" lines wherever
        # they occur, instead of relying on their position among the
        # matching lines.
        my ($cmd, $rsc) = ('', '');
        my ($haveCmd, $haveRsc) = (0, 0);
        while (my $line = <$out>) {
            if (!$haveCmd && $line =~ /Run command\s+:\s+(.*)/) {
                $cmd     = $1;
                $haveCmd = 1;
            }
            elsif (!$haveRsc && $line =~ /Resources used\s+:\s+([^\n]*)/) {
                $rsc     = $1;
                $haveRsc = 1;
            }
        }
        close $out;

        # parse job id; fall back to the file name when it has no ".o<id>"
        # suffix so the row can still be identified
        my ($jobid) = $asFiles[$f] =~ /\.o(\d+)$/;
        $jobid = $asFiles[$f] unless defined $jobid;

        printf $rowFormat, $jobid, $time, $exitstatus[$f], $cmd, $rsc;
    }
}
# End time
# Report how long the script ran, on STDERR so that it does not mix with
# the job output printed on STDOUT.
my $elapsed = time() - $tic;
printf STDERR "\nTook %0.2f seconds\n", $elapsed;
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment