Last active
August 30, 2024 08:20
-
-
Save giannisdaras/444805d996f274bca162c529938b81e8 to your computer and use it in GitHub Desktop.
Custom Slurm commands
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#######################################
# Submit a short smoke-test job through sbatch.
# The wrapped job script prints the host it runs on, sleeps for 60 seconds
# and then prints a completion message.
# Arguments:
#   $1   - number of nodes to request (default: 1)
#   $2.. - additional options, forwarded verbatim to sbatch
# Returns:
#   exit status of sbatch
#######################################
sbatch_test() {
  local nodes=${1:-1}

  # Only drop the node count when one was actually supplied: a bare `shift`
  # with no positional parameters returns a non-zero status.
  if (( $# > 0 )); then
    shift
  fi

  # The wrapped script is single-quoted so nothing expands at submit time,
  # and it uses double quotes internally so that $(hostname) is evaluated
  # when the job itself runs (inside single quotes it would be printed
  # literally).
  # shellcheck disable=SC2016
  sbatch \
    --nodes="$nodes" \
    --ntasks-per-node=1 \
    --time=01:00:00 \
    --job-name="test_job" \
    --output="job_%j.out" \
    "$@" \
    --wrap='echo "Job started on $(hostname)"; sleep 60; echo "Job finished"'
}
#######################################
# Report how many whole hours remain until a Slurm job is expected to start,
# using the start time printed by `squeue --start`.
# Arguments:
#   $1 - job ID to query (required)
# Outputs:
#   "<N> hours until the job starts" on stdout, or an explanatory message
#   when the job ID is missing or no usable start time is available.
# Returns:
#   0 on success; 1 if the job ID is missing, squeue yields no start time
#   (empty or "N/A"), or the start time cannot be parsed by date.
#######################################
job_start() {
  local job_id=${1:-}
  if [[ -z "$job_id" ]]; then
    echo "Please provide a job ID as an argument."
    return 1
  fi

  # Declared separately from the command substitutions below:
  # `local var=$(cmd)` yields the exit status of `local`, which would hide
  # a failure of cmd and make the parse check further down unreachable.
  local start_time start_epoch current_epoch diff_seconds hours

  # Get the estimated start time
  start_time=$(squeue -j "$job_id" --start --noheader --format="%S")
  if [[ -z "$start_time" || "$start_time" == "N/A" ]]; then
    echo "No valid start time available for job $job_id. The job may not exist, may have already started, or may be waiting in the queue."
    return 1
  fi

  # Convert start time to epoch seconds (relies on `date -d`, a GNU date
  # extension); date's own error output is suppressed on purpose because a
  # friendlier message is printed here instead.
  if ! start_epoch=$(date -d "$start_time" +%s 2>/dev/null); then
    echo "Unable to parse the start time: $start_time"
    return 1
  fi

  current_epoch=$(date +%s)

  # Shell integer division: the result is truncated toward zero.
  diff_seconds=$(( start_epoch - current_epoch ))
  hours=$(( diff_seconds / 3600 ))
  echo "$hours hours until the job starts"
}
#######################################
# Print how many nodes of a Slurm partition are in use by running jobs,
# relative to the node count reported by sinfo for that partition.
# Arguments:
#   $1 - partition (queue) name (default: gh)
# Outputs:
#   Queue name, running node count, total node count and their ratio
#   (two decimals) on stdout; an error on stderr when no nodes are found.
# Returns:
#   0 on success, 1 when sinfo reports no nodes for the partition.
#######################################
node_usage() {
  local queue_name="${1:-gh}"
  local total_nodes running_nodes ratio

  # `sum + 0` forces a numeric result, so empty input yields 0 rather than
  # an empty string.
  total_nodes=$(sinfo -h -p "$queue_name" -o "%D" \
    | awk '{sum += $1} END {print sum + 0}')
  running_nodes=$(squeue -a -h -t running -p "$queue_name" -o "%D" \
    | awk '{sum += $1} END {print sum + 0}')

  # Without any nodes the ratio is undefined (division by zero).
  if [[ "$total_nodes" == "0" ]]; then
    printf 'No nodes found for queue: %s\n' "$queue_name" >&2
    return 1
  fi

  # Values are handed to awk with -v instead of being spliced into the
  # program text.
  ratio=$(awk -v running="$running_nodes" -v total="$total_nodes" \
    'BEGIN {printf "%.2f", running / total}')

  echo "Queue: $queue_name"
  echo "Running Nodes: $running_nodes"
  echo "Total Nodes: $total_nodes"
  echo "Usage Ratio: $ratio"
  echo "($running_nodes / $total_nodes)"
}
#######################################
# List the users holding the most nodes in running jobs on a Slurm
# partition, ordered by node count (largest first).
# Arguments:
#   $1 - partition (queue) name (default: gh)
#   $2 - how many users to list (default: 5); must be a positive integer
# Outputs:
#   One line per user on stdout: "<rank>. User: <name>, Nodes: <count>"
# Returns:
#   2 when $2 is not a positive integer; otherwise the exit status of the
#   final formatting stage of the pipeline.
#######################################
top_users() {
  local queue_name="${1:-gh}"
  local limit="${2:-5}"

  if [[ ! "$limit" =~ ^[1-9][0-9]*$ ]]; then
    printf 'top_users: count must be a positive integer, got: %s\n' "$limit" >&2
    return 2
  fi

  # Stage 1: one "user nodes state" line per job.
  # Stage 2: sum node counts per user, only for jobs whose state is "R".
  # Stage 3-4: order by node count descending and keep the first $limit rows.
  # Stage 5: prefix each row with its rank.
  squeue -a -h -o "%u %D %t" -p "$queue_name" \
    | awk '$3 == "R" {sum[$1] += $2} END {for (user in sum) print user, sum[user]}' \
    | sort -rnk2 \
    | head -n "$limit" \
    | awk '{printf "%d. User: %s, Nodes: %s\n", NR, $1, $2}'
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment