forked from cdt-data-science/cluster-scripts
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathfree-gpus
executable file
·42 lines (35 loc) · 1.45 KB
/
free-gpus
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
#!/bin/bash
# Write the help text for this script to stdout.
# Takes no arguments. The path the script was invoked as ($0) is interpolated
# into the first line; every other line is fixed text.
print_usage () {
  # One printf argument per output line; the '%s\n' format is reused for each.
  printf '%s\n' \
    "Usage: $0" \
    "List nodes with free GPUs, as well as a breakdown of how many GPUs in each node are available." \
    "Arguments:" \
    "None." \
    "Output:" \
    "Lists each node with at least one free GPU on separate lines." \
    "Also lists a breakdown of the states of the GPUs:" \
    "-- The 'in_use' column shows the number of GPUs on the node that are currently allocated to a job." \
    "-- The 'usable' column shows the number of GPUs on the node that are not in an error state or otherwise unable" \
    "to be allocated to a job." \
    "-- The 'total' column shows the total number of GPUs on the node, including those that are currently unable to" \
    "be allocated to a job." \
    "The number of free GPUs (GPUs not allocated to a job and not in an error state) on a node is the difference between" \
    "the 'usable' column and the 'in_use' column."
}
# Succeeds (status 0) when the first two comma-separated fields of the first
# line on stdin compare equal, and fails (status 1) otherwise or on empty input.
# NOTE(review): the fields are presumably the in-use and usable GPU counts
# printed by `gpu-usage -h` -- confirm against that helper.
is_full_capacity () {
  awk -F',' 'NR == 1 { full = ($1 == $2) } END { exit(full ? 0 : 1) }'
}

# Filters a whitespace-separated per-node report read from stdin into
# comma-separated rows:
#   line 1 (header): fields 1, 2 and 6
#   other lines:     "field1 field2", field 3 and field 7, kept only when
#                    field 7 is greater than zero
# NOTE(review): field 7 is presumably the free-GPU count reported by
# `gpu-usage-by-node -p` -- confirm against that helper.
select_free_nodes () {
  awk '
    NR == 1 { printf "%s,%s,%s\n", $1, $2, $6 }
    NR > 1 && $7 > 0 { printf "%s %s,%s,%s\n", $1, $2, $3, $7 }
  '
}

# Entry point. Prints the usage text if any argument is given; otherwise prints
# either the table of nodes with free GPUs or, when nothing is free, a hint
# plus the current priority queue from `sprio -l`.
# Returns non-zero when a helper script produces no output.
main () {
  # The script accepts no arguments, so anything passed just shows the help.
  if (( $# > 0 )); then
    print_usage
    return
  fi

  local script_dir summary node_report
  # The helper scripts are looked up next to this script; quoting keeps this
  # working when the install path contains spaces.
  script_dir=$(dirname "$0")

  summary=$("${script_dir}/gpu-usage" -h)
  # An empty summary means the helper is missing or failed. Stop here rather
  # than letting it be mistaken for "cluster is full".
  if [[ -z "${summary}" ]]; then
    printf '%s: no output from %s\n' "$0" "${script_dir}/gpu-usage" >&2
    return 1
  fi

  if is_full_capacity <<< "${summary}"; then
    echo "No gpus free...join the squeue!"
    echo "Tip: you can use the command sprio to observe your job's priority"
    echo "Current queue:"
    sprio -l
    return
  fi

  # Only query the per-node breakdown when there is something free to show.
  node_report=$("${script_dir}/gpu-usage-by-node" -p)
  if [[ -z "${node_report}" ]]; then
    printf '%s: no output from %s\n' "$0" "${script_dir}/gpu-usage-by-node" >&2
    return 1
  fi

  select_free_nodes <<< "${node_report}" | column -t -s','
}

# Last command in the file, so the script's exit status is main's status.
main "$@"