1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
|
diff --git a/gpus.go b/gpus.go
index ca3bcaf..9e90421 100644
--- a/gpus.go
+++ b/gpus.go
@@ -38,15 +38,19 @@ func GPUsGetMetrics() *GPUsMetrics {
func ParseAllocatedGPUs() float64 {
var num_gpus = 0.0
- args := []string{"-a", "-X", "--format=Allocgres", "--state=RUNNING", "--noheader", "--parsable2"}
+ args := []string{"-a", "-X", "--format=AllocTRES", "--state=RUNNING", "--noheader", "--parsable2"}
output := string(Execute("sacct", args))
if len(output) > 0 {
for _, line := range strings.Split(output, "\n") {
if len(line) > 0 {
line = strings.Trim(line, "\"")
- descriptor := strings.TrimPrefix(line, "gpu:")
- job_gpus, _ := strconv.ParseFloat(descriptor, 64)
- num_gpus += job_gpus
+ for _, resource := range strings.Split(line, ",") {
+ if strings.HasPrefix(resource, "gres/gpu=") {
+ descriptor := strings.TrimPrefix(resource, "gres/gpu=")
+ job_gpus, _ := strconv.ParseFloat(descriptor, 64)
+ num_gpus += job_gpus
+ }
+ }
}
}
}
@@ -63,11 +67,17 @@ func ParseTotalGPUs() float64 {
for _, line := range strings.Split(output, "\n") {
if len(line) > 0 {
line = strings.Trim(line, "\"")
- descriptor := strings.Fields(line)[1]
- descriptor = strings.TrimPrefix(descriptor, "gpu:")
- descriptor = strings.Split(descriptor, "(")[0]
- node_gpus, _ := strconv.ParseFloat(descriptor, 64)
- num_gpus += node_gpus
+ gres := strings.Fields(line)[1]
+ // gres column format: comma-delimited list of resources
+ for _, resource := range strings.Split(gres, ",") {
+ if strings.HasPrefix(resource, "gpu:") {
+ // format: gpu:<type>:N(S:<something>), e.g. gpu:RTX2070:2(S:0)
+ descriptor := strings.Split(resource, ":")[2]
+ descriptor = strings.Split(descriptor, "(")[0]
+ node_gpus, _ := strconv.ParseFloat(descriptor, 64)
+ num_gpus += node_gpus
+ }
+ }
}
}
}
|