summary | log | tree | commit | diff | stats
path: root/gpus.patch
blob: 8621e92945480716e1b883ed6d3dbbf1885987cc (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
diff --git a/gpus.go b/gpus.go
index ca3bcaf..9e90421 100644
--- a/gpus.go
+++ b/gpus.go
@@ -38,15 +38,19 @@ func GPUsGetMetrics() *GPUsMetrics {
 func ParseAllocatedGPUs() float64 {
 	var num_gpus = 0.0
 
-	args := []string{"-a", "-X", "--format=Allocgres", "--state=RUNNING", "--noheader", "--parsable2"}
+	args := []string{"-a", "-X", "--format=AllocTRES", "--state=RUNNING", "--noheader", "--parsable2"}
 	output := string(Execute("sacct", args))
 	if len(output) > 0 {
 		for _, line := range strings.Split(output, "\n") {
 			if len(line) > 0 {
 				line = strings.Trim(line, "\"")
-				descriptor := strings.TrimPrefix(line, "gpu:")
-				job_gpus, _ := strconv.ParseFloat(descriptor, 64)
-				num_gpus += job_gpus
+				for _, resource := range strings.Split(line, ",") {
+					if strings.HasPrefix(resource, "gres/gpu=") {
+						descriptor := strings.TrimPrefix(resource, "gres/gpu=")
+						job_gpus, _ := strconv.ParseFloat(descriptor, 64)
+						num_gpus += job_gpus
+					}
+				}
 			}
 		}
 	}
@@ -63,11 +67,17 @@ func ParseTotalGPUs() float64 {
 		for _, line := range strings.Split(output, "\n") {
 			if len(line) > 0 {
 				line = strings.Trim(line, "\"")
-				descriptor := strings.Fields(line)[1]
-				descriptor = strings.TrimPrefix(descriptor, "gpu:")
-				descriptor = strings.Split(descriptor, "(")[0]
-				node_gpus, _ :=  strconv.ParseFloat(descriptor, 64)
-				num_gpus += node_gpus
+				gres := strings.Fields(line)[1]
+				// gres column format: comma-delimited list of resources
+				for _, resource := range strings.Split(gres, ",") {
+					if strings.HasPrefix(resource, "gpu:") {
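+						// assumes the typed form gpu:<type>:N(S:<sockets>), e.g. gpu:RTX2070:2(S:0); an untyped entry such as gpu:2 has no third field, so the [2] index below would panic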
+						descriptor := strings.Split(resource, ":")[2]
+						descriptor = strings.Split(descriptor, "(")[0]
+						node_gpus, _ :=  strconv.ParseFloat(descriptor, 64)
+						num_gpus += node_gpus
+					}
+				}
 			}
 		}
 	}