summaryrefslogtreecommitdiff
path: root/vendor/github.com/containers/common/pkg/cgroups/utils_linux.go
blob: ffdf10acaf49d8d459e11430e9ca5656db32bcd5 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
//go:build linux
// +build linux

package cgroups

import (
	"bufio"
	"bytes"
	"errors"
	"fmt"
	"os"
	"path"
	"path/filepath"
	"strings"
	"sync"

	"github.com/opencontainers/runc/libcontainer/cgroups"
	"github.com/opencontainers/runc/libcontainer/configs"
	"github.com/sirupsen/logrus"
	"golang.org/x/sys/unix"
)

// WriteFile writes data to the cgroup file dir/file.
// The file is opened write-only through OpenFile; a write that fails with
// EINTR is logged and attempted again, any other outcome is returned as is.
func WriteFile(dir, file, data string) error {
	f, err := OpenFile(dir, file, unix.O_WRONLY)
	if err != nil {
		return err
	}
	defer f.Close()

	for {
		_, writeErr := f.WriteString(data)
		if !errors.Is(writeErr, unix.EINTR) {
			// Success (nil) or a non-retryable failure: hand it to the caller.
			return writeErr
		}
		logrus.Infof("interrupted while writing %s to %s", data, f.Name())
	}
}

// OpenFile opens the cgroup file dir/file with the given flags.
//
// When the joined path lies below cgroupRoot and cgroupRoot can be opened
// and inspected with openat2(2)/fstatfs(2), the file is opened relative to
// cgroupRoot with RESOLVE_BENEATH and RESOLVE_NO_MAGICLINKS (plus
// RESOLVE_NO_XDEV and RESOLVE_NO_SYMLINKS when cgroupRoot is a cgroup2
// filesystem). Otherwise it falls back to os.OpenFile and, unless TestMode is
// set, rejects files that are not on a cgroup or cgroup2 filesystem.
//
// In TestMode, opens whose flags include O_WRONLY also create and truncate
// the file.
func OpenFile(dir, file string, flags int) (*os.File, error) {
	mode := os.FileMode(0)
	if TestMode && flags&os.O_WRONLY != 0 {
		flags |= os.O_TRUNC | os.O_CREATE
		mode = 0o600
	}
	cgroupPath := path.Join(dir, file)
	relPath := strings.TrimPrefix(cgroupPath, cgroupRoot+"/")

	var (
		stats   unix.Statfs_t
		errStat error
	)
	rootFd, errOpen := unix.Openat2(-1, cgroupRoot, &unix.OpenHow{
		Flags: unix.O_DIRECTORY | unix.O_PATH | unix.O_CLOEXEC,
	})
	if errOpen == nil {
		// The root descriptor is only needed while resolving relPath below;
		// release it on return so repeated calls do not leak descriptors.
		defer unix.Close(rootFd)
		errStat = unix.Fstatfs(rootFd, &stats)
	}

	var resolveFlags uint64 = unix.RESOLVE_BENEATH | unix.RESOLVE_NO_MAGICLINKS
	if stats.Type == unix.CGROUP2_SUPER_MAGIC {
		// cgroupv2 has a single mountpoint and no "cpu,cpuacct" symlinks
		resolveFlags |= unix.RESOLVE_NO_XDEV | unix.RESOLVE_NO_SYMLINKS
	}

	// Fall back to a plain open when openat2 is not usable or when the path
	// is not below cgroupRoot (the prefix was not trimmed).
	if errOpen != nil || errStat != nil || len(relPath) == len(cgroupPath) {
		f, err := os.OpenFile(cgroupPath, flags, mode)
		if err != nil {
			return nil, err
		}
		if TestMode {
			return f, nil
		}
		if err := unix.Fstatfs(int(f.Fd()), &stats); err != nil {
			_ = f.Close()
			return nil, &os.PathError{Op: "statfs", Path: cgroupPath, Err: err}
		}
		if stats.Type != unix.CGROUP_SUPER_MAGIC && stats.Type != unix.CGROUP2_SUPER_MAGIC {
			_ = f.Close()
			return nil, &os.PathError{Op: "open", Path: cgroupPath, Err: errors.New("not a cgroup file")}
		}
		return f, nil
	}

	fd, err := unix.Openat2(rootFd, relPath,
		&unix.OpenHow{
			Resolve: resolveFlags,
			Flags:   uint64(flags) | unix.O_CLOEXEC,
			Mode:    uint64(mode),
		})
	if err != nil {
		return nil, err
	}

	return os.NewFile(uintptr(fd), cgroupPath), nil
}

// ReadFile returns the full content of the cgroup file dir/file.
// The file is opened read-only through OpenFile. If reading fails part way,
// whatever was read so far is returned together with the error.
func ReadFile(dir, file string) (string, error) {
	f, err := OpenFile(dir, file, unix.O_RDONLY)
	if err != nil {
		return "", err
	}
	defer f.Close()

	content := new(bytes.Buffer)
	if _, err := content.ReadFrom(f); err != nil {
		return content.String(), err
	}
	return content.String(), nil
}

// BlkioFiles returns the names of the blkio weight file and the per-device
// weight file to use for the cgroup at cgroupPath.
// The plain "blkio.weight" names are returned when that file exists under
// cgroupPath; otherwise the "blkio.bfq." variants are returned.
func BlkioFiles(cgroupPath string) (wtFile, wtDevFile string) {
	// This lookup is done here because runc keeps its own copies of these
	// names private, so they would not be set for us.
	if !cgroups.PathExists(filepath.Join(cgroupPath, "blkio.weight")) {
		return "blkio.bfq.weight", "blkio.bfq.weight_device"
	}
	return "blkio.weight", "blkio.weight_device"
}

// SetBlkioThrottle writes the blkio throttle limits found in res to the
// cgroup at cgroupPath.
// Limits are applied in this order: read bps, write bps, read IOPS, write
// IOPS. The first failing write aborts the operation and its error is
// returned.
func SetBlkioThrottle(res *configs.Resources, cgroupPath string) error {
	// set writes a single limit line to one of the throttle files.
	set := func(file, limit string) error {
		return WriteFile(cgroupPath, file, limit)
	}

	for _, dev := range res.BlkioThrottleReadBpsDevice {
		limit := fmt.Sprintf("%d:%d %d", dev.Major, dev.Minor, dev.Rate)
		if err := set("blkio.throttle.read_bps_device", limit); err != nil {
			return err
		}
	}
	for _, dev := range res.BlkioThrottleWriteBpsDevice {
		limit := fmt.Sprintf("%d:%d %d", dev.Major, dev.Minor, dev.Rate)
		if err := set("blkio.throttle.write_bps_device", limit); err != nil {
			return err
		}
	}
	for _, dev := range res.BlkioThrottleReadIOPSDevice {
		if err := set("blkio.throttle.read_iops_device", dev.String()); err != nil {
			return err
		}
	}
	for _, dev := range res.BlkioThrottleWriteIOPSDevice {
		if err := set("blkio.throttle.write_iops_device", dev.String()); err != nil {
			return err
		}
	}
	return nil
}

// Code below was moved from podman/utils/utils_supported.go and should probably be
// better integrated here, as some parts may be redundant.

// getCgroupProcess parses procFile, a file in the /proc/<pid>/cgroup format,
// and returns the cgroup path it describes.
//
// Every line must have the form "ID:CONTROLLERS:PATH". A line starting with
// "0::" ends the scan and its path is returned; otherwise the longest path
// seen across all lines is returned.
//
// An error is returned when the file cannot be opened or read, when a line
// is malformed, when no cgroup path is found, or when the result is "/" and
// allowRoot is false.
func getCgroupProcess(procFile string, allowRoot bool) (string, error) {
	f, err := os.Open(procFile)
	if err != nil {
		return "", err
	}
	defer f.Close()

	scanner := bufio.NewScanner(f)
	cgroup := ""
	for scanner.Scan() {
		line := scanner.Text()
		parts := strings.SplitN(line, ":", 3)
		if len(parts) != 3 {
			return "", fmt.Errorf("cannot parse cgroup line %q", line)
		}
		if strings.HasPrefix(line, "0::") {
			cgroup = line[3:]
			break
		}
		if len(parts[2]) > len(cgroup) {
			cgroup = parts[2]
		}
	}
	// Scan also stops on read/tokenizing failures; do not report a partial
	// result as if the whole file had been parsed.
	if err := scanner.Err(); err != nil {
		return "", fmt.Errorf("reading %q: %w", procFile, err)
	}
	if len(cgroup) == 0 || (!allowRoot && cgroup == "/") {
		return "", fmt.Errorf("could not find cgroup mount in %q", procFile)
	}
	return cgroup, nil
}

// GetOwnCgroup returns the cgroup of the calling process as listed in
// /proc/self/cgroup. The root cgroup "/" is an accepted result.
func GetOwnCgroup() (string, error) {
	const selfCgroupFile = "/proc/self/cgroup"
	return getCgroupProcess(selfCgroupFile, true)
}

// GetOwnCgroupDisallowRoot returns the cgroup of the calling process as
// listed in /proc/self/cgroup, and fails when that cgroup is the root "/".
func GetOwnCgroupDisallowRoot() (string, error) {
	const selfCgroupFile = "/proc/self/cgroup"
	return getCgroupProcess(selfCgroupFile, false)
}

// GetCgroupProcess returns the cgroup of the process with the given pid, as
// listed in /proc/<pid>/cgroup. The root cgroup "/" is an accepted result.
func GetCgroupProcess(pid int) (string, error) {
	procFile := fmt.Sprintf("/proc/%d/cgroup", pid)
	return getCgroupProcess(procFile, true)
}

// MoveUnderCgroupSubtree moves the processes of the caller's current cgroup
// into the sub-cgroup named subtree below it.
// It is MoveUnderCgroup with an empty cgroup and no explicit process list.
func MoveUnderCgroupSubtree(subtree string) error {
	var (
		currentCgroup string   // empty: use the calling process cgroup
		allProcesses  []uint32 // nil: move every process of that cgroup
	)
	return MoveUnderCgroup(currentCgroup, subtree, allProcesses)
}

// MoveUnderCgroup moves a group of processes to a new cgroup.
// If cgroup is the empty string, then the current calling process cgroup is used.
// If processes is empty, then the processes from the current cgroup are moved.
//
// The move is repeated for every hierarchy listed in /proc/self/cgroup,
// except that hierarchies where the caller is in the root cgroup are skipped
// (the unified hierarchy in unified mode is not skipped). The destination
// cgroup is created when missing. Failures to move an individual process are
// only logged at debug level; failures to parse or read /proc/self/cgroup, to
// create the destination, or to open/read a cgroup.procs file are returned.
func MoveUnderCgroup(cgroup, subtree string, processes []uint32) error {
	procFile := "/proc/self/cgroup"
	f, err := os.Open(procFile)
	if err != nil {
		return err
	}
	defer f.Close()

	unifiedMode, err := IsCgroup2UnifiedMode()
	if err != nil {
		return err
	}

	scanner := bufio.NewScanner(f)
	for scanner.Scan() {
		line := scanner.Text()
		parts := strings.SplitN(line, ":", 3)
		if len(parts) != 3 {
			return fmt.Errorf("cannot parse cgroup line %q", line)
		}

		// root cgroup, skip it
		if parts[2] == "/" && !(unifiedMode && parts[1] == "") {
			continue
		}

		hierarchyRoot := "/sys/fs/cgroup"
		// Special case the unified mount on hybrid cgroup and named hierarchies.
		// This works on Fedora 31, but we should really parse the mounts to see
		// where the cgroup hierarchy is mounted.
		if parts[1] == "" && !unifiedMode {
			// If it is not using unified mode, the cgroup v2 hierarchy is
			// usually mounted under /sys/fs/cgroup/unified
			hierarchyRoot = filepath.Join(hierarchyRoot, "unified")

			// Ignore the unified mount if it doesn't exist
			if _, err := os.Stat(hierarchyRoot); err != nil && os.IsNotExist(err) {
				continue
			}
		} else if parts[1] != "" {
			// Assume the controller is mounted at /sys/fs/cgroup/$CONTROLLER.
			controller := strings.TrimPrefix(parts[1], "name=")
			hierarchyRoot = filepath.Join(hierarchyRoot, controller)
		}

		parentCgroup := cgroup
		if parentCgroup == "" {
			parentCgroup = parts[2]
		}
		newCgroup := filepath.Join(hierarchyRoot, parentCgroup, subtree)
		sourceProcsFile := filepath.Join(hierarchyRoot, parts[2], "cgroup.procs")

		// Done in a helper so the destination cgroup.procs file is closed
		// after each hierarchy instead of piling up until this function returns.
		if err := moveProcessesToCgroup(newCgroup, sourceProcsFile, processes); err != nil {
			return err
		}
	}
	// Scan also stops on read errors; report them rather than claiming success
	// after handling only part of the hierarchies.
	if err := scanner.Err(); err != nil {
		return fmt.Errorf("reading %q: %w", procFile, err)
	}
	return nil
}

// moveProcessesToCgroup creates newCgroup if needed and writes PIDs into its
// cgroup.procs file: the given processes when non-empty, otherwise every PID
// listed in sourceProcsFile. Individual write failures are logged, not returned.
func moveProcessesToCgroup(newCgroup, sourceProcsFile string, processes []uint32) error {
	if err := os.MkdirAll(newCgroup, 0o755); err != nil && !os.IsExist(err) {
		return err
	}

	f, err := os.OpenFile(filepath.Join(newCgroup, "cgroup.procs"), os.O_RDWR, 0o755)
	if err != nil {
		return err
	}
	defer f.Close()

	if len(processes) > 0 {
		for _, pid := range processes {
			if _, err := fmt.Fprintf(f, "%d\n", pid); err != nil {
				logrus.Debugf("Cannot move process %d to cgroup %q: %v", pid, newCgroup, err)
			}
		}
		return nil
	}

	processesData, err := os.ReadFile(sourceProcsFile)
	if err != nil {
		return err
	}
	for _, pid := range bytes.Split(processesData, []byte("\n")) {
		if len(pid) == 0 {
			continue
		}
		if _, err := f.Write(pid); err != nil {
			logrus.Debugf("Cannot move process %s to cgroup %q: %v", string(pid), newCgroup, err)
		}
	}
	return nil
}

var (
	// maybeMoveToSubCgroupSync guards the one-time attempt made by
	// MaybeMoveToSubCgroup.
	maybeMoveToSubCgroupSync sync.Once
	// maybeMoveToSubCgroupSyncErr holds the outcome of that single attempt.
	maybeMoveToSubCgroupSyncErr error
)

// MaybeMoveToSubCgroup moves the current process into the "init" sub cgroup
// when it is running in the root cgroup on a system that uses cgroupv2.
// The attempt is made only once; every call returns the result of that
// first attempt.
func MaybeMoveToSubCgroup() error {
	attempt := func() error {
		unified, err := IsCgroup2UnifiedMode()
		if err != nil {
			return err
		}
		if !unified {
			return nil
		}

		own, err := GetOwnCgroup()
		if err != nil {
			return err
		}
		if own != "/" {
			return nil
		}
		return MoveUnderCgroupSubtree("init")
	}

	maybeMoveToSubCgroupSync.Do(func() {
		maybeMoveToSubCgroupSyncErr = attempt()
	})
	return maybeMoveToSubCgroupSyncErr
}