• source navigation  • diff markup  • identifier search  • freetext search  • 

Sources/procd/jail/cgroups.c

  1 /*
  2  * Copyright (C) 2020 Daniel Golle <daniel@makrotopia.org>
  3  *
  4  * This program is free software; you can redistribute it and/or modify
  5  * it under the terms of the GNU Lesser General Public License version 2.1
  6  * as published by the Free Software Foundation
  7  *
  8  * This program is distributed in the hope that it will be useful,
  9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
 10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 11  * GNU General Public License for more details.
 12  *
 13  * reads unified cgroup config as proposed in
 14  * https://github.com/opencontainers/runtime-spec/pull/1040
 15  * attempt conversion from cgroup1 -> cgroup2
 16  * https://github.com/containers/crun/blob/0.14.1/crun.1.md#cgroup-v2
 17  *
 18  * ToDo:
 19  *  - convert cgroup1 devices to eBPF program
 20  *  - convert cgroup1 net_prio and net_cls to eBPF program
 21  *  - rdma (anyone?) intelrdt (anyone?)
 22  */
 23 
 24 #define _GNU_SOURCE
 25 
 26 #include <assert.h>
 27 #include <errno.h>
 28 #include <fcntl.h>
 29 #include <stdlib.h>
 30 #include <stdio.h>
 31 #include <string.h>
 32 #include <sys/stat.h>
 33 #include <sys/mman.h>
 34 #include <unistd.h>
 35 #include <libgen.h>
 36 #include <inttypes.h>
 37 
 38 #include <libubox/avl.h>
 39 #include <libubox/avl-cmp.h>
 40 #include <libubox/blobmsg.h>
 41 #include <libubox/list.h>
 42 
 43 #include "fs.h"
 44 #include "log.h"
 45 #include "cgroups.h"
 46 
 47 #define CGROUP_ROOT "/sys/fs/cgroup/"
 48 #define CGROUP_IO_WEIGHT_MAX 10000
 49 
 50 struct cgval {
 51         struct avl_node avl;
 52         char *val;
 53 };
 54 
 55 struct avl_tree cgvals;
 56 static char *cgroup_path;
 57 static bool initialized;
 58 
 59 void cgroups_prepare(void) {
 60         initialized = false;
 61 }
 62 
 63 void cgroups_init(const char *p) {
 64         avl_init(&cgvals, avl_strcmp, false, NULL);
 65         cgroup_path = strdup(p);
 66         initialized = true;
 67 }
 68 
 69 static void cgroups_set(const char *key, const char *val)
 70 {
 71         struct cgval *valp;
 72 
 73         valp = avl_find_element(&cgvals, key, valp, avl);
 74         if (!valp) {
 75                 valp = malloc(sizeof(struct cgval));
 76                 assert(valp != NULL);
 77                 valp->avl.key = strdup(key);
 78                 avl_insert(&cgvals, &valp->avl);
 79         } else {
 80                 DEBUG("overwriting previous cgroup2 assignment %s=\"%s\"!\n", key, valp->val);
 81                 free(valp->val);
 82         }
 83 
 84         valp->val = strdup(val);
 85 }
 86 
 87 void cgroups_free(void)
 88 {
 89         struct cgval *valp, *tmp;
 90 
 91         if (initialized) {
 92                 avl_remove_all_elements(&cgvals, valp, avl, tmp) {
 93                         free((void *)(valp->avl.key));
 94                         free(valp->val);
 95                         free(valp);
 96                 }
 97                 free(cgroup_path);
 98         }
 99 }
100 
101 void cgroups_apply(pid_t pid)
102 {
103         struct cgval *valp;
104         char *cdir, *ent;
105         int fd;
106         size_t maxlen = strlen("cgroup.subtree_control");
107 
108         bool cpuset = false,
109              cpu = false,
110              hugetlb = false,
111              io = false,
112              memory = false,
113              pids = false,
114              rdma = false;
115 
116         char subtree_control[64] = { 0 };
117 
118         DEBUG("using cgroup path %s\n", cgroup_path);
119         mkdir_p(cgroup_path, 0700);
120 
121         /* find which controllers need to be enabled */
122         avl_for_each_element(&cgvals, valp, avl) {
123                 ent = (char *)valp->avl.key;
124                 if (strlen(ent) > maxlen)
125                         maxlen = strlen(ent);
126 
127                 if (!strncmp("cpuset.", ent, 7))
128                         cpuset = true;
129                 else if (!strncmp("cpu.", ent, 4))
130                         cpu = true;
131                 else if (!strncmp("hugetlb.", ent, 8))
132                         hugetlb = true;
133                 else if (!strncmp("io.", ent, 3))
134                         io = true;
135                 else if (!strncmp("memory.", ent, 7))
136                         memory = true;
137                 else if (!strncmp("pids.", ent, 5))
138                         pids = true;
139                 else if (!strncmp("rdma.", ent, 5))
140                         pids = true;
141         }
142 
143         maxlen += strlen(cgroup_path) + 2;
144 
145         if (cpuset)
146                 strcat(subtree_control, "+cpuset ");
147 
148         if (cpu)
149                 strcat(subtree_control, "+cpu ");
150 
151         if (hugetlb)
152                 strcat(subtree_control, "+hugetlb ");
153 
154         if (io)
155                 strcat(subtree_control, "+io ");
156 
157         if (memory)
158                 strcat(subtree_control, "+memory ");
159 
160         if (pids)
161                 strcat(subtree_control, "+pids ");
162 
163         if (rdma)
164                 strcat(subtree_control, "+rdma ");
165 
166         /* remove trailing space */
167         ent = strchr(subtree_control, '\0') - 1;
168         *ent = '\0';
169 
170         ent = malloc(maxlen);
171         assert(ent != 0);
172 
173         DEBUG("recursively applying cgroup.subtree_control = \"%s\"\n", subtree_control);
174         cdir = &cgroup_path[strlen(CGROUP_ROOT) - 2];
175         while ((cdir = strchr(cdir + 1, '/'))) {
176                 *cdir = '\0';
177                 snprintf(ent, maxlen, "%s/cgroup.subtree_control", cgroup_path);
178                 DEBUG(" * %s\n", ent);
179                 fd = open(ent, O_WRONLY);
180                 assert(fd != -1);
181                 write(fd, subtree_control, strlen(subtree_control));
182                 close(fd);
183                 *cdir = '/';
184         }
185 
186         avl_for_each_element(&cgvals, valp, avl) {
187                 DEBUG("applying cgroup2 %s=\"%s\"\n", (char *)valp->avl.key, valp->val);
188                 snprintf(ent, maxlen, "%s/%s", cgroup_path, (char *)valp->avl.key);
189                 fd = open(ent, O_WRONLY);
190                 if (fd == -1) {
191                         ERROR("can't open %s: %m\n", ent);
192                         continue;
193                 }
194                 if (dprintf(fd, "%s", valp->val) < 0) {
195                         ERROR("can't write to %s: %m\n", ent);
196                 };
197                 close(fd);
198         }
199 
200         snprintf(ent, maxlen, "%s/%s", cgroup_path, "cgroup.procs");
201         fd = open(ent, O_WRONLY);
202         assert(fd != -1);
203         dprintf(fd, "%d", pid);
204         close(fd);
205 
206         free(ent);
207 }
208 
209 enum {
210         OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MAJOR,
211         OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MINOR,
212         OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_WEIGHT,
213         OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_LEAFWEIGHT,
214         __OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MAX,
215 };
216 
217 static const struct blobmsg_policy oci_linux_cgroups_blockio_weightdevice_policy[] = {
218         [OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MAJOR] = { "major", BLOBMSG_CAST_INT64 },
219         [OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MINOR] = { "minor", BLOBMSG_CAST_INT64 },
220         [OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_WEIGHT] = { "weight", BLOBMSG_TYPE_INT32 },
221         [OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_LEAFWEIGHT] = { "leafWeight", BLOBMSG_TYPE_INT32 },
222 };
223 
224 enum {
225         OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAJOR,
226         OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MINOR,
227         OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_RATE,
228         __OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAX,
229 };
230 
231 static const struct blobmsg_policy oci_linux_cgroups_blockio_throttledevice_policy[] = {
232         [OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAJOR] = { "major", BLOBMSG_CAST_INT64 },
233         [OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MINOR] = { "minor", BLOBMSG_CAST_INT64 },
234         [OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_RATE] = { "rate", BLOBMSG_CAST_INT64 },
235 };
236 
237 enum {
238         OCI_LINUX_CGROUPS_BLOCKIO_WEIGHT,
239         OCI_LINUX_CGROUPS_BLOCKIO_LEAFWEIGHT,
240         OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE,
241         OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEREADBPSDEVICE,
242         OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEWRITEBPSDEVICE,
243         OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEREADIOPSDEVICE,
244         OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEWRITEIOPSDEVICE,
245         __OCI_LINUX_CGROUPS_BLOCKIO_MAX,
246 };
247 
248 static const struct blobmsg_policy oci_linux_cgroups_blockio_policy[] = {
249         [OCI_LINUX_CGROUPS_BLOCKIO_WEIGHT] = { "weight", BLOBMSG_TYPE_INT32 },
250         [OCI_LINUX_CGROUPS_BLOCKIO_LEAFWEIGHT] = { "leafWeight", BLOBMSG_TYPE_INT32 },
251         [OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE] = { "weightDevice", BLOBMSG_TYPE_ARRAY },
252         [OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEREADBPSDEVICE] = { "throttleReadBpsDevice", BLOBMSG_TYPE_ARRAY },
253         [OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEWRITEBPSDEVICE] = { "throttleWriteBpsDevice", BLOBMSG_TYPE_ARRAY },
254         [OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEREADIOPSDEVICE] = { "throttleReadIOPSDevice", BLOBMSG_TYPE_ARRAY },
255         [OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEWRITEIOPSDEVICE] = { "throttleWriteIOPSDevice", BLOBMSG_TYPE_ARRAY },
256 };
257 
258 struct posix_dev {
259         uint64_t major;
260         uint64_t minor;
261 };
262 
263 struct iomax_line {
264         struct avl_node avl;
265         struct posix_dev dev;
266         uint64_t rbps;
267         uint64_t wbps;
268         uint64_t riops;
269         uint64_t wiops;
270 };
271 
272 static int avl_devcmp(const void *k1, const void *k2, void *ptr)
273 {
274         struct posix_dev *d1 = (struct posix_dev *)k1, *d2 = (struct posix_dev *)k2;
275 
276         if (d1->major < d2->major)
277                 return -1;
278 
279         if (d1->major > d2->major)
280                 return 1;
281 
282         if (d1->minor < d2->minor)
283                 return -1;
284 
285         if (d1->minor > d2->minor)
286                 return 1;
287 
288         return 0;
289 }
290 
291 static struct iomax_line *get_iomax_line(struct avl_tree *iomax, uint64_t major, uint64_t minor)
292 {
293         struct iomax_line *l;
294         struct posix_dev d;
295         d.major = major;
296         d.minor = minor;
297         l = avl_find_element(iomax, &d, l, avl);
298         if (!l) {
299                 l = malloc(sizeof(struct iomax_line));
300                 assert(l != NULL);
301                 l->dev.major = d.major;
302                 l->dev.minor = d.minor;
303                 l->avl.key = &l->dev;
304                 l->rbps = -1;
305                 l->wbps = -1;
306                 l->riops = -1;
307                 l->wiops = -1;
308                 avl_insert(iomax, &l->avl);
309         }
310 
311         return l;
312 }
313 
314 static int parseOCIlinuxcgroups_legacy_blockio(struct blob_attr *msg)
315 {
316         struct blob_attr *tb[__OCI_LINUX_CGROUPS_BLOCKIO_MAX],
317                          *tbwd[__OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MAX],
318                          *tbtd[__OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAX],
319                          *cur;
320         int rem;
321         int weight = -1, leafweight = -1;
322         size_t numweightstrs = 0, numiomaxstrs = 0, strtotlen = 1;
323         char **weightstrs = NULL, **iomaxstrs = NULL, **curstr;
324         char *weightstr, *iomaxstr;
325         struct avl_tree iomax;
326         struct iomax_line *curiomax, *tmp;
327 
328         blobmsg_parse(oci_linux_cgroups_blockio_policy, __OCI_LINUX_CGROUPS_BLOCKIO_MAX, tb, blobmsg_data(msg), blobmsg_len(msg));
329 
330         if (tb[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHT]) {
331                 weight = blobmsg_get_u32(tb[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHT]);
332                 ++numweightstrs;
333         }
334 
335         if (weight > CGROUP_IO_WEIGHT_MAX)
336                 return ERANGE;
337 
338         if (tb[OCI_LINUX_CGROUPS_BLOCKIO_LEAFWEIGHT])
339                 leafweight = blobmsg_get_u32(tb[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHT]);
340 
341         if (leafweight > CGROUP_IO_WEIGHT_MAX)
342                 return ERANGE;
343 
344         blobmsg_for_each_attr(cur, tb[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE], rem)
345                 ++numweightstrs;
346 
347         weightstrs = calloc(numweightstrs + 1, sizeof(char *));
348         assert(weightstrs != 0);
349         numweightstrs = 0;
350 
351         if (weight > -1)
352                 asprintf(&weightstrs[numweightstrs++], "default %d", weight);
353 
354         blobmsg_for_each_attr(cur, tb[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE], rem) {
355                 uint64_t major, minor;
356                 int devweight = weight, devleafweight = leafweight;
357 
358                 blobmsg_parse(oci_linux_cgroups_blockio_weightdevice_policy, __OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MAX, tbwd, blobmsg_data(cur), blobmsg_len(cur));
359                 if (!tbwd[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MAJOR] ||
360                     !tbwd[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MINOR])
361                         return ENODATA;
362 
363                 if (!tbwd[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_WEIGHT] &&
364                     !tbwd[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_LEAFWEIGHT])
365                         return ENODATA;
366 
367                 if (tbwd[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_WEIGHT])
368                         devweight = blobmsg_get_u32(tbwd[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_WEIGHT]);
369 
370                 if (devweight > CGROUP_IO_WEIGHT_MAX)
371                         return ERANGE;
372 
373                 if (tbwd[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_LEAFWEIGHT])
374                         devleafweight = blobmsg_get_u32(tbwd[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_LEAFWEIGHT]);
375 
376                 if (devleafweight > CGROUP_IO_WEIGHT_MAX)
377                         return ERANGE;
378 
379                 if (tbwd[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_LEAFWEIGHT])
380                         return ENOTSUP;
381 
382                 major = blobmsg_cast_u64(tbwd[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MAJOR]);
383                 minor = blobmsg_cast_u64(tbwd[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MINOR]);
384 
385                 asprintf(&weightstrs[numweightstrs++], "%" PRIu64 ":%" PRIu64 " %u", major, minor, devweight);
386         }
387 
388         if (numweightstrs) {
389                 curstr = weightstrs;
390                 while (*curstr)
391                         strtotlen += strlen(*(curstr++)) + 1;
392 
393                 weightstr = calloc(strtotlen, sizeof(char));
394                 assert(weightstr != 0);
395 
396                 curstr = weightstrs;
397                 while (*curstr) {
398                         strcat(weightstr, *curstr);
399                         strcat(weightstr, "\n");
400                         free(*(curstr++));
401                 }
402 
403                 cgroups_set("io.bfq.weight", weightstr);
404                 free(weightstr);
405         };
406 
407         free(weightstrs);
408 
409         avl_init(&iomax, avl_devcmp, false, NULL);
410 
411         blobmsg_for_each_attr(cur, tb[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEREADBPSDEVICE], rem) {
412                 struct iomax_line *l;
413 
414                 blobmsg_parse(oci_linux_cgroups_blockio_throttledevice_policy, __OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAX, tbtd, blobmsg_data(cur), blobmsg_len(cur));
415 
416                 if (!tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAJOR] ||
417                     !tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MINOR] ||
418                     !tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_RATE])
419                         return ENODATA;
420 
421                 l = get_iomax_line(&iomax,
422                                    blobmsg_cast_u64(tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAJOR]),
423                                    blobmsg_cast_u64(tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MINOR]));
424 
425                 l->rbps = blobmsg_cast_u64(tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_RATE]);
426         }
427 
428         blobmsg_for_each_attr(cur, tb[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEWRITEBPSDEVICE], rem) {
429                 struct iomax_line *l;
430 
431                 blobmsg_parse(oci_linux_cgroups_blockio_throttledevice_policy, __OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAX, tbtd, blobmsg_data(cur), blobmsg_len(cur));
432 
433                 if (!tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAJOR] ||
434                     !tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MINOR] ||
435                     !tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_RATE])
436                         return ENODATA;
437 
438                 l = get_iomax_line(&iomax,
439                                    blobmsg_cast_u64(tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAJOR]),
440                                    blobmsg_cast_u64(tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MINOR]));
441 
442                 l->wbps = blobmsg_cast_u64(tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_RATE]);
443         }
444 
445         blobmsg_for_each_attr(cur, tb[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEREADIOPSDEVICE], rem) {
446                 struct iomax_line *l;
447 
448                 blobmsg_parse(oci_linux_cgroups_blockio_throttledevice_policy, __OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAX, tbtd, blobmsg_data(cur), blobmsg_len(cur));
449 
450                 if (!tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAJOR] ||
451                     !tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MINOR] ||
452                     !tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_RATE])
453                         return ENODATA;
454 
455                 l = get_iomax_line(&iomax,
456                                    blobmsg_cast_u64(tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAJOR]),
457                                    blobmsg_cast_u64(tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MINOR]));
458 
459                 l->riops = blobmsg_cast_u64(tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_RATE]);
460         }
461 
462         blobmsg_for_each_attr(cur, tb[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEWRITEIOPSDEVICE], rem) {
463                 struct iomax_line *l;
464 
465                 blobmsg_parse(oci_linux_cgroups_blockio_throttledevice_policy, __OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAX, tbtd, blobmsg_data(cur), blobmsg_len(cur));
466 
467                 if (!tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAJOR] ||
468                     !tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MINOR] ||
469                     !tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_RATE])
470                         return ENODATA;
471 
472                 l = get_iomax_line(&iomax,
473                                    blobmsg_cast_u64(tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAJOR]),
474                                    blobmsg_cast_u64(tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MINOR]));
475 
476                 l->wiops = blobmsg_cast_u64(tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_RATE]);
477         }
478 
479         avl_for_each_element(&iomax, curiomax, avl)
480                 ++numiomaxstrs;
481 
482         if (!numiomaxstrs)
483                 return 0;
484 
485         iomaxstrs = calloc(numiomaxstrs + 1, sizeof(char *));
486         assert(iomaxstrs != 0);
487         numiomaxstrs = 0;
488 
489         avl_for_each_element(&iomax, curiomax, avl) {
490                 char iomaxlstr[160];
491                 char lstr[32];
492 
493                 sprintf(iomaxlstr, "%" PRIu64 ":%" PRIu64 " ", curiomax->dev.major, curiomax->dev.minor);
494 
495                 if (curiomax->rbps != -1) {
496                         sprintf(lstr, "rbps=%" PRIu64 " ", curiomax->rbps);
497                         strcat(iomaxlstr, lstr);
498                 }
499                 if (curiomax->wbps != -1) {
500                         sprintf(lstr, "wbps=%" PRIu64 " ", curiomax->wbps);
501                         strcat(iomaxlstr, lstr);
502                 }
503                 if (curiomax->riops != -1) {
504                         sprintf(lstr, "riops=%" PRIu64 " ", curiomax->riops);
505                         strcat(iomaxlstr, lstr);
506                 }
507                 if (curiomax->wiops != -1) {
508                         sprintf(lstr, "wiops=%" PRIu64 " ", curiomax->wiops);
509                         strcat(iomaxlstr, lstr);
510                 }
511 
512                 iomaxstrs[numiomaxstrs++] = strdup(iomaxlstr);
513         }
514 
515         avl_for_each_element_safe(&iomax, curiomax, avl, tmp) {
516                 avl_delete(&iomax, &curiomax->avl);
517                 free(curiomax);
518         }
519 
520         strtotlen = 1; /* 1 accounts for \0 at end of string */
521         if (numiomaxstrs) {
522                 curstr = iomaxstrs;
523                 while (*curstr)
524                         strtotlen += strlen(*(curstr++)) + 1; /* +1 accounts for \n at end of line */
525 
526                 iomaxstr = calloc(strtotlen, sizeof(char));
527                 assert(iomaxstr != 0);
528                 curstr = iomaxstrs;
529 
530                 while (*curstr) {
531                         strcat(iomaxstr, *curstr);
532                         strcat(iomaxstr, "\n");
533                         free(*(curstr++));
534                 }
535 
536                 cgroups_set("io.max", iomaxstr);
537                 free(iomaxstr);
538         };
539 
540         free(iomaxstrs);
541 
542         return 0;
543 }
544 
545 
546 enum {
547         OCI_LINUX_CGROUPS_CPU_SHARES,
548         OCI_LINUX_CGROUPS_CPU_PERIOD,
549         OCI_LINUX_CGROUPS_CPU_QUOTA,
550         OCI_LINUX_CGROUPS_CPU_REALTIMERUNTIME,
551         OCI_LINUX_CGROUPS_CPU_REALTIMEPERIOD,
552         OCI_LINUX_CGROUPS_CPU_CPUS,
553         OCI_LINUX_CGROUPS_CPU_MEMS,
554         __OCI_LINUX_CGROUPS_CPU_MAX,
555 };
556 
557 static const struct blobmsg_policy oci_linux_cgroups_cpu_policy[] = {
558         [OCI_LINUX_CGROUPS_CPU_SHARES] = { "shares", BLOBMSG_CAST_INT64 },
559         [OCI_LINUX_CGROUPS_CPU_PERIOD] = { "period", BLOBMSG_CAST_INT64 },
560         [OCI_LINUX_CGROUPS_CPU_QUOTA] = { "quota", BLOBMSG_CAST_INT64 }, /* signed int64! */
561         [OCI_LINUX_CGROUPS_CPU_REALTIMEPERIOD] = { "realtimePeriod", BLOBMSG_CAST_INT64 },
562         [OCI_LINUX_CGROUPS_CPU_REALTIMERUNTIME] = { "realtimeRuntime", BLOBMSG_CAST_INT64 },
563         [OCI_LINUX_CGROUPS_CPU_CPUS] = { "cpus", BLOBMSG_TYPE_STRING },
564         [OCI_LINUX_CGROUPS_CPU_MEMS] = { "mems", BLOBMSG_TYPE_STRING },
565 };
566 
567 static int parseOCIlinuxcgroups_legacy_cpu(struct blob_attr *msg)
568 {
569         struct blob_attr *tb[__OCI_LINUX_CGROUPS_CPU_MAX];
570         uint64_t shares, period = 0;
571         int64_t quota = -2; /* unset */
572         char tmp[32] = { 0 };
573 
574         blobmsg_parse(oci_linux_cgroups_cpu_policy, __OCI_LINUX_CGROUPS_CPU_MAX, tb, blobmsg_data(msg), blobmsg_len(msg));
575 
576         if (tb[OCI_LINUX_CGROUPS_CPU_REALTIMEPERIOD] ||
577             tb[OCI_LINUX_CGROUPS_CPU_REALTIMERUNTIME])
578                 return ENOTSUP; /* no equivalent in cgroup2 */
579 
580         if (tb[OCI_LINUX_CGROUPS_CPU_SHARES]) {
581                 shares = blobmsg_cast_u64(tb[OCI_LINUX_CGROUPS_CPU_SHARES]);
582                 if ((shares < 2) || (shares > 262144))
583                         return ERANGE;
584 
585                 snprintf(tmp, sizeof(tmp), "%" PRIu64, (((uint64_t)1) + ((shares - 2) * 9999) / 262142));
586                 cgroups_set("cpu.weight", tmp);
587                 tmp[0] = '\0';
588         }
589 
590         if (tb[OCI_LINUX_CGROUPS_CPU_QUOTA])
591                 quota = blobmsg_cast_s64(tb[OCI_LINUX_CGROUPS_CPU_QUOTA]);
592 
593         if (tb[OCI_LINUX_CGROUPS_CPU_PERIOD])
594                 period = blobmsg_cast_u64(tb[OCI_LINUX_CGROUPS_CPU_PERIOD]);
595 
596         if (period) {
597                 if (quota >= 0)
598                         snprintf(tmp, sizeof(tmp), "%" PRId64 " %" PRIu64 , quota, period);
599                 else
600                         snprintf(tmp, sizeof(tmp), "max %" PRIu64, period); /* assume default */
601         } else if (quota >= 0) {
602                 snprintf(tmp, sizeof(tmp), "%" PRId64, quota);
603         } else if (quota == -1) {
604                 strcpy(tmp, "max");
605         }
606 
607         if (tmp[0])
608                 cgroups_set("cpu.max", tmp);
609 
610         if (tb[OCI_LINUX_CGROUPS_CPU_CPUS])
611                 cgroups_set("cpuset.cpus", blobmsg_get_string(tb[OCI_LINUX_CGROUPS_CPU_CPUS]));
612 
613         if (tb[OCI_LINUX_CGROUPS_CPU_MEMS])
614                 cgroups_set("cpuset.mems", blobmsg_get_string(tb[OCI_LINUX_CGROUPS_CPU_MEMS]));
615 
616         return 0;
617 }
618 
619 
620 enum {
621         OCI_LINUX_CGROUPS_MEMORY_LIMIT,
622         OCI_LINUX_CGROUPS_MEMORY_RESERVATION,
623         OCI_LINUX_CGROUPS_MEMORY_SWAP,
624         OCI_LINUX_CGROUPS_MEMORY_KERNEL,
625         OCI_LINUX_CGROUPS_MEMORY_KERNELTCP,
626         OCI_LINUX_CGROUPS_MEMORY_SWAPPINESS,
627         OCI_LINUX_CGROUPS_MEMORY_DISABLEOOMKILLER,
628         OCI_LINUX_CGROUPS_MEMORY_USEHIERARCHY,
629         __OCI_LINUX_CGROUPS_MEMORY_MAX,
630 };
631 
632 static const struct blobmsg_policy oci_linux_cgroups_memory_policy[] = {
633         [OCI_LINUX_CGROUPS_MEMORY_LIMIT] = { "limit", BLOBMSG_CAST_INT64 }, /* signed int64! */
634         [OCI_LINUX_CGROUPS_MEMORY_RESERVATION] = { "reservation", BLOBMSG_CAST_INT64 }, /* signed int64! */
635         [OCI_LINUX_CGROUPS_MEMORY_SWAP] = { "swap", BLOBMSG_CAST_INT64 }, /* signed int64! */
636         [OCI_LINUX_CGROUPS_MEMORY_KERNEL] = { "kernel", BLOBMSG_CAST_INT64 }, /* signed int64! ignored */
637         [OCI_LINUX_CGROUPS_MEMORY_KERNELTCP] = { "kernelTCP", BLOBMSG_CAST_INT64 }, /* signed int64! ignored */
638         [OCI_LINUX_CGROUPS_MEMORY_SWAPPINESS] = { "swappiness", BLOBMSG_CAST_INT64 },
639         [OCI_LINUX_CGROUPS_MEMORY_DISABLEOOMKILLER] = { "disableOOMKiller", BLOBMSG_TYPE_BOOL },
640         [OCI_LINUX_CGROUPS_MEMORY_USEHIERARCHY] { "useHierarchy", BLOBMSG_TYPE_BOOL },
641 };
642 
643 static int parseOCIlinuxcgroups_legacy_memory(struct blob_attr *msg)
644 {
645         struct blob_attr *tb[__OCI_LINUX_CGROUPS_MEMORY_MAX];
646         char tmp[32] = { 0 };
647         int64_t limit, swap, reservation;
648 
649         blobmsg_parse(oci_linux_cgroups_memory_policy, __OCI_LINUX_CGROUPS_MEMORY_MAX, tb, blobmsg_data(msg), blobmsg_len(msg));
650 
651         /*
652          * not all properties of the OCI memory section can be mapped to cgroup2
653          * kernel memory accounting is always enabled and included in the set
654          *   memory limit, hence these options can be ignored
655          * disableOOMKiller could be emulated using oom_score_adj + seccomp eBPF
656          *   preventing self-upgrade (but allow downgrade)
657          *
658          * see also https://github.com/opencontainers/runtime-spec/issues/1005
659          */
660         if (tb[OCI_LINUX_CGROUPS_MEMORY_SWAPPINESS] ||
661             tb[OCI_LINUX_CGROUPS_MEMORY_DISABLEOOMKILLER] ||
662             tb[OCI_LINUX_CGROUPS_MEMORY_USEHIERARCHY])
663                 return ENOTSUP;
664 
665 
666         if (tb[OCI_LINUX_CGROUPS_MEMORY_LIMIT]) {
667                 limit = blobmsg_cast_s64(tb[OCI_LINUX_CGROUPS_MEMORY_LIMIT]);
668                 if (limit == -1)
669                         strcpy(tmp, "max");
670                 else
671                         snprintf(tmp, sizeof(tmp), "%" PRId64, limit);
672 
673                 cgroups_set("memory.max", tmp);
674         }
675 
676         if (tb[OCI_LINUX_CGROUPS_MEMORY_RESERVATION]) {
677                 reservation = blobmsg_cast_s64(tb[OCI_LINUX_CGROUPS_MEMORY_RESERVATION]);
678 
679                 if (reservation == -1)
680                         strcpy(tmp, "max");
681                 else
682                         snprintf(tmp, sizeof(tmp), "%" PRId64, reservation);
683 
684                 cgroups_set("memory.low", tmp);
685         }
686 
687         /* OCI 'swap' acounts for memory+swap */
688         if (tb[OCI_LINUX_CGROUPS_MEMORY_SWAP]) {
689                 swap = blobmsg_cast_s64(tb[OCI_LINUX_CGROUPS_MEMORY_SWAP]);
690 
691                 if (swap == -1)
692                         strcpy(tmp, "max");
693                 else if (limit == -1 || (limit < swap))
694                         snprintf(tmp, sizeof(tmp), "%" PRId64, swap);
695                 else
696                         snprintf(tmp, sizeof(tmp), "%" PRId64, limit - swap);
697 
698                 cgroups_set("memory.swap_max", tmp);
699         }
700 
701         return 0;
702 }
703 
704 
705 enum {
706         OCI_LINUX_CGROUPS_PIDS_LIMIT,
707         __OCI_LINUX_CGROUPS_PIDS_MAX,
708 };
709 
710 static const struct blobmsg_policy oci_linux_cgroups_pids_policy[] = {
711         [OCI_LINUX_CGROUPS_PIDS_LIMIT] = { "limit", BLOBMSG_CAST_INT64 },
712 };
713 
714 static int parseOCIlinuxcgroups_legacy_pids(struct blob_attr *msg)
715 {
716         struct blob_attr *tb[__OCI_LINUX_CGROUPS_MEMORY_MAX];
717         char tmp[32] = { 0 };
718 
719         blobmsg_parse(oci_linux_cgroups_pids_policy, __OCI_LINUX_CGROUPS_PIDS_MAX, tb, blobmsg_data(msg), blobmsg_len(msg));
720 
721         if (!tb[OCI_LINUX_CGROUPS_PIDS_LIMIT])
722                 return EINVAL;
723 
724         snprintf(tmp, sizeof(tmp), "%" PRIu64, blobmsg_cast_u64(tb[OCI_LINUX_CGROUPS_PIDS_LIMIT]));
725 
726         cgroups_set("pids.max", tmp);
727 
728         return 0;
729 }
730 
731 static int parseOCIlinuxcgroups_unified(struct blob_attr *msg)
732 {
733         struct blob_attr *cur;
734         int rem;
735 
736         blobmsg_for_each_attr(cur, msg, rem) {
737                 if (blobmsg_type(cur) != BLOBMSG_TYPE_STRING)
738                         return EINVAL;
739 
740                 /* restrict keys */
741                 if (strchr(blobmsg_name(cur), '/') ||
742                     !strcmp(blobmsg_name(cur), "cgroup.subtree_control") ||
743                     !strcmp(blobmsg_name(cur), "cgroup.procs") ||
744                     !strcmp(blobmsg_name(cur), "cgroup.threads") ||
745                     !strcmp(blobmsg_name(cur), "cgroup.freeze"))
746                         return EINVAL;
747 
748                 cgroups_set(blobmsg_name(cur), blobmsg_get_string(cur));
749         }
750 
751         return 0;
752 }
753 
754 enum {
755         OCI_LINUX_CGROUPS_BLOCKIO,
756         OCI_LINUX_CGROUPS_CPU,
757         OCI_LINUX_CGROUPS_DEVICES,
758         OCI_LINUX_CGROUPS_HUGEPAGELIMITS,
759         OCI_LINUX_CGROUPS_INTELRDT,
760         OCI_LINUX_CGROUPS_MEMORY,
761         OCI_LINUX_CGROUPS_NETWORK,
762         OCI_LINUX_CGROUPS_PIDS,
763         OCI_LINUX_CGROUPS_RDMA,
764         OCI_LINUX_CGROUPS_UNIFIED,
765         __OCI_LINUX_CGROUPS_MAX,
766 };
767 
768 static const struct blobmsg_policy oci_linux_cgroups_policy[] = {
769         [OCI_LINUX_CGROUPS_BLOCKIO] = { "blockIO", BLOBMSG_TYPE_TABLE },
770         [OCI_LINUX_CGROUPS_CPU] = { "cpu", BLOBMSG_TYPE_TABLE },
771         [OCI_LINUX_CGROUPS_DEVICES] = { "devices", BLOBMSG_TYPE_ARRAY },
772         [OCI_LINUX_CGROUPS_HUGEPAGELIMITS] = { "hugepageLimits", BLOBMSG_TYPE_ARRAY },
773         [OCI_LINUX_CGROUPS_INTELRDT] = { "intelRdt", BLOBMSG_TYPE_TABLE },
774         [OCI_LINUX_CGROUPS_MEMORY] = { "memory", BLOBMSG_TYPE_TABLE },
775         [OCI_LINUX_CGROUPS_NETWORK] = { "network", BLOBMSG_TYPE_TABLE },
776         [OCI_LINUX_CGROUPS_PIDS] = { "pids", BLOBMSG_TYPE_TABLE },
777         [OCI_LINUX_CGROUPS_RDMA] = { "rdma", BLOBMSG_TYPE_TABLE },
778         [OCI_LINUX_CGROUPS_UNIFIED] = { "unified", BLOBMSG_TYPE_TABLE },
779 };
780 
781 int parseOCIlinuxcgroups(struct blob_attr *msg)
782 {
783         struct blob_attr *tb[__OCI_LINUX_CGROUPS_MAX];
784         int ret;
785 
786         blobmsg_parse(oci_linux_cgroups_policy, __OCI_LINUX_CGROUPS_MAX, tb, blobmsg_data(msg), blobmsg_len(msg));
787 
788         if (tb[OCI_LINUX_CGROUPS_DEVICES] ||
789             tb[OCI_LINUX_CGROUPS_HUGEPAGELIMITS] ||
790             tb[OCI_LINUX_CGROUPS_INTELRDT] ||
791             tb[OCI_LINUX_CGROUPS_NETWORK] ||
792             tb[OCI_LINUX_CGROUPS_RDMA])
793                 return ENOTSUP;
794 
795         if (tb[OCI_LINUX_CGROUPS_BLOCKIO]) {
796                 ret = parseOCIlinuxcgroups_legacy_blockio(tb[OCI_LINUX_CGROUPS_BLOCKIO]);
797                 if (ret)
798                         return ret;
799         }
800 
801         if (tb[OCI_LINUX_CGROUPS_CPU]) {
802                 ret = parseOCIlinuxcgroups_legacy_cpu(tb[OCI_LINUX_CGROUPS_CPU]);
803                 if (ret)
804                         return ret;
805         }
806 
807         if (tb[OCI_LINUX_CGROUPS_MEMORY]) {
808                 ret = parseOCIlinuxcgroups_legacy_memory(tb[OCI_LINUX_CGROUPS_MEMORY]);
809                 if (ret)
810                         return ret;
811         }
812 
813         if (tb[OCI_LINUX_CGROUPS_PIDS]) {
814                 ret = parseOCIlinuxcgroups_legacy_pids(tb[OCI_LINUX_CGROUPS_PIDS]);
815                 if (ret)
816                         return ret;
817         }
818 
819         if (tb[OCI_LINUX_CGROUPS_UNIFIED]) {
820                 ret = parseOCIlinuxcgroups_unified(tb[OCI_LINUX_CGROUPS_UNIFIED]);
821                 if (ret)
822                         return ret;
823         }
824 
825         return 0;
826 }
827 

This page was automatically generated by LXR 0.3.1.  •  OpenWrt