• source navigation  • diff markup  • identifier search  • freetext search  • 

Sources/procd/jail/cgroups.c

  1 /*
  2  * Copyright (C) 2020 Daniel Golle <daniel@makrotopia.org>
  3  *
  4  * This program is free software; you can redistribute it and/or modify
  5  * it under the terms of the GNU Lesser General Public License version 2.1
  6  * as published by the Free Software Foundation
  7  *
  8  * This program is distributed in the hope that it will be useful,
  9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
 10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 11  * GNU General Public License for more details.
 12  *
 13  * reads unified cgroup config as proposed in
 14  * https://github.com/opencontainers/runtime-spec/pull/1040
 15  * attempt conversion from cgroup1 -> cgroup2
 16  * https://github.com/containers/crun/blob/0.14.1/crun.1.md#cgroup-v2
 17  *
 18  * ToDo:
 19  *  - convert cgroup1 net_prio and net_cls to eBPF program
 20  *  - rdma (anyone?) intelrdt (anyone?)
 21  */
 22 
 23 #define _GNU_SOURCE
 24 
 25 #include <errno.h>
 26 #include <fcntl.h>
 27 #include <stdlib.h>
 28 #include <stdio.h>
 29 #include <string.h>
 30 #include <sys/stat.h>
 31 #include <sys/mman.h>
 32 #include <unistd.h>
 33 #include <libgen.h>
 34 #include <inttypes.h>
 35 
 36 #include <libubox/avl.h>
 37 #include <libubox/avl-cmp.h>
 38 #include <libubox/blobmsg.h>
 39 #include <libubox/list.h>
 40 #include <libubox/utils.h>
 41 
 42 #include "log.h"
 43 #include "cgroups.h"
 44 #include "cgroups-bpf.h"
 45 
 46 #define CGROUP_ROOT "/sys/fs/cgroup/"
 47 #define CGROUP_IO_WEIGHT_MAX 10000
 48 
 49 struct cgval {
 50         struct avl_node avl;
 51         char *val;
 52 };
 53 
 54 struct avl_tree cgvals;
 55 static char *cgroup_path;
 56 static bool initialized;
 57 
 58 void cgroups_prepare(void) {
 59         initialized = false;
 60 }
 61 
 62 void cgroups_init(const char *p) {
 63         avl_init(&cgvals, avl_strcmp, false, NULL);
 64         cgroup_path = strdup(p);
 65         initialized = true;
 66 }
 67 
 68 static void cgroups_set(const char *key, const char *val)
 69 {
 70         struct cgval *valp;
 71 
 72         valp = avl_find_element(&cgvals, key, valp, avl);
 73         if (!valp) {
 74                 valp = malloc(sizeof(struct cgval));
 75                 if (!valp)
 76                         exit(ENOMEM);
 77 
 78                 valp->avl.key = strdup(key);
 79                 avl_insert(&cgvals, &valp->avl);
 80         } else {
 81                 DEBUG("overwriting previous cgroup2 assignment %s=\"%s\"!\n", key, valp->val);
 82                 free(valp->val);
 83         }
 84 
 85         valp->val = strdup(val);
 86 }
 87 
 88 void cgroups_free(void)
 89 {
 90         struct cgval *valp, *tmp;
 91 
 92         if (initialized) {
 93                 avl_remove_all_elements(&cgvals, valp, avl, tmp) {
 94                         free((void *)(valp->avl.key));
 95                         free(valp->val);
 96                         free(valp);
 97                 }
 98                 free(cgroup_path);
 99         }
100 }
101 
102 void cgroups_apply(pid_t pid)
103 {
104         struct cgval *valp;
105         char *cdir, *ent;
106         int fd;
107         size_t maxlen = strlen("cgroup.subtree_control");
108 
109         bool cpuset = false,
110              cpu = false,
111              hugetlb = false,
112              io = false,
113              memory = false,
114              pids = false,
115              rdma = false;
116 
117         char subtree_control[64] = { 0 };
118 
119         DEBUG("using cgroup path %s\n", cgroup_path);
120         mkdir_p(cgroup_path, 0700);
121 
122         /* find which controllers need to be enabled */
123         avl_for_each_element(&cgvals, valp, avl) {
124                 ent = (char *)valp->avl.key;
125                 if (strlen(ent) > maxlen)
126                         maxlen = strlen(ent);
127 
128                 if (!strncmp("cpuset.", ent, 7))
129                         cpuset = true;
130                 else if (!strncmp("cpu.", ent, 4))
131                         cpu = true;
132                 else if (!strncmp("hugetlb.", ent, 8))
133                         hugetlb = true;
134                 else if (!strncmp("io.", ent, 3))
135                         io = true;
136                 else if (!strncmp("memory.", ent, 7))
137                         memory = true;
138                 else if (!strncmp("pids.", ent, 5))
139                         pids = true;
140                 else if (!strncmp("rdma.", ent, 5))
141                         rdma = true;
142         }
143 
144         maxlen += strlen(cgroup_path) + 2;
145 
146         if (cpuset)
147                 strcat(subtree_control, "+cpuset ");
148 
149         if (cpu)
150                 strcat(subtree_control, "+cpu ");
151 
152         if (hugetlb)
153                 strcat(subtree_control, "+hugetlb ");
154 
155         if (io)
156                 strcat(subtree_control, "+io ");
157 
158         if (memory)
159                 strcat(subtree_control, "+memory ");
160 
161         if (pids)
162                 strcat(subtree_control, "+pids ");
163 
164         if (rdma)
165                 strcat(subtree_control, "+rdma ");
166 
167         /* remove trailing space */
168         ent = strchr(subtree_control, '\0') - 1;
169         *ent = '\0';
170 
171         ent = malloc(maxlen);
172         if (!ent)
173                 exit(ENOMEM);
174 
175         DEBUG("recursively applying cgroup.subtree_control = \"%s\"\n", subtree_control);
176         cdir = &cgroup_path[strlen(CGROUP_ROOT) - 2];
177         while ((cdir = strchr(cdir + 1, '/'))) {
178                 *cdir = '\0';
179                 snprintf(ent, maxlen, "%s/cgroup.subtree_control", cgroup_path);
180                 DEBUG(" * %s\n", ent);
181                 if ((fd = open(ent, O_WRONLY)) < 0) {
182                         ERROR("can't open %s: %m\n", ent);
183                         continue;
184                 }
185 
186                 if (write(fd, subtree_control, strlen(subtree_control)) == -1) {
187                         ERROR("can't write to %s: %m\n", ent);
188                         close(fd);
189                         continue;
190                 }
191 
192                 close(fd);
193                 *cdir = '/';
194         }
195 
196         avl_for_each_element(&cgvals, valp, avl) {
197                 DEBUG("applying cgroup2 %s=\"%s\"\n", (char *)valp->avl.key, valp->val);
198                 snprintf(ent, maxlen, "%s/%s", cgroup_path, (char *)valp->avl.key);
199                 fd = open(ent, O_WRONLY);
200                 if (fd < 0) {
201                         ERROR("can't open %s: %m\n", ent);
202                         continue;
203                 }
204                 if (dprintf(fd, "%s", valp->val) < 0) {
205                         ERROR("can't write to %s: %m\n", ent);
206                 };
207                 close(fd);
208         }
209 
210         int dirfd = open(cgroup_path, O_DIRECTORY);
211         if (dirfd < 0) {
212                 ERROR("can't open %s: %m\n", cgroup_path);
213         } else {
214                 attach_cgroups_ebpf(dirfd);
215                 close(dirfd);
216         }
217 
218         snprintf(ent, maxlen, "%s/%s", cgroup_path, "cgroup.procs");
219         fd = open(ent, O_WRONLY);
220         if (fd < 0) {
221                 ERROR("can't open %s: %m\n", cgroup_path);
222         } else {
223                 dprintf(fd, "%d", pid);
224                 close(fd);
225         }
226 
227         free(ent);
228 }
229 
230 enum {
231         OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MAJOR,
232         OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MINOR,
233         OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_WEIGHT,
234         OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_LEAFWEIGHT,
235         __OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MAX,
236 };
237 
238 static const struct blobmsg_policy oci_linux_cgroups_blockio_weightdevice_policy[] = {
239         [OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MAJOR] = { "major", BLOBMSG_CAST_INT64 },
240         [OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MINOR] = { "minor", BLOBMSG_CAST_INT64 },
241         [OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_WEIGHT] = { "weight", BLOBMSG_TYPE_INT32 },
242         [OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_LEAFWEIGHT] = { "leafWeight", BLOBMSG_TYPE_INT32 },
243 };
244 
245 enum {
246         OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAJOR,
247         OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MINOR,
248         OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_RATE,
249         __OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAX,
250 };
251 
252 static const struct blobmsg_policy oci_linux_cgroups_blockio_throttledevice_policy[] = {
253         [OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAJOR] = { "major", BLOBMSG_CAST_INT64 },
254         [OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MINOR] = { "minor", BLOBMSG_CAST_INT64 },
255         [OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_RATE] = { "rate", BLOBMSG_CAST_INT64 },
256 };
257 
258 enum {
259         OCI_LINUX_CGROUPS_BLOCKIO_WEIGHT,
260         OCI_LINUX_CGROUPS_BLOCKIO_LEAFWEIGHT,
261         OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE,
262         OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEREADBPSDEVICE,
263         OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEWRITEBPSDEVICE,
264         OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEREADIOPSDEVICE,
265         OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEWRITEIOPSDEVICE,
266         __OCI_LINUX_CGROUPS_BLOCKIO_MAX,
267 };
268 
269 static const struct blobmsg_policy oci_linux_cgroups_blockio_policy[] = {
270         [OCI_LINUX_CGROUPS_BLOCKIO_WEIGHT] = { "weight", BLOBMSG_TYPE_INT32 },
271         [OCI_LINUX_CGROUPS_BLOCKIO_LEAFWEIGHT] = { "leafWeight", BLOBMSG_TYPE_INT32 },
272         [OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE] = { "weightDevice", BLOBMSG_TYPE_ARRAY },
273         [OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEREADBPSDEVICE] = { "throttleReadBpsDevice", BLOBMSG_TYPE_ARRAY },
274         [OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEWRITEBPSDEVICE] = { "throttleWriteBpsDevice", BLOBMSG_TYPE_ARRAY },
275         [OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEREADIOPSDEVICE] = { "throttleReadIOPSDevice", BLOBMSG_TYPE_ARRAY },
276         [OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEWRITEIOPSDEVICE] = { "throttleWriteIOPSDevice", BLOBMSG_TYPE_ARRAY },
277 };
278 
279 struct posix_dev {
280         uint64_t major;
281         uint64_t minor;
282 };
283 
284 struct iomax_line {
285         struct avl_node avl;
286         struct posix_dev dev;
287         uint64_t rbps;
288         uint64_t wbps;
289         uint64_t riops;
290         uint64_t wiops;
291 };
292 
293 static int avl_devcmp(const void *k1, const void *k2, void *ptr)
294 {
295         struct posix_dev *d1 = (struct posix_dev *)k1, *d2 = (struct posix_dev *)k2;
296 
297         if (d1->major < d2->major)
298                 return -1;
299 
300         if (d1->major > d2->major)
301                 return 1;
302 
303         if (d1->minor < d2->minor)
304                 return -1;
305 
306         if (d1->minor > d2->minor)
307                 return 1;
308 
309         return 0;
310 }
311 
312 static struct iomax_line *get_iomax_line(struct avl_tree *iomax, uint64_t major, uint64_t minor)
313 {
314         struct iomax_line *l;
315         struct posix_dev d;
316         d.major = major;
317         d.minor = minor;
318         l = avl_find_element(iomax, &d, l, avl);
319         if (!l) {
320                 l = malloc(sizeof(struct iomax_line));
321                 if (!l)
322                         exit(ENOMEM);
323 
324                 l->dev.major = d.major;
325                 l->dev.minor = d.minor;
326                 l->avl.key = &l->dev;
327                 l->rbps = -1;
328                 l->wbps = -1;
329                 l->riops = -1;
330                 l->wiops = -1;
331                 avl_insert(iomax, &l->avl);
332         }
333 
334         return l;
335 }
336 
337 static int parseOCIlinuxcgroups_legacy_blockio(struct blob_attr *msg)
338 {
339         struct blob_attr *tb[__OCI_LINUX_CGROUPS_BLOCKIO_MAX],
340                          *tbwd[__OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MAX],
341                          *tbtd[__OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAX],
342                          *cur;
343         int rem;
344         int weight = -1, leafweight = -1;
345         size_t numweightstrs = 0, numiomaxstrs = 0, strtotlen = 1;
346         char **weightstrs = NULL, **iomaxstrs = NULL, **curstr;
347         char *weightstr, *iomaxstr;
348         struct avl_tree iomax;
349         struct iomax_line *curiomax, *tmp;
350 
351         blobmsg_parse(oci_linux_cgroups_blockio_policy, __OCI_LINUX_CGROUPS_BLOCKIO_MAX, tb, blobmsg_data(msg), blobmsg_len(msg));
352 
353         if (tb[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHT]) {
354                 weight = blobmsg_get_u32(tb[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHT]);
355                 ++numweightstrs;
356         }
357 
358         if (weight > CGROUP_IO_WEIGHT_MAX)
359                 return ERANGE;
360 
361         if (tb[OCI_LINUX_CGROUPS_BLOCKIO_LEAFWEIGHT])
362                 leafweight = blobmsg_get_u32(tb[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHT]);
363 
364         if (leafweight > CGROUP_IO_WEIGHT_MAX)
365                 return ERANGE;
366 
367         blobmsg_for_each_attr(cur, tb[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE], rem)
368                 ++numweightstrs;
369 
370         weightstrs = calloc(numweightstrs + 1, sizeof(char *));
371         if (!weightstrs)
372                 exit(ENOMEM);
373 
374         numweightstrs = 0;
375 
376         if (weight > -1)
377                 if (asprintf(&weightstrs[numweightstrs++], "default %d", weight) < 0)
378                         return ENOMEM;
379 
380         blobmsg_for_each_attr(cur, tb[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE], rem) {
381                 uint64_t major, minor;
382                 int devweight = weight, devleafweight = leafweight;
383 
384                 blobmsg_parse(oci_linux_cgroups_blockio_weightdevice_policy, __OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MAX, tbwd, blobmsg_data(cur), blobmsg_len(cur));
385                 if (!tbwd[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MAJOR] ||
386                     !tbwd[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MINOR])
387                         return ENODATA;
388 
389                 if (!tbwd[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_WEIGHT] &&
390                     !tbwd[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_LEAFWEIGHT])
391                         return ENODATA;
392 
393                 if (tbwd[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_WEIGHT])
394                         devweight = blobmsg_get_u32(tbwd[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_WEIGHT]);
395 
396                 if (devweight > CGROUP_IO_WEIGHT_MAX)
397                         return ERANGE;
398 
399                 if (tbwd[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_LEAFWEIGHT])
400                         devleafweight = blobmsg_get_u32(tbwd[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_LEAFWEIGHT]);
401 
402                 if (devleafweight > CGROUP_IO_WEIGHT_MAX)
403                         return ERANGE;
404 
405                 if (tbwd[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_LEAFWEIGHT])
406                         return ENOTSUP;
407 
408                 major = blobmsg_cast_u64(tbwd[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MAJOR]);
409                 minor = blobmsg_cast_u64(tbwd[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MINOR]);
410 
411                 if (asprintf(&weightstrs[numweightstrs++], "%" PRIu64 ":%" PRIu64 " %u", major, minor, devweight) < 0)
412                         return ENOMEM;
413         }
414 
415         if (numweightstrs) {
416                 curstr = weightstrs;
417                 while (*curstr)
418                         strtotlen += strlen(*(curstr++)) + 1;
419 
420                 weightstr = calloc(strtotlen, sizeof(char));
421                 if (!weightstr)
422                         exit(ENOMEM);
423 
424                 curstr = weightstrs;
425                 while (*curstr) {
426                         strcat(weightstr, *curstr);
427                         strcat(weightstr, "\n");
428                         free(*(curstr++));
429                 }
430 
431                 cgroups_set("io.bfq.weight", weightstr);
432                 free(weightstr);
433         };
434 
435         free(weightstrs);
436 
437         avl_init(&iomax, avl_devcmp, false, NULL);
438 
439         blobmsg_for_each_attr(cur, tb[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEREADBPSDEVICE], rem) {
440                 struct iomax_line *l;
441 
442                 blobmsg_parse(oci_linux_cgroups_blockio_throttledevice_policy, __OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAX, tbtd, blobmsg_data(cur), blobmsg_len(cur));
443 
444                 if (!tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAJOR] ||
445                     !tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MINOR] ||
446                     !tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_RATE])
447                         return ENODATA;
448 
449                 l = get_iomax_line(&iomax,
450                                    blobmsg_cast_u64(tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAJOR]),
451                                    blobmsg_cast_u64(tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MINOR]));
452 
453                 l->rbps = blobmsg_cast_u64(tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_RATE]);
454         }
455 
456         blobmsg_for_each_attr(cur, tb[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEWRITEBPSDEVICE], rem) {
457                 struct iomax_line *l;
458 
459                 blobmsg_parse(oci_linux_cgroups_blockio_throttledevice_policy, __OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAX, tbtd, blobmsg_data(cur), blobmsg_len(cur));
460 
461                 if (!tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAJOR] ||
462                     !tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MINOR] ||
463                     !tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_RATE])
464                         return ENODATA;
465 
466                 l = get_iomax_line(&iomax,
467                                    blobmsg_cast_u64(tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAJOR]),
468                                    blobmsg_cast_u64(tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MINOR]));
469 
470                 l->wbps = blobmsg_cast_u64(tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_RATE]);
471         }
472 
473         blobmsg_for_each_attr(cur, tb[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEREADIOPSDEVICE], rem) {
474                 struct iomax_line *l;
475 
476                 blobmsg_parse(oci_linux_cgroups_blockio_throttledevice_policy, __OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAX, tbtd, blobmsg_data(cur), blobmsg_len(cur));
477 
478                 if (!tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAJOR] ||
479                     !tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MINOR] ||
480                     !tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_RATE])
481                         return ENODATA;
482 
483                 l = get_iomax_line(&iomax,
484                                    blobmsg_cast_u64(tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAJOR]),
485                                    blobmsg_cast_u64(tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MINOR]));
486 
487                 l->riops = blobmsg_cast_u64(tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_RATE]);
488         }
489 
490         blobmsg_for_each_attr(cur, tb[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEWRITEIOPSDEVICE], rem) {
491                 struct iomax_line *l;
492 
493                 blobmsg_parse(oci_linux_cgroups_blockio_throttledevice_policy, __OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAX, tbtd, blobmsg_data(cur), blobmsg_len(cur));
494 
495                 if (!tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAJOR] ||
496                     !tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MINOR] ||
497                     !tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_RATE])
498                         return ENODATA;
499 
500                 l = get_iomax_line(&iomax,
501                                    blobmsg_cast_u64(tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAJOR]),
502                                    blobmsg_cast_u64(tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MINOR]));
503 
504                 l->wiops = blobmsg_cast_u64(tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_RATE]);
505         }
506 
507         avl_for_each_element(&iomax, curiomax, avl)
508                 ++numiomaxstrs;
509 
510         if (!numiomaxstrs)
511                 return 0;
512 
513         iomaxstrs = calloc(numiomaxstrs + 1, sizeof(char *));
514         if (!iomaxstrs)
515                 exit(ENOMEM);
516 
517         numiomaxstrs = 0;
518 
519         avl_for_each_element(&iomax, curiomax, avl) {
520                 char iomaxlstr[160];
521                 char lstr[32];
522 
523                 sprintf(iomaxlstr, "%" PRIu64 ":%" PRIu64 " ", curiomax->dev.major, curiomax->dev.minor);
524 
525                 if (curiomax->rbps != -1) {
526                         sprintf(lstr, "rbps=%" PRIu64 " ", curiomax->rbps);
527                         strcat(iomaxlstr, lstr);
528                 }
529                 if (curiomax->wbps != -1) {
530                         sprintf(lstr, "wbps=%" PRIu64 " ", curiomax->wbps);
531                         strcat(iomaxlstr, lstr);
532                 }
533                 if (curiomax->riops != -1) {
534                         sprintf(lstr, "riops=%" PRIu64 " ", curiomax->riops);
535                         strcat(iomaxlstr, lstr);
536                 }
537                 if (curiomax->wiops != -1) {
538                         sprintf(lstr, "wiops=%" PRIu64 " ", curiomax->wiops);
539                         strcat(iomaxlstr, lstr);
540                 }
541 
542                 iomaxstrs[numiomaxstrs++] = strdup(iomaxlstr);
543         }
544 
545         avl_for_each_element_safe(&iomax, curiomax, avl, tmp) {
546                 avl_delete(&iomax, &curiomax->avl);
547                 free(curiomax);
548         }
549 
550         strtotlen = 1; /* 1 accounts for \0 at end of string */
551         if (numiomaxstrs) {
552                 curstr = iomaxstrs;
553                 while (*curstr)
554                         strtotlen += strlen(*(curstr++)) + 1; /* +1 accounts for \n at end of line */
555 
556                 iomaxstr = calloc(strtotlen, sizeof(char));
557                 if (!iomaxstr)
558                         exit(ENOMEM);
559 
560                 curstr = iomaxstrs;
561 
562                 while (*curstr) {
563                         strcat(iomaxstr, *curstr);
564                         strcat(iomaxstr, "\n");
565                         free(*(curstr++));
566                 }
567 
568                 cgroups_set("io.max", iomaxstr);
569                 free(iomaxstr);
570         };
571 
572         free(iomaxstrs);
573 
574         return 0;
575 }
576 
577 
578 enum {
579         OCI_LINUX_CGROUPS_CPU_SHARES,
580         OCI_LINUX_CGROUPS_CPU_PERIOD,
581         OCI_LINUX_CGROUPS_CPU_QUOTA,
582         OCI_LINUX_CGROUPS_CPU_REALTIMERUNTIME,
583         OCI_LINUX_CGROUPS_CPU_REALTIMEPERIOD,
584         OCI_LINUX_CGROUPS_CPU_CPUS,
585         OCI_LINUX_CGROUPS_CPU_MEMS,
586         __OCI_LINUX_CGROUPS_CPU_MAX,
587 };
588 
589 static const struct blobmsg_policy oci_linux_cgroups_cpu_policy[] = {
590         [OCI_LINUX_CGROUPS_CPU_SHARES] = { "shares", BLOBMSG_CAST_INT64 },
591         [OCI_LINUX_CGROUPS_CPU_PERIOD] = { "period", BLOBMSG_CAST_INT64 },
592         [OCI_LINUX_CGROUPS_CPU_QUOTA] = { "quota", BLOBMSG_CAST_INT64 }, /* signed int64! */
593         [OCI_LINUX_CGROUPS_CPU_REALTIMEPERIOD] = { "realtimePeriod", BLOBMSG_CAST_INT64 },
594         [OCI_LINUX_CGROUPS_CPU_REALTIMERUNTIME] = { "realtimeRuntime", BLOBMSG_CAST_INT64 },
595         [OCI_LINUX_CGROUPS_CPU_CPUS] = { "cpus", BLOBMSG_TYPE_STRING },
596         [OCI_LINUX_CGROUPS_CPU_MEMS] = { "mems", BLOBMSG_TYPE_STRING },
597 };
598 
599 static int parseOCIlinuxcgroups_legacy_cpu(struct blob_attr *msg)
600 {
601         struct blob_attr *tb[__OCI_LINUX_CGROUPS_CPU_MAX];
602         uint64_t shares, period = 0;
603         int64_t quota = -2; /* unset */
604         char tmp[32] = { 0 };
605 
606         blobmsg_parse(oci_linux_cgroups_cpu_policy, __OCI_LINUX_CGROUPS_CPU_MAX, tb, blobmsg_data(msg), blobmsg_len(msg));
607 
608         if (tb[OCI_LINUX_CGROUPS_CPU_REALTIMEPERIOD] ||
609             tb[OCI_LINUX_CGROUPS_CPU_REALTIMERUNTIME])
610                 return ENOTSUP; /* no equivalent in cgroup2 */
611 
612         if (tb[OCI_LINUX_CGROUPS_CPU_SHARES]) {
613                 shares = blobmsg_cast_u64(tb[OCI_LINUX_CGROUPS_CPU_SHARES]);
614                 if ((shares < 2) || (shares > 262144))
615                         return ERANGE;
616 
617                 snprintf(tmp, sizeof(tmp), "%" PRIu64, (((uint64_t)1) + ((shares - 2) * 9999) / 262142));
618                 cgroups_set("cpu.weight", tmp);
619                 tmp[0] = '\0';
620         }
621 
622         if (tb[OCI_LINUX_CGROUPS_CPU_QUOTA])
623                 quota = blobmsg_cast_s64(tb[OCI_LINUX_CGROUPS_CPU_QUOTA]);
624 
625         if (tb[OCI_LINUX_CGROUPS_CPU_PERIOD])
626                 period = blobmsg_cast_u64(tb[OCI_LINUX_CGROUPS_CPU_PERIOD]);
627 
628         if (period) {
629                 if (quota >= 0)
630                         snprintf(tmp, sizeof(tmp), "%" PRId64 " %" PRIu64 , quota, period);
631                 else
632                         snprintf(tmp, sizeof(tmp), "max %" PRIu64, period); /* assume default */
633         } else if (quota >= 0) {
634                 snprintf(tmp, sizeof(tmp), "%" PRId64, quota);
635         } else if (quota == -1) {
636                 strcpy(tmp, "max");
637         }
638 
639         if (tmp[0])
640                 cgroups_set("cpu.max", tmp);
641 
642         if (tb[OCI_LINUX_CGROUPS_CPU_CPUS])
643                 cgroups_set("cpuset.cpus", blobmsg_get_string(tb[OCI_LINUX_CGROUPS_CPU_CPUS]));
644 
645         if (tb[OCI_LINUX_CGROUPS_CPU_MEMS])
646                 cgroups_set("cpuset.mems", blobmsg_get_string(tb[OCI_LINUX_CGROUPS_CPU_MEMS]));
647 
648         return 0;
649 }
650 
651 
652 enum {
653         OCI_LINUX_CGROUPS_MEMORY_LIMIT,
654         OCI_LINUX_CGROUPS_MEMORY_RESERVATION,
655         OCI_LINUX_CGROUPS_MEMORY_SWAP,
656         OCI_LINUX_CGROUPS_MEMORY_KERNEL,
657         OCI_LINUX_CGROUPS_MEMORY_KERNELTCP,
658         OCI_LINUX_CGROUPS_MEMORY_SWAPPINESS,
659         OCI_LINUX_CGROUPS_MEMORY_DISABLEOOMKILLER,
660         OCI_LINUX_CGROUPS_MEMORY_USEHIERARCHY,
661         __OCI_LINUX_CGROUPS_MEMORY_MAX,
662 };
663 
664 static const struct blobmsg_policy oci_linux_cgroups_memory_policy[] = {
665         [OCI_LINUX_CGROUPS_MEMORY_LIMIT] = { "limit", BLOBMSG_CAST_INT64 }, /* signed int64! */
666         [OCI_LINUX_CGROUPS_MEMORY_RESERVATION] = { "reservation", BLOBMSG_CAST_INT64 }, /* signed int64! */
667         [OCI_LINUX_CGROUPS_MEMORY_SWAP] = { "swap", BLOBMSG_CAST_INT64 }, /* signed int64! */
668         [OCI_LINUX_CGROUPS_MEMORY_KERNEL] = { "kernel", BLOBMSG_CAST_INT64 }, /* signed int64! ignored */
669         [OCI_LINUX_CGROUPS_MEMORY_KERNELTCP] = { "kernelTCP", BLOBMSG_CAST_INT64 }, /* signed int64! ignored */
670         [OCI_LINUX_CGROUPS_MEMORY_SWAPPINESS] = { "swappiness", BLOBMSG_CAST_INT64 },
671         [OCI_LINUX_CGROUPS_MEMORY_DISABLEOOMKILLER] = { "disableOOMKiller", BLOBMSG_TYPE_BOOL },
672         [OCI_LINUX_CGROUPS_MEMORY_USEHIERARCHY] { "useHierarchy", BLOBMSG_TYPE_BOOL },
673 };
674 
675 static int parseOCIlinuxcgroups_legacy_memory(struct blob_attr *msg)
676 {
677         struct blob_attr *tb[__OCI_LINUX_CGROUPS_MEMORY_MAX];
678         char tmp[32] = { 0 };
679         int64_t limit = -1, swap, reservation;
680 
681         blobmsg_parse(oci_linux_cgroups_memory_policy, __OCI_LINUX_CGROUPS_MEMORY_MAX, tb, blobmsg_data(msg), blobmsg_len(msg));
682 
683         /*
684          * not all properties of the OCI memory section can be mapped to cgroup2
685          * kernel memory accounting is always enabled and included in the set
686          *   memory limit, hence these options can be ignored
687          * disableOOMKiller could be emulated using oom_score_adj + seccomp eBPF
688          *   preventing self-upgrade (but allow downgrade)
689          *
690          * see also https://github.com/opencontainers/runtime-spec/issues/1005
691          */
692         if (tb[OCI_LINUX_CGROUPS_MEMORY_SWAPPINESS] ||
693             tb[OCI_LINUX_CGROUPS_MEMORY_DISABLEOOMKILLER] ||
694             tb[OCI_LINUX_CGROUPS_MEMORY_USEHIERARCHY])
695                 return ENOTSUP;
696 
697 
698         if (tb[OCI_LINUX_CGROUPS_MEMORY_LIMIT]) {
699                 limit = blobmsg_cast_s64(tb[OCI_LINUX_CGROUPS_MEMORY_LIMIT]);
700                 if (limit == -1)
701                         strcpy(tmp, "max");
702                 else
703                         snprintf(tmp, sizeof(tmp), "%" PRId64, limit);
704 
705                 cgroups_set("memory.max", tmp);
706         }
707 
708         if (tb[OCI_LINUX_CGROUPS_MEMORY_RESERVATION]) {
709                 reservation = blobmsg_cast_s64(tb[OCI_LINUX_CGROUPS_MEMORY_RESERVATION]);
710 
711                 if (reservation == -1)
712                         strcpy(tmp, "max");
713                 else
714                         snprintf(tmp, sizeof(tmp), "%" PRId64, reservation);
715 
716                 cgroups_set("memory.low", tmp);
717         }
718 
719         /* OCI 'swap' acounts for memory+swap */
720         if (tb[OCI_LINUX_CGROUPS_MEMORY_SWAP]) {
721                 swap = blobmsg_cast_s64(tb[OCI_LINUX_CGROUPS_MEMORY_SWAP]);
722 
723                 if (swap == -1)
724                         strcpy(tmp, "max");
725                 else if (limit == -1 || (limit < swap))
726                         snprintf(tmp, sizeof(tmp), "%" PRId64, swap);
727                 else
728                         snprintf(tmp, sizeof(tmp), "%" PRId64, limit - swap);
729 
730                 cgroups_set("memory.swap_max", tmp);
731         }
732 
733         return 0;
734 }
735 
736 
737 enum {
738         OCI_LINUX_CGROUPS_PIDS_LIMIT,
739         __OCI_LINUX_CGROUPS_PIDS_MAX,
740 };
741 
742 static const struct blobmsg_policy oci_linux_cgroups_pids_policy[] = {
743         [OCI_LINUX_CGROUPS_PIDS_LIMIT] = { "limit", BLOBMSG_CAST_INT64 },
744 };
745 
746 static int parseOCIlinuxcgroups_legacy_pids(struct blob_attr *msg)
747 {
748         struct blob_attr *tb[__OCI_LINUX_CGROUPS_MEMORY_MAX];
749         char tmp[32] = { 0 };
750 
751         blobmsg_parse(oci_linux_cgroups_pids_policy, __OCI_LINUX_CGROUPS_PIDS_MAX, tb, blobmsg_data(msg), blobmsg_len(msg));
752 
753         if (!tb[OCI_LINUX_CGROUPS_PIDS_LIMIT])
754                 return EINVAL;
755 
756         snprintf(tmp, sizeof(tmp), "%" PRIu64, blobmsg_cast_u64(tb[OCI_LINUX_CGROUPS_PIDS_LIMIT]));
757 
758         cgroups_set("pids.max", tmp);
759 
760         return 0;
761 }
762 
763 static int parseOCIlinuxcgroups_unified(struct blob_attr *msg)
764 {
765         struct blob_attr *cur;
766         int rem;
767 
768         blobmsg_for_each_attr(cur, msg, rem) {
769                 if (blobmsg_type(cur) != BLOBMSG_TYPE_STRING)
770                         return EINVAL;
771 
772                 /* restrict keys */
773                 if (strchr(blobmsg_name(cur), '/') ||
774                     !strcmp(blobmsg_name(cur), "cgroup.subtree_control") ||
775                     !strcmp(blobmsg_name(cur), "cgroup.procs") ||
776                     !strcmp(blobmsg_name(cur), "cgroup.threads") ||
777                     !strcmp(blobmsg_name(cur), "cgroup.freeze"))
778                         return EINVAL;
779 
780                 cgroups_set(blobmsg_name(cur), blobmsg_get_string(cur));
781         }
782 
783         return 0;
784 }
785 
786 enum {
787         OCI_LINUX_CGROUPS_BLOCKIO,
788         OCI_LINUX_CGROUPS_CPU,
789         OCI_LINUX_CGROUPS_DEVICES,
790         OCI_LINUX_CGROUPS_HUGEPAGELIMITS,
791         OCI_LINUX_CGROUPS_INTELRDT,
792         OCI_LINUX_CGROUPS_MEMORY,
793         OCI_LINUX_CGROUPS_NETWORK,
794         OCI_LINUX_CGROUPS_PIDS,
795         OCI_LINUX_CGROUPS_RDMA,
796         OCI_LINUX_CGROUPS_UNIFIED,
797         __OCI_LINUX_CGROUPS_MAX,
798 };
799 
800 static const struct blobmsg_policy oci_linux_cgroups_policy[] = {
801         [OCI_LINUX_CGROUPS_BLOCKIO] = { "blockIO", BLOBMSG_TYPE_TABLE },
802         [OCI_LINUX_CGROUPS_CPU] = { "cpu", BLOBMSG_TYPE_TABLE },
803         [OCI_LINUX_CGROUPS_DEVICES] = { "devices", BLOBMSG_TYPE_ARRAY },
804         [OCI_LINUX_CGROUPS_HUGEPAGELIMITS] = { "hugepageLimits", BLOBMSG_TYPE_ARRAY },
805         [OCI_LINUX_CGROUPS_INTELRDT] = { "intelRdt", BLOBMSG_TYPE_TABLE },
806         [OCI_LINUX_CGROUPS_MEMORY] = { "memory", BLOBMSG_TYPE_TABLE },
807         [OCI_LINUX_CGROUPS_NETWORK] = { "network", BLOBMSG_TYPE_TABLE },
808         [OCI_LINUX_CGROUPS_PIDS] = { "pids", BLOBMSG_TYPE_TABLE },
809         [OCI_LINUX_CGROUPS_RDMA] = { "rdma", BLOBMSG_TYPE_TABLE },
810         [OCI_LINUX_CGROUPS_UNIFIED] = { "unified", BLOBMSG_TYPE_TABLE },
811 };
812 
813 int parseOCIlinuxcgroups(struct blob_attr *msg)
814 {
815         struct blob_attr *tb[__OCI_LINUX_CGROUPS_MAX];
816         int ret;
817 
818         blobmsg_parse(oci_linux_cgroups_policy, __OCI_LINUX_CGROUPS_MAX, tb, blobmsg_data(msg), blobmsg_len(msg));
819 
820         if (tb[OCI_LINUX_CGROUPS_HUGEPAGELIMITS] ||
821             tb[OCI_LINUX_CGROUPS_INTELRDT] ||
822             tb[OCI_LINUX_CGROUPS_NETWORK] ||
823             tb[OCI_LINUX_CGROUPS_RDMA])
824                 return ENOTSUP;
825 
826         if (tb[OCI_LINUX_CGROUPS_BLOCKIO]) {
827                 ret = parseOCIlinuxcgroups_legacy_blockio(tb[OCI_LINUX_CGROUPS_BLOCKIO]);
828                 if (ret)
829                         return ret;
830         }
831 
832         if (tb[OCI_LINUX_CGROUPS_CPU]) {
833                 ret = parseOCIlinuxcgroups_legacy_cpu(tb[OCI_LINUX_CGROUPS_CPU]);
834                 if (ret)
835                         return ret;
836         }
837 
838         if (tb[OCI_LINUX_CGROUPS_DEVICES]) {
839                 ret = parseOCIlinuxcgroups_devices(tb[OCI_LINUX_CGROUPS_DEVICES]);
840                 if (ret)
841                         return ret;
842         }
843 
844         if (tb[OCI_LINUX_CGROUPS_MEMORY]) {
845                 ret = parseOCIlinuxcgroups_legacy_memory(tb[OCI_LINUX_CGROUPS_MEMORY]);
846                 if (ret)
847                         return ret;
848         }
849 
850         if (tb[OCI_LINUX_CGROUPS_PIDS]) {
851                 ret = parseOCIlinuxcgroups_legacy_pids(tb[OCI_LINUX_CGROUPS_PIDS]);
852                 if (ret)
853                         return ret;
854         }
855 
856         if (tb[OCI_LINUX_CGROUPS_UNIFIED]) {
857                 ret = parseOCIlinuxcgroups_unified(tb[OCI_LINUX_CGROUPS_UNIFIED]);
858                 if (ret)
859                         return ret;
860         }
861 
862         return 0;
863 }
864 

This page was automatically generated by LXR 0.3.1.  •  OpenWrt