• source navigation  • diff markup  • identifier search  • freetext search  • 

Sources/procd/jail/cgroups.c

  1 /*
  2  * Copyright (C) 2020 Daniel Golle <daniel@makrotopia.org>
  3  *
  4  * This program is free software; you can redistribute it and/or modify
  5  * it under the terms of the GNU Lesser General Public License version 2.1
  6  * as published by the Free Software Foundation
  7  *
  8  * This program is distributed in the hope that it will be useful,
  9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
 10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 11  * GNU General Public License for more details.
 12  *
 13  * reads unified cgroup config as proposed in
 14  * https://github.com/opencontainers/runtime-spec/pull/1040
 15  * attempt conversion from cgroup1 -> cgroup2
 16  * https://github.com/containers/crun/blob/0.14.1/crun.1.md#cgroup-v2
 17  *
 18  * ToDo:
 19  *  - convert cgroup1 net_prio and net_cls to eBPF program
 20  *  - rdma (anyone?) intelrdt (anyone?)
 21  */
 22 
 23 #define _GNU_SOURCE
 24 
 25 #include <errno.h>
 26 #include <fcntl.h>
 27 #include <stdlib.h>
 28 #include <stdio.h>
 29 #include <string.h>
 30 #include <sys/stat.h>
 31 #include <sys/mman.h>
 32 #include <unistd.h>
 33 #include <libgen.h>
 34 #include <inttypes.h>
 35 
 36 #include <libubox/avl.h>
 37 #include <libubox/avl-cmp.h>
 38 #include <libubox/blobmsg.h>
 39 #include <libubox/list.h>
 40 #include <libubox/utils.h>
 41 
 42 #include "log.h"
 43 #include "cgroups.h"
 44 #include "cgroups-bpf.h"
 45 
 46 #define CGROUP_ROOT "/sys/fs/cgroup/"
 47 #define CGROUP_IO_WEIGHT_MAX 10000
 48 
 49 struct cgval {
 50         struct avl_node avl;
 51         char *val;
 52 };
 53 
 54 struct avl_tree cgvals;
 55 static char *cgroup_path;
 56 static bool initialized;
 57 
 58 void cgroups_prepare(void) {
 59         initialized = false;
 60 }
 61 
 62 void cgroups_init(const char *p) {
 63         avl_init(&cgvals, avl_strcmp, false, NULL);
 64         cgroup_path = strdup(p);
 65         initialized = true;
 66 }
 67 
 68 static void cgroups_set(const char *key, const char *val)
 69 {
 70         struct cgval *valp;
 71 
 72         valp = avl_find_element(&cgvals, key, valp, avl);
 73         if (!valp) {
 74                 valp = malloc(sizeof(struct cgval));
 75                 if (!valp)
 76                         exit(ENOMEM);
 77 
 78                 valp->avl.key = strdup(key);
 79                 avl_insert(&cgvals, &valp->avl);
 80         } else {
 81                 DEBUG("overwriting previous cgroup2 assignment %s=\"%s\"!\n", key, valp->val);
 82                 free(valp->val);
 83         }
 84 
 85         valp->val = strdup(val);
 86 }
 87 
 88 void cgroups_free(void)
 89 {
 90         struct cgval *valp, *tmp;
 91 
 92         if (initialized) {
 93                 avl_remove_all_elements(&cgvals, valp, avl, tmp) {
 94                         free((void *)(valp->avl.key));
 95                         free(valp->val);
 96                         free(valp);
 97                 }
 98                 free(cgroup_path);
 99         }
100 }
101 
102 void cgroups_apply(pid_t pid)
103 {
104         struct cgval *valp;
105         char *cdir, *ent;
106         int fd;
107         size_t maxlen = strlen("cgroup.subtree_control");
108 
109         bool cpuset = false,
110              cpu = false,
111              hugetlb = false,
112              io = false,
113              memory = false,
114              pids = false,
115              rdma = false;
116 
117         char subtree_control[64] = { 0 };
118 
119         DEBUG("using cgroup path %s\n", cgroup_path);
120         mkdir_p(cgroup_path, 0700);
121 
122         /* find which controllers need to be enabled */
123         avl_for_each_element(&cgvals, valp, avl) {
124                 ent = (char *)valp->avl.key;
125                 if (strlen(ent) > maxlen)
126                         maxlen = strlen(ent);
127 
128                 if (!strncmp("cpuset.", ent, 7))
129                         cpuset = true;
130                 else if (!strncmp("cpu.", ent, 4))
131                         cpu = true;
132                 else if (!strncmp("hugetlb.", ent, 8))
133                         hugetlb = true;
134                 else if (!strncmp("io.", ent, 3))
135                         io = true;
136                 else if (!strncmp("memory.", ent, 7))
137                         memory = true;
138                 else if (!strncmp("pids.", ent, 5))
139                         pids = true;
140                 else if (!strncmp("rdma.", ent, 5))
141                         rdma = true;
142         }
143 
144         maxlen += strlen(cgroup_path) + 2;
145 
146         if (cpuset)
147                 strcat(subtree_control, "+cpuset ");
148 
149         if (cpu)
150                 strcat(subtree_control, "+cpu ");
151 
152         if (hugetlb)
153                 strcat(subtree_control, "+hugetlb ");
154 
155         if (io)
156                 strcat(subtree_control, "+io ");
157 
158         if (memory)
159                 strcat(subtree_control, "+memory ");
160 
161         if (pids)
162                 strcat(subtree_control, "+pids ");
163 
164         if (rdma)
165                 strcat(subtree_control, "+rdma ");
166 
167         /* remove trailing space (length is > 0) */
168         ent = strchr(subtree_control, '\0');
169         if (ent > subtree_control) {
170                 ent -= 1;
171                 *ent = '\0';
172         }
173 
174         ent = malloc(maxlen);
175         if (!ent)
176                 exit(ENOMEM);
177 
178         DEBUG("recursively applying cgroup.subtree_control = \"%s\"\n", subtree_control);
179         cdir = &cgroup_path[strlen(CGROUP_ROOT) - 2];
180         while ((cdir = strchr(cdir + 1, '/'))) {
181                 *cdir = '\0';
182                 snprintf(ent, maxlen, "%s/cgroup.subtree_control", cgroup_path);
183                 DEBUG(" * %s\n", ent);
184                 if ((fd = open(ent, O_WRONLY)) < 0) {
185                         ERROR("can't open %s: %m\n", ent);
186                         continue;
187                 }
188 
189                 if (write(fd, subtree_control, strlen(subtree_control)) == -1) {
190                         ERROR("can't write to %s: %m\n", ent);
191                         close(fd);
192                         continue;
193                 }
194 
195                 close(fd);
196                 *cdir = '/';
197         }
198 
199         avl_for_each_element(&cgvals, valp, avl) {
200                 DEBUG("applying cgroup2 %s=\"%s\"\n", (char *)valp->avl.key, valp->val);
201                 snprintf(ent, maxlen, "%s/%s", cgroup_path, (char *)valp->avl.key);
202                 fd = open(ent, O_WRONLY);
203                 if (fd < 0) {
204                         ERROR("can't open %s: %m\n", ent);
205                         continue;
206                 }
207                 if (dprintf(fd, "%s", valp->val) < 0) {
208                         ERROR("can't write to %s: %m\n", ent);
209                 };
210                 close(fd);
211         }
212 
213         int dirfd = open(cgroup_path, O_DIRECTORY);
214         if (dirfd < 0) {
215                 ERROR("can't open %s: %m\n", cgroup_path);
216         } else {
217                 attach_cgroups_ebpf(dirfd);
218                 close(dirfd);
219         }
220 
221         snprintf(ent, maxlen, "%s/%s", cgroup_path, "cgroup.procs");
222         fd = open(ent, O_WRONLY);
223         if (fd < 0) {
224                 ERROR("can't open %s: %m\n", cgroup_path);
225         } else {
226                 dprintf(fd, "%d", pid);
227                 close(fd);
228         }
229 
230         free(ent);
231 }
232 
233 enum {
234         OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MAJOR,
235         OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MINOR,
236         OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_WEIGHT,
237         OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_LEAFWEIGHT,
238         __OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MAX,
239 };
240 
241 static const struct blobmsg_policy oci_linux_cgroups_blockio_weightdevice_policy[] = {
242         [OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MAJOR] = { "major", BLOBMSG_CAST_INT64 },
243         [OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MINOR] = { "minor", BLOBMSG_CAST_INT64 },
244         [OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_WEIGHT] = { "weight", BLOBMSG_TYPE_INT32 },
245         [OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_LEAFWEIGHT] = { "leafWeight", BLOBMSG_TYPE_INT32 },
246 };
247 
248 enum {
249         OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAJOR,
250         OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MINOR,
251         OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_RATE,
252         __OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAX,
253 };
254 
255 static const struct blobmsg_policy oci_linux_cgroups_blockio_throttledevice_policy[] = {
256         [OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAJOR] = { "major", BLOBMSG_CAST_INT64 },
257         [OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MINOR] = { "minor", BLOBMSG_CAST_INT64 },
258         [OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_RATE] = { "rate", BLOBMSG_CAST_INT64 },
259 };
260 
261 enum {
262         OCI_LINUX_CGROUPS_BLOCKIO_WEIGHT,
263         OCI_LINUX_CGROUPS_BLOCKIO_LEAFWEIGHT,
264         OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE,
265         OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEREADBPSDEVICE,
266         OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEWRITEBPSDEVICE,
267         OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEREADIOPSDEVICE,
268         OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEWRITEIOPSDEVICE,
269         __OCI_LINUX_CGROUPS_BLOCKIO_MAX,
270 };
271 
272 static const struct blobmsg_policy oci_linux_cgroups_blockio_policy[] = {
273         [OCI_LINUX_CGROUPS_BLOCKIO_WEIGHT] = { "weight", BLOBMSG_TYPE_INT32 },
274         [OCI_LINUX_CGROUPS_BLOCKIO_LEAFWEIGHT] = { "leafWeight", BLOBMSG_TYPE_INT32 },
275         [OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE] = { "weightDevice", BLOBMSG_TYPE_ARRAY },
276         [OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEREADBPSDEVICE] = { "throttleReadBpsDevice", BLOBMSG_TYPE_ARRAY },
277         [OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEWRITEBPSDEVICE] = { "throttleWriteBpsDevice", BLOBMSG_TYPE_ARRAY },
278         [OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEREADIOPSDEVICE] = { "throttleReadIOPSDevice", BLOBMSG_TYPE_ARRAY },
279         [OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEWRITEIOPSDEVICE] = { "throttleWriteIOPSDevice", BLOBMSG_TYPE_ARRAY },
280 };
281 
282 struct posix_dev {
283         uint64_t major;
284         uint64_t minor;
285 };
286 
287 struct iomax_line {
288         struct avl_node avl;
289         struct posix_dev dev;
290         uint64_t rbps;
291         uint64_t wbps;
292         uint64_t riops;
293         uint64_t wiops;
294 };
295 
296 static int avl_devcmp(const void *k1, const void *k2, void *ptr)
297 {
298         struct posix_dev *d1 = (struct posix_dev *)k1, *d2 = (struct posix_dev *)k2;
299 
300         if (d1->major < d2->major)
301                 return -1;
302 
303         if (d1->major > d2->major)
304                 return 1;
305 
306         if (d1->minor < d2->minor)
307                 return -1;
308 
309         if (d1->minor > d2->minor)
310                 return 1;
311 
312         return 0;
313 }
314 
315 static struct iomax_line *get_iomax_line(struct avl_tree *iomax, uint64_t major, uint64_t minor)
316 {
317         struct iomax_line *l;
318         struct posix_dev d;
319         d.major = major;
320         d.minor = minor;
321         l = avl_find_element(iomax, &d, l, avl);
322         if (!l) {
323                 l = malloc(sizeof(struct iomax_line));
324                 if (!l)
325                         exit(ENOMEM);
326 
327                 l->dev.major = d.major;
328                 l->dev.minor = d.minor;
329                 l->avl.key = &l->dev;
330                 l->rbps = -1;
331                 l->wbps = -1;
332                 l->riops = -1;
333                 l->wiops = -1;
334                 avl_insert(iomax, &l->avl);
335         }
336 
337         return l;
338 }
339 
340 static int parseOCIlinuxcgroups_legacy_blockio(struct blob_attr *msg)
341 {
342         struct blob_attr *tb[__OCI_LINUX_CGROUPS_BLOCKIO_MAX],
343                          *tbwd[__OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MAX],
344                          *tbtd[__OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAX],
345                          *cur;
346         int rem;
347         int weight = -1, leafweight = -1;
348         size_t numweightstrs = 0, numiomaxstrs = 0, strtotlen = 1;
349         char **weightstrs = NULL, **iomaxstrs = NULL, **curstr;
350         char *weightstr, *iomaxstr;
351         struct avl_tree iomax;
352         struct iomax_line *curiomax, *tmp;
353 
354         blobmsg_parse(oci_linux_cgroups_blockio_policy, __OCI_LINUX_CGROUPS_BLOCKIO_MAX, tb, blobmsg_data(msg), blobmsg_len(msg));
355 
356         if (tb[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHT]) {
357                 weight = blobmsg_get_u32(tb[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHT]);
358                 ++numweightstrs;
359         }
360 
361         if (weight > CGROUP_IO_WEIGHT_MAX)
362                 return ERANGE;
363 
364         if (tb[OCI_LINUX_CGROUPS_BLOCKIO_LEAFWEIGHT])
365                 leafweight = blobmsg_get_u32(tb[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHT]);
366 
367         if (leafweight > CGROUP_IO_WEIGHT_MAX)
368                 return ERANGE;
369 
370         blobmsg_for_each_attr(cur, tb[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE], rem)
371                 ++numweightstrs;
372 
373         weightstrs = calloc(numweightstrs + 1, sizeof(char *));
374         if (!weightstrs)
375                 exit(ENOMEM);
376 
377         numweightstrs = 0;
378 
379         if (weight > -1)
380                 if (asprintf(&weightstrs[numweightstrs++], "default %d", weight) < 0)
381                         return ENOMEM;
382 
383         blobmsg_for_each_attr(cur, tb[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE], rem) {
384                 uint64_t major, minor;
385                 int devweight = weight, devleafweight = leafweight;
386 
387                 blobmsg_parse(oci_linux_cgroups_blockio_weightdevice_policy, __OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MAX, tbwd, blobmsg_data(cur), blobmsg_len(cur));
388                 if (!tbwd[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MAJOR] ||
389                     !tbwd[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MINOR])
390                         return ENODATA;
391 
392                 if (!tbwd[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_WEIGHT] &&
393                     !tbwd[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_LEAFWEIGHT])
394                         return ENODATA;
395 
396                 if (tbwd[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_WEIGHT])
397                         devweight = blobmsg_get_u32(tbwd[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_WEIGHT]);
398 
399                 if (devweight > CGROUP_IO_WEIGHT_MAX)
400                         return ERANGE;
401 
402                 if (tbwd[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_LEAFWEIGHT])
403                         devleafweight = blobmsg_get_u32(tbwd[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_LEAFWEIGHT]);
404 
405                 if (devleafweight > CGROUP_IO_WEIGHT_MAX)
406                         return ERANGE;
407 
408                 if (tbwd[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_LEAFWEIGHT])
409                         return ENOTSUP;
410 
411                 major = blobmsg_cast_u64(tbwd[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MAJOR]);
412                 minor = blobmsg_cast_u64(tbwd[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MINOR]);
413 
414                 if (asprintf(&weightstrs[numweightstrs++], "%" PRIu64 ":%" PRIu64 " %u", major, minor, devweight) < 0)
415                         return ENOMEM;
416         }
417 
418         if (numweightstrs) {
419                 curstr = weightstrs;
420                 while (*curstr)
421                         strtotlen += strlen(*(curstr++)) + 1;
422 
423                 weightstr = calloc(strtotlen, sizeof(char));
424                 if (!weightstr)
425                         exit(ENOMEM);
426 
427                 curstr = weightstrs;
428                 while (*curstr) {
429                         strcat(weightstr, *curstr);
430                         strcat(weightstr, "\n");
431                         free(*(curstr++));
432                 }
433 
434                 cgroups_set("io.bfq.weight", weightstr);
435                 free(weightstr);
436         };
437 
438         free(weightstrs);
439 
440         avl_init(&iomax, avl_devcmp, false, NULL);
441 
442         blobmsg_for_each_attr(cur, tb[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEREADBPSDEVICE], rem) {
443                 struct iomax_line *l;
444 
445                 blobmsg_parse(oci_linux_cgroups_blockio_throttledevice_policy, __OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAX, tbtd, blobmsg_data(cur), blobmsg_len(cur));
446 
447                 if (!tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAJOR] ||
448                     !tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MINOR] ||
449                     !tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_RATE])
450                         return ENODATA;
451 
452                 l = get_iomax_line(&iomax,
453                                    blobmsg_cast_u64(tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAJOR]),
454                                    blobmsg_cast_u64(tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MINOR]));
455 
456                 l->rbps = blobmsg_cast_u64(tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_RATE]);
457         }
458 
459         blobmsg_for_each_attr(cur, tb[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEWRITEBPSDEVICE], rem) {
460                 struct iomax_line *l;
461 
462                 blobmsg_parse(oci_linux_cgroups_blockio_throttledevice_policy, __OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAX, tbtd, blobmsg_data(cur), blobmsg_len(cur));
463 
464                 if (!tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAJOR] ||
465                     !tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MINOR] ||
466                     !tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_RATE])
467                         return ENODATA;
468 
469                 l = get_iomax_line(&iomax,
470                                    blobmsg_cast_u64(tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAJOR]),
471                                    blobmsg_cast_u64(tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MINOR]));
472 
473                 l->wbps = blobmsg_cast_u64(tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_RATE]);
474         }
475 
476         blobmsg_for_each_attr(cur, tb[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEREADIOPSDEVICE], rem) {
477                 struct iomax_line *l;
478 
479                 blobmsg_parse(oci_linux_cgroups_blockio_throttledevice_policy, __OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAX, tbtd, blobmsg_data(cur), blobmsg_len(cur));
480 
481                 if (!tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAJOR] ||
482                     !tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MINOR] ||
483                     !tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_RATE])
484                         return ENODATA;
485 
486                 l = get_iomax_line(&iomax,
487                                    blobmsg_cast_u64(tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAJOR]),
488                                    blobmsg_cast_u64(tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MINOR]));
489 
490                 l->riops = blobmsg_cast_u64(tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_RATE]);
491         }
492 
493         blobmsg_for_each_attr(cur, tb[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEWRITEIOPSDEVICE], rem) {
494                 struct iomax_line *l;
495 
496                 blobmsg_parse(oci_linux_cgroups_blockio_throttledevice_policy, __OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAX, tbtd, blobmsg_data(cur), blobmsg_len(cur));
497 
498                 if (!tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAJOR] ||
499                     !tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MINOR] ||
500                     !tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_RATE])
501                         return ENODATA;
502 
503                 l = get_iomax_line(&iomax,
504                                    blobmsg_cast_u64(tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAJOR]),
505                                    blobmsg_cast_u64(tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MINOR]));
506 
507                 l->wiops = blobmsg_cast_u64(tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_RATE]);
508         }
509 
510         avl_for_each_element(&iomax, curiomax, avl)
511                 ++numiomaxstrs;
512 
513         if (!numiomaxstrs)
514                 return 0;
515 
516         iomaxstrs = calloc(numiomaxstrs + 1, sizeof(char *));
517         if (!iomaxstrs)
518                 exit(ENOMEM);
519 
520         numiomaxstrs = 0;
521 
522         avl_for_each_element(&iomax, curiomax, avl) {
523                 char iomaxlstr[160];
524                 char lstr[32];
525 
526                 sprintf(iomaxlstr, "%" PRIu64 ":%" PRIu64 " ", curiomax->dev.major, curiomax->dev.minor);
527 
528                 if (curiomax->rbps != -1) {
529                         sprintf(lstr, "rbps=%" PRIu64 " ", curiomax->rbps);
530                         strcat(iomaxlstr, lstr);
531                 }
532                 if (curiomax->wbps != -1) {
533                         sprintf(lstr, "wbps=%" PRIu64 " ", curiomax->wbps);
534                         strcat(iomaxlstr, lstr);
535                 }
536                 if (curiomax->riops != -1) {
537                         sprintf(lstr, "riops=%" PRIu64 " ", curiomax->riops);
538                         strcat(iomaxlstr, lstr);
539                 }
540                 if (curiomax->wiops != -1) {
541                         sprintf(lstr, "wiops=%" PRIu64 " ", curiomax->wiops);
542                         strcat(iomaxlstr, lstr);
543                 }
544 
545                 iomaxstrs[numiomaxstrs++] = strdup(iomaxlstr);
546         }
547 
548         avl_for_each_element_safe(&iomax, curiomax, avl, tmp) {
549                 avl_delete(&iomax, &curiomax->avl);
550                 free(curiomax);
551         }
552 
553         strtotlen = 1; /* 1 accounts for \0 at end of string */
554         if (numiomaxstrs) {
555                 curstr = iomaxstrs;
556                 while (*curstr)
557                         strtotlen += strlen(*(curstr++)) + 1; /* +1 accounts for \n at end of line */
558 
559                 iomaxstr = calloc(strtotlen, sizeof(char));
560                 if (!iomaxstr)
561                         exit(ENOMEM);
562 
563                 curstr = iomaxstrs;
564 
565                 while (*curstr) {
566                         strcat(iomaxstr, *curstr);
567                         strcat(iomaxstr, "\n");
568                         free(*(curstr++));
569                 }
570 
571                 cgroups_set("io.max", iomaxstr);
572                 free(iomaxstr);
573         };
574 
575         free(iomaxstrs);
576 
577         return 0;
578 }
579 
580 
581 enum {
582         OCI_LINUX_CGROUPS_CPU_SHARES,
583         OCI_LINUX_CGROUPS_CPU_PERIOD,
584         OCI_LINUX_CGROUPS_CPU_QUOTA,
585         OCI_LINUX_CGROUPS_CPU_REALTIMERUNTIME,
586         OCI_LINUX_CGROUPS_CPU_REALTIMEPERIOD,
587         OCI_LINUX_CGROUPS_CPU_CPUS,
588         OCI_LINUX_CGROUPS_CPU_MEMS,
589         __OCI_LINUX_CGROUPS_CPU_MAX,
590 };
591 
592 static const struct blobmsg_policy oci_linux_cgroups_cpu_policy[] = {
593         [OCI_LINUX_CGROUPS_CPU_SHARES] = { "shares", BLOBMSG_CAST_INT64 },
594         [OCI_LINUX_CGROUPS_CPU_PERIOD] = { "period", BLOBMSG_CAST_INT64 },
595         [OCI_LINUX_CGROUPS_CPU_QUOTA] = { "quota", BLOBMSG_CAST_INT64 }, /* signed int64! */
596         [OCI_LINUX_CGROUPS_CPU_REALTIMEPERIOD] = { "realtimePeriod", BLOBMSG_CAST_INT64 },
597         [OCI_LINUX_CGROUPS_CPU_REALTIMERUNTIME] = { "realtimeRuntime", BLOBMSG_CAST_INT64 },
598         [OCI_LINUX_CGROUPS_CPU_CPUS] = { "cpus", BLOBMSG_TYPE_STRING },
599         [OCI_LINUX_CGROUPS_CPU_MEMS] = { "mems", BLOBMSG_TYPE_STRING },
600 };
601 
602 static int parseOCIlinuxcgroups_legacy_cpu(struct blob_attr *msg)
603 {
604         struct blob_attr *tb[__OCI_LINUX_CGROUPS_CPU_MAX];
605         uint64_t shares, period = 0;
606         int64_t quota = -2; /* unset */
607         char tmp[32] = { 0 };
608 
609         blobmsg_parse(oci_linux_cgroups_cpu_policy, __OCI_LINUX_CGROUPS_CPU_MAX, tb, blobmsg_data(msg), blobmsg_len(msg));
610 
611         if (tb[OCI_LINUX_CGROUPS_CPU_REALTIMEPERIOD] ||
612             tb[OCI_LINUX_CGROUPS_CPU_REALTIMERUNTIME])
613                 return ENOTSUP; /* no equivalent in cgroup2 */
614 
615         if (tb[OCI_LINUX_CGROUPS_CPU_SHARES]) {
616                 shares = blobmsg_cast_u64(tb[OCI_LINUX_CGROUPS_CPU_SHARES]);
617                 if ((shares < 2) || (shares > 262144))
618                         return ERANGE;
619 
620                 snprintf(tmp, sizeof(tmp), "%" PRIu64, (((uint64_t)1) + ((shares - 2) * 9999) / 262142));
621                 cgroups_set("cpu.weight", tmp);
622                 tmp[0] = '\0';
623         }
624 
625         if (tb[OCI_LINUX_CGROUPS_CPU_QUOTA])
626                 quota = blobmsg_cast_s64(tb[OCI_LINUX_CGROUPS_CPU_QUOTA]);
627 
628         if (tb[OCI_LINUX_CGROUPS_CPU_PERIOD])
629                 period = blobmsg_cast_u64(tb[OCI_LINUX_CGROUPS_CPU_PERIOD]);
630 
631         if (period) {
632                 if (quota >= 0)
633                         snprintf(tmp, sizeof(tmp), "%" PRId64 " %" PRIu64 , quota, period);
634                 else
635                         snprintf(tmp, sizeof(tmp), "max %" PRIu64, period); /* assume default */
636         } else if (quota >= 0) {
637                 snprintf(tmp, sizeof(tmp), "%" PRId64, quota);
638         } else if (quota == -1) {
639                 strcpy(tmp, "max");
640         }
641 
642         if (tmp[0])
643                 cgroups_set("cpu.max", tmp);
644 
645         if (tb[OCI_LINUX_CGROUPS_CPU_CPUS])
646                 cgroups_set("cpuset.cpus", blobmsg_get_string(tb[OCI_LINUX_CGROUPS_CPU_CPUS]));
647 
648         if (tb[OCI_LINUX_CGROUPS_CPU_MEMS])
649                 cgroups_set("cpuset.mems", blobmsg_get_string(tb[OCI_LINUX_CGROUPS_CPU_MEMS]));
650 
651         return 0;
652 }
653 
654 
655 enum {
656         OCI_LINUX_CGROUPS_MEMORY_LIMIT,
657         OCI_LINUX_CGROUPS_MEMORY_RESERVATION,
658         OCI_LINUX_CGROUPS_MEMORY_SWAP,
659         OCI_LINUX_CGROUPS_MEMORY_KERNEL,
660         OCI_LINUX_CGROUPS_MEMORY_KERNELTCP,
661         OCI_LINUX_CGROUPS_MEMORY_SWAPPINESS,
662         OCI_LINUX_CGROUPS_MEMORY_DISABLEOOMKILLER,
663         OCI_LINUX_CGROUPS_MEMORY_USEHIERARCHY,
664         __OCI_LINUX_CGROUPS_MEMORY_MAX,
665 };
666 
667 static const struct blobmsg_policy oci_linux_cgroups_memory_policy[] = {
668         [OCI_LINUX_CGROUPS_MEMORY_LIMIT] = { "limit", BLOBMSG_CAST_INT64 }, /* signed int64! */
669         [OCI_LINUX_CGROUPS_MEMORY_RESERVATION] = { "reservation", BLOBMSG_CAST_INT64 }, /* signed int64! */
670         [OCI_LINUX_CGROUPS_MEMORY_SWAP] = { "swap", BLOBMSG_CAST_INT64 }, /* signed int64! */
671         [OCI_LINUX_CGROUPS_MEMORY_KERNEL] = { "kernel", BLOBMSG_CAST_INT64 }, /* signed int64! ignored */
672         [OCI_LINUX_CGROUPS_MEMORY_KERNELTCP] = { "kernelTCP", BLOBMSG_CAST_INT64 }, /* signed int64! ignored */
673         [OCI_LINUX_CGROUPS_MEMORY_SWAPPINESS] = { "swappiness", BLOBMSG_CAST_INT64 },
674         [OCI_LINUX_CGROUPS_MEMORY_DISABLEOOMKILLER] = { "disableOOMKiller", BLOBMSG_TYPE_BOOL },
675         [OCI_LINUX_CGROUPS_MEMORY_USEHIERARCHY] = { "useHierarchy", BLOBMSG_TYPE_BOOL },
676 };
677 
678 static int parseOCIlinuxcgroups_legacy_memory(struct blob_attr *msg)
679 {
680         struct blob_attr *tb[__OCI_LINUX_CGROUPS_MEMORY_MAX];
681         char tmp[32] = { 0 };
682         int64_t limit = -1, swap, reservation;
683 
684         blobmsg_parse(oci_linux_cgroups_memory_policy, __OCI_LINUX_CGROUPS_MEMORY_MAX, tb, blobmsg_data(msg), blobmsg_len(msg));
685 
686         /*
687          * not all properties of the OCI memory section can be mapped to cgroup2
688          * kernel memory accounting is always enabled and included in the set
689          *   memory limit, hence these options can be ignored
690          * disableOOMKiller could be emulated using oom_score_adj + seccomp eBPF
691          *   preventing self-upgrade (but allow downgrade)
692          *
693          * see also https://github.com/opencontainers/runtime-spec/issues/1005
694          */
695         if (tb[OCI_LINUX_CGROUPS_MEMORY_SWAPPINESS] ||
696             tb[OCI_LINUX_CGROUPS_MEMORY_DISABLEOOMKILLER] ||
697             tb[OCI_LINUX_CGROUPS_MEMORY_USEHIERARCHY])
698                 return ENOTSUP;
699 
700 
701         if (tb[OCI_LINUX_CGROUPS_MEMORY_LIMIT]) {
702                 limit = blobmsg_cast_s64(tb[OCI_LINUX_CGROUPS_MEMORY_LIMIT]);
703                 if (limit == -1)
704                         strcpy(tmp, "max");
705                 else
706                         snprintf(tmp, sizeof(tmp), "%" PRId64, limit);
707 
708                 cgroups_set("memory.max", tmp);
709         }
710 
711         if (tb[OCI_LINUX_CGROUPS_MEMORY_RESERVATION]) {
712                 reservation = blobmsg_cast_s64(tb[OCI_LINUX_CGROUPS_MEMORY_RESERVATION]);
713 
714                 if (reservation == -1)
715                         strcpy(tmp, "max");
716                 else
717                         snprintf(tmp, sizeof(tmp), "%" PRId64, reservation);
718 
719                 cgroups_set("memory.low", tmp);
720         }
721 
722         /* OCI 'swap' acounts for memory+swap */
723         if (tb[OCI_LINUX_CGROUPS_MEMORY_SWAP]) {
724                 swap = blobmsg_cast_s64(tb[OCI_LINUX_CGROUPS_MEMORY_SWAP]);
725 
726                 if (swap == -1)
727                         strcpy(tmp, "max");
728                 else if (limit == -1 || (limit < swap))
729                         snprintf(tmp, sizeof(tmp), "%" PRId64, swap);
730                 else
731                         snprintf(tmp, sizeof(tmp), "%" PRId64, limit - swap);
732 
733                 cgroups_set("memory.swap_max", tmp);
734         }
735 
736         return 0;
737 }
738 
739 
740 enum {
741         OCI_LINUX_CGROUPS_PIDS_LIMIT,
742         __OCI_LINUX_CGROUPS_PIDS_MAX,
743 };
744 
745 static const struct blobmsg_policy oci_linux_cgroups_pids_policy[] = {
746         [OCI_LINUX_CGROUPS_PIDS_LIMIT] = { "limit", BLOBMSG_CAST_INT64 },
747 };
748 
749 static int parseOCIlinuxcgroups_legacy_pids(struct blob_attr *msg)
750 {
751         struct blob_attr *tb[__OCI_LINUX_CGROUPS_MEMORY_MAX];
752         char tmp[32] = { 0 };
753 
754         blobmsg_parse(oci_linux_cgroups_pids_policy, __OCI_LINUX_CGROUPS_PIDS_MAX, tb, blobmsg_data(msg), blobmsg_len(msg));
755 
756         if (!tb[OCI_LINUX_CGROUPS_PIDS_LIMIT])
757                 return EINVAL;
758 
759         snprintf(tmp, sizeof(tmp), "%" PRIu64, blobmsg_cast_u64(tb[OCI_LINUX_CGROUPS_PIDS_LIMIT]));
760 
761         cgroups_set("pids.max", tmp);
762 
763         return 0;
764 }
765 
766 static int parseOCIlinuxcgroups_unified(struct blob_attr *msg)
767 {
768         struct blob_attr *cur;
769         int rem;
770 
771         blobmsg_for_each_attr(cur, msg, rem) {
772                 if (blobmsg_type(cur) != BLOBMSG_TYPE_STRING)
773                         return EINVAL;
774 
775                 /* restrict keys */
776                 if (strchr(blobmsg_name(cur), '/') ||
777                     !strcmp(blobmsg_name(cur), "cgroup.subtree_control") ||
778                     !strcmp(blobmsg_name(cur), "cgroup.procs") ||
779                     !strcmp(blobmsg_name(cur), "cgroup.threads") ||
780                     !strcmp(blobmsg_name(cur), "cgroup.freeze"))
781                         return EINVAL;
782 
783                 cgroups_set(blobmsg_name(cur), blobmsg_get_string(cur));
784         }
785 
786         return 0;
787 }
788 
789 enum {
790         OCI_LINUX_CGROUPS_BLOCKIO,
791         OCI_LINUX_CGROUPS_CPU,
792         OCI_LINUX_CGROUPS_DEVICES,
793         OCI_LINUX_CGROUPS_HUGEPAGELIMITS,
794         OCI_LINUX_CGROUPS_INTELRDT,
795         OCI_LINUX_CGROUPS_MEMORY,
796         OCI_LINUX_CGROUPS_NETWORK,
797         OCI_LINUX_CGROUPS_PIDS,
798         OCI_LINUX_CGROUPS_RDMA,
799         OCI_LINUX_CGROUPS_UNIFIED,
800         __OCI_LINUX_CGROUPS_MAX,
801 };
802 
803 static const struct blobmsg_policy oci_linux_cgroups_policy[] = {
804         [OCI_LINUX_CGROUPS_BLOCKIO] = { "blockIO", BLOBMSG_TYPE_TABLE },
805         [OCI_LINUX_CGROUPS_CPU] = { "cpu", BLOBMSG_TYPE_TABLE },
806         [OCI_LINUX_CGROUPS_DEVICES] = { "devices", BLOBMSG_TYPE_ARRAY },
807         [OCI_LINUX_CGROUPS_HUGEPAGELIMITS] = { "hugepageLimits", BLOBMSG_TYPE_ARRAY },
808         [OCI_LINUX_CGROUPS_INTELRDT] = { "intelRdt", BLOBMSG_TYPE_TABLE },
809         [OCI_LINUX_CGROUPS_MEMORY] = { "memory", BLOBMSG_TYPE_TABLE },
810         [OCI_LINUX_CGROUPS_NETWORK] = { "network", BLOBMSG_TYPE_TABLE },
811         [OCI_LINUX_CGROUPS_PIDS] = { "pids", BLOBMSG_TYPE_TABLE },
812         [OCI_LINUX_CGROUPS_RDMA] = { "rdma", BLOBMSG_TYPE_TABLE },
813         [OCI_LINUX_CGROUPS_UNIFIED] = { "unified", BLOBMSG_TYPE_TABLE },
814 };
815 
816 int parseOCIlinuxcgroups(struct blob_attr *msg)
817 {
818         struct blob_attr *tb[__OCI_LINUX_CGROUPS_MAX];
819         int ret;
820 
821         blobmsg_parse(oci_linux_cgroups_policy, __OCI_LINUX_CGROUPS_MAX, tb, blobmsg_data(msg), blobmsg_len(msg));
822 
823         if (tb[OCI_LINUX_CGROUPS_HUGEPAGELIMITS] ||
824             tb[OCI_LINUX_CGROUPS_INTELRDT] ||
825             tb[OCI_LINUX_CGROUPS_NETWORK] ||
826             tb[OCI_LINUX_CGROUPS_RDMA])
827                 return ENOTSUP;
828 
829         if (tb[OCI_LINUX_CGROUPS_BLOCKIO]) {
830                 ret = parseOCIlinuxcgroups_legacy_blockio(tb[OCI_LINUX_CGROUPS_BLOCKIO]);
831                 if (ret)
832                         return ret;
833         }
834 
835         if (tb[OCI_LINUX_CGROUPS_CPU]) {
836                 ret = parseOCIlinuxcgroups_legacy_cpu(tb[OCI_LINUX_CGROUPS_CPU]);
837                 if (ret)
838                         return ret;
839         }
840 
841         if (tb[OCI_LINUX_CGROUPS_DEVICES]) {
842                 ret = parseOCIlinuxcgroups_devices(tb[OCI_LINUX_CGROUPS_DEVICES]);
843                 if (ret)
844                         return ret;
845         }
846 
847         if (tb[OCI_LINUX_CGROUPS_MEMORY]) {
848                 ret = parseOCIlinuxcgroups_legacy_memory(tb[OCI_LINUX_CGROUPS_MEMORY]);
849                 if (ret)
850                         return ret;
851         }
852 
853         if (tb[OCI_LINUX_CGROUPS_PIDS]) {
854                 ret = parseOCIlinuxcgroups_legacy_pids(tb[OCI_LINUX_CGROUPS_PIDS]);
855                 if (ret)
856                         return ret;
857         }
858 
859         if (tb[OCI_LINUX_CGROUPS_UNIFIED]) {
860                 ret = parseOCIlinuxcgroups_unified(tb[OCI_LINUX_CGROUPS_UNIFIED]);
861                 if (ret)
862                         return ret;
863         }
864 
865         return 0;
866 }
867 

This page was automatically generated by LXR 0.3.1.  •  OpenWrt