1 /* 2 * Copyright (C) 2020 Daniel Golle <daniel@makrotopia.org> 3 * 4 * This program is free software; you can redistribute it and/or modify 5 * it under the terms of the GNU Lesser General Public License version 2.1 6 * as published by the Free Software Foundation 7 * 8 * This program is distributed in the hope that it will be useful, 9 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 * GNU General Public License for more details. 12 * 13 * reads unified cgroup config as proposed in 14 * https://github.com/opencontainers/runtime-spec/pull/1040 15 * attempt conversion from cgroup1 -> cgroup2 16 * https://github.com/containers/crun/blob/0.14.1/crun.1.md#cgroup-v2 17 * 18 * ToDo: 19 * - convert cgroup1 net_prio and net_cls to eBPF program 20 * - rdma (anyone?) intelrdt (anyone?) 21 */ 22 23 #define _GNU_SOURCE 24 25 #include <errno.h> 26 #include <fcntl.h> 27 #include <stdlib.h> 28 #include <stdio.h> 29 #include <string.h> 30 #include <sys/stat.h> 31 #include <sys/mman.h> 32 #include <unistd.h> 33 #include <libgen.h> 34 #include <inttypes.h> 35 36 #include <libubox/avl.h> 37 #include <libubox/avl-cmp.h> 38 #include <libubox/blobmsg.h> 39 #include <libubox/list.h> 40 #include <libubox/utils.h> 41 42 #include "log.h" 43 #include "cgroups.h" 44 #include "cgroups-bpf.h" 45 46 #define CGROUP_ROOT "/sys/fs/cgroup/" 47 #define CGROUP_IO_WEIGHT_MAX 10000 48 49 struct cgval { 50 struct avl_node avl; 51 char *val; 52 }; 53 54 struct avl_tree cgvals; 55 static char *cgroup_path; 56 static bool initialized; 57 58 void cgroups_prepare(void) { 59 initialized = false; 60 } 61 62 void cgroups_init(const char *p) { 63 avl_init(&cgvals, avl_strcmp, false, NULL); 64 cgroup_path = strdup(p); 65 initialized = true; 66 } 67 68 static void cgroups_set(const char *key, const char *val) 69 { 70 struct cgval *valp; 71 72 valp = avl_find_element(&cgvals, key, valp, avl); 73 if (!valp) { 74 valp = malloc(sizeof(struct cgval)); 75 if (!valp) 76 exit(ENOMEM); 77 78 valp->avl.key = strdup(key); 79 avl_insert(&cgvals, &valp->avl); 80 } else { 81 DEBUG("overwriting previous cgroup2 assignment %s=\"%s\"!\n", key, valp->val); 82 free(valp->val); 83 } 84 85 valp->val = strdup(val); 86 } 87 88 void cgroups_free(void) 89 { 90 struct cgval *valp, *tmp; 91 92 if (initialized) { 93 avl_remove_all_elements(&cgvals, valp, avl, tmp) { 94 free((void *)(valp->avl.key)); 95 free(valp->val); 96 free(valp); 97 } 98 free(cgroup_path); 99 } 100 } 101 102 void cgroups_apply(pid_t pid) 103 { 104 struct cgval *valp; 105 char *cdir, *ent; 106 int fd; 107 size_t maxlen = strlen("cgroup.subtree_control"); 108 109 bool cpuset = false, 110 cpu = false, 111 hugetlb = false, 112 io = false, 113 memory = false, 114 pids = false, 115 rdma = false; 116 117 char subtree_control[64] = { 0 }; 118 119 DEBUG("using cgroup path %s\n", cgroup_path); 120 mkdir_p(cgroup_path, 0700); 121 122 /* find which controllers need to be enabled */ 123 avl_for_each_element(&cgvals, valp, avl) { 124 ent = (char *)valp->avl.key; 125 if (strlen(ent) > maxlen) 126 maxlen = strlen(ent); 127 128 if (!strncmp("cpuset.", ent, 7)) 129 cpuset = true; 130 else if (!strncmp("cpu.", ent, 4)) 131 cpu = true; 132 else if (!strncmp("hugetlb.", ent, 8)) 133 hugetlb = true; 134 else if (!strncmp("io.", ent, 3)) 135 io = true; 136 else if (!strncmp("memory.", ent, 7)) 137 memory = true; 138 else if (!strncmp("pids.", ent, 5)) 139 pids = true; 140 else if (!strncmp("rdma.", ent, 5)) 141 rdma = true; 142 } 143 144 maxlen += strlen(cgroup_path) + 2; 145 146 if (cpuset) 147 strcat(subtree_control, "+cpuset "); 148 149 if (cpu) 150 strcat(subtree_control, "+cpu "); 151 152 if (hugetlb) 153 strcat(subtree_control, "+hugetlb "); 154 155 if (io) 156 strcat(subtree_control, "+io "); 157 158 if (memory) 159 strcat(subtree_control, "+memory "); 160 161 if (pids) 162 strcat(subtree_control, "+pids "); 163 164 if (rdma) 165 strcat(subtree_control, "+rdma "); 166 167 /* remove trailing space (length is > 0) */ 168 ent = strchr(subtree_control, '\0'); 169 if (ent > subtree_control) { 170 ent -= 1; 171 *ent = '\0'; 172 } 173 174 ent = malloc(maxlen); 175 if (!ent) 176 exit(ENOMEM); 177 178 DEBUG("recursively applying cgroup.subtree_control = \"%s\"\n", subtree_control); 179 cdir = &cgroup_path[strlen(CGROUP_ROOT) - 2]; 180 while ((cdir = strchr(cdir + 1, '/'))) { 181 *cdir = '\0'; 182 snprintf(ent, maxlen, "%s/cgroup.subtree_control", cgroup_path); 183 DEBUG(" * %s\n", ent); 184 if ((fd = open(ent, O_WRONLY)) < 0) { 185 ERROR("can't open %s: %m\n", ent); 186 continue; 187 } 188 189 if (write(fd, subtree_control, strlen(subtree_control)) == -1) { 190 ERROR("can't write to %s: %m\n", ent); 191 close(fd); 192 continue; 193 } 194 195 close(fd); 196 *cdir = '/'; 197 } 198 199 avl_for_each_element(&cgvals, valp, avl) { 200 DEBUG("applying cgroup2 %s=\"%s\"\n", (char *)valp->avl.key, valp->val); 201 snprintf(ent, maxlen, "%s/%s", cgroup_path, (char *)valp->avl.key); 202 fd = open(ent, O_WRONLY); 203 if (fd < 0) { 204 ERROR("can't open %s: %m\n", ent); 205 continue; 206 } 207 if (dprintf(fd, "%s", valp->val) < 0) { 208 ERROR("can't write to %s: %m\n", ent); 209 }; 210 close(fd); 211 } 212 213 int dirfd = open(cgroup_path, O_DIRECTORY); 214 if (dirfd < 0) { 215 ERROR("can't open %s: %m\n", cgroup_path); 216 } else { 217 attach_cgroups_ebpf(dirfd); 218 close(dirfd); 219 } 220 221 snprintf(ent, maxlen, "%s/%s", cgroup_path, "cgroup.procs"); 222 fd = open(ent, O_WRONLY); 223 if (fd < 0) { 224 ERROR("can't open %s: %m\n", cgroup_path); 225 } else { 226 dprintf(fd, "%d", pid); 227 close(fd); 228 } 229 230 free(ent); 231 } 232 233 enum { 234 OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MAJOR, 235 OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MINOR, 236 OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_WEIGHT, 237 OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_LEAFWEIGHT, 238 __OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MAX, 239 }; 240 241 static const struct blobmsg_policy oci_linux_cgroups_blockio_weightdevice_policy[] = { 242 [OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MAJOR] = { "major", BLOBMSG_CAST_INT64 }, 243 [OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MINOR] = { "minor", BLOBMSG_CAST_INT64 }, 244 [OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_WEIGHT] = { "weight", BLOBMSG_TYPE_INT32 }, 245 [OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_LEAFWEIGHT] = { "leafWeight", BLOBMSG_TYPE_INT32 }, 246 }; 247 248 enum { 249 OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAJOR, 250 OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MINOR, 251 OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_RATE, 252 __OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAX, 253 }; 254 255 static const struct blobmsg_policy oci_linux_cgroups_blockio_throttledevice_policy[] = { 256 [OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAJOR] = { "major", BLOBMSG_CAST_INT64 }, 257 [OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MINOR] = { "minor", BLOBMSG_CAST_INT64 }, 258 [OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_RATE] = { "rate", BLOBMSG_CAST_INT64 }, 259 }; 260 261 enum { 262 OCI_LINUX_CGROUPS_BLOCKIO_WEIGHT, 263 OCI_LINUX_CGROUPS_BLOCKIO_LEAFWEIGHT, 264 OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE, 265 OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEREADBPSDEVICE, 266 OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEWRITEBPSDEVICE, 267 OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEREADIOPSDEVICE, 268 OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEWRITEIOPSDEVICE, 269 __OCI_LINUX_CGROUPS_BLOCKIO_MAX, 270 }; 271 272 static const struct blobmsg_policy oci_linux_cgroups_blockio_policy[] = { 273 [OCI_LINUX_CGROUPS_BLOCKIO_WEIGHT] = { "weight", BLOBMSG_TYPE_INT32 }, 274 [OCI_LINUX_CGROUPS_BLOCKIO_LEAFWEIGHT] = { "leafWeight", BLOBMSG_TYPE_INT32 }, 275 [OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE] = { "weightDevice", BLOBMSG_TYPE_ARRAY }, 276 [OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEREADBPSDEVICE] = { "throttleReadBpsDevice", BLOBMSG_TYPE_ARRAY }, 277 [OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEWRITEBPSDEVICE] = { "throttleWriteBpsDevice", BLOBMSG_TYPE_ARRAY }, 278 [OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEREADIOPSDEVICE] = { "throttleReadIOPSDevice", BLOBMSG_TYPE_ARRAY }, 279 [OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEWRITEIOPSDEVICE] = { "throttleWriteIOPSDevice", BLOBMSG_TYPE_ARRAY }, 280 }; 281 282 struct posix_dev { 283 uint64_t major; 284 uint64_t minor; 285 }; 286 287 struct iomax_line { 288 struct avl_node avl; 289 struct posix_dev dev; 290 uint64_t rbps; 291 uint64_t wbps; 292 uint64_t riops; 293 uint64_t wiops; 294 }; 295 296 static int avl_devcmp(const void *k1, const void *k2, void *ptr) 297 { 298 struct posix_dev *d1 = (struct posix_dev *)k1, *d2 = (struct posix_dev *)k2; 299 300 if (d1->major < d2->major) 301 return -1; 302 303 if (d1->major > d2->major) 304 return 1; 305 306 if (d1->minor < d2->minor) 307 return -1; 308 309 if (d1->minor > d2->minor) 310 return 1; 311 312 return 0; 313 } 314 315 static struct iomax_line *get_iomax_line(struct avl_tree *iomax, uint64_t major, uint64_t minor) 316 { 317 struct iomax_line *l; 318 struct posix_dev d; 319 d.major = major; 320 d.minor = minor; 321 l = avl_find_element(iomax, &d, l, avl); 322 if (!l) { 323 l = malloc(sizeof(struct iomax_line)); 324 if (!l) 325 exit(ENOMEM); 326 327 l->dev.major = d.major; 328 l->dev.minor = d.minor; 329 l->avl.key = &l->dev; 330 l->rbps = -1; 331 l->wbps = -1; 332 l->riops = -1; 333 l->wiops = -1; 334 avl_insert(iomax, &l->avl); 335 } 336 337 return l; 338 } 339 340 static int parseOCIlinuxcgroups_legacy_blockio(struct blob_attr *msg) 341 { 342 struct blob_attr *tb[__OCI_LINUX_CGROUPS_BLOCKIO_MAX], 343 *tbwd[__OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MAX], 344 *tbtd[__OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAX], 345 *cur; 346 int rem; 347 int weight = -1, leafweight = -1; 348 size_t numweightstrs = 0, numiomaxstrs = 0, strtotlen = 1; 349 char **weightstrs = NULL, **iomaxstrs = NULL, **curstr; 350 char *weightstr, *iomaxstr; 351 struct avl_tree iomax; 352 struct iomax_line *curiomax, *tmp; 353 354 blobmsg_parse(oci_linux_cgroups_blockio_policy, __OCI_LINUX_CGROUPS_BLOCKIO_MAX, tb, blobmsg_data(msg), blobmsg_len(msg)); 355 356 if (tb[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHT]) { 357 weight = blobmsg_get_u32(tb[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHT]); 358 ++numweightstrs; 359 } 360 361 if (weight > CGROUP_IO_WEIGHT_MAX) 362 return ERANGE; 363 364 if (tb[OCI_LINUX_CGROUPS_BLOCKIO_LEAFWEIGHT]) 365 leafweight = blobmsg_get_u32(tb[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHT]); 366 367 if (leafweight > CGROUP_IO_WEIGHT_MAX) 368 return ERANGE; 369 370 blobmsg_for_each_attr(cur, tb[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE], rem) 371 ++numweightstrs; 372 373 weightstrs = calloc(numweightstrs + 1, sizeof(char *)); 374 if (!weightstrs) 375 exit(ENOMEM); 376 377 numweightstrs = 0; 378 379 if (weight > -1) 380 if (asprintf(&weightstrs[numweightstrs++], "default %d", weight) < 0) 381 return ENOMEM; 382 383 blobmsg_for_each_attr(cur, tb[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE], rem) { 384 uint64_t major, minor; 385 int devweight = weight, devleafweight = leafweight; 386 387 blobmsg_parse(oci_linux_cgroups_blockio_weightdevice_policy, __OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MAX, tbwd, blobmsg_data(cur), blobmsg_len(cur)); 388 if (!tbwd[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MAJOR] || 389 !tbwd[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MINOR]) 390 return ENODATA; 391 392 if (!tbwd[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_WEIGHT] && 393 !tbwd[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_LEAFWEIGHT]) 394 return ENODATA; 395 396 if (tbwd[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_WEIGHT]) 397 devweight = blobmsg_get_u32(tbwd[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_WEIGHT]); 398 399 if (devweight > CGROUP_IO_WEIGHT_MAX) 400 return ERANGE; 401 402 if (tbwd[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_LEAFWEIGHT]) 403 devleafweight = blobmsg_get_u32(tbwd[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_LEAFWEIGHT]); 404 405 if (devleafweight > CGROUP_IO_WEIGHT_MAX) 406 return ERANGE; 407 408 if (tbwd[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_LEAFWEIGHT]) 409 return ENOTSUP; 410 411 major = blobmsg_cast_u64(tbwd[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MAJOR]); 412 minor = blobmsg_cast_u64(tbwd[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MINOR]); 413 414 if (asprintf(&weightstrs[numweightstrs++], "%" PRIu64 ":%" PRIu64 " %u", major, minor, devweight) < 0) 415 return ENOMEM; 416 } 417 418 if (numweightstrs) { 419 curstr = weightstrs; 420 while (*curstr) 421 strtotlen += strlen(*(curstr++)) + 1; 422 423 weightstr = calloc(strtotlen, sizeof(char)); 424 if (!weightstr) 425 exit(ENOMEM); 426 427 curstr = weightstrs; 428 while (*curstr) { 429 strcat(weightstr, *curstr); 430 strcat(weightstr, "\n"); 431 free(*(curstr++)); 432 } 433 434 cgroups_set("io.bfq.weight", weightstr); 435 free(weightstr); 436 }; 437 438 free(weightstrs); 439 440 avl_init(&iomax, avl_devcmp, false, NULL); 441 442 blobmsg_for_each_attr(cur, tb[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEREADBPSDEVICE], rem) { 443 struct iomax_line *l; 444 445 blobmsg_parse(oci_linux_cgroups_blockio_throttledevice_policy, __OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAX, tbtd, blobmsg_data(cur), blobmsg_len(cur)); 446 447 if (!tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAJOR] || 448 !tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MINOR] || 449 !tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_RATE]) 450 return ENODATA; 451 452 l = get_iomax_line(&iomax, 453 blobmsg_cast_u64(tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAJOR]), 454 blobmsg_cast_u64(tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MINOR])); 455 456 l->rbps = blobmsg_cast_u64(tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_RATE]); 457 } 458 459 blobmsg_for_each_attr(cur, tb[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEWRITEBPSDEVICE], rem) { 460 struct iomax_line *l; 461 462 blobmsg_parse(oci_linux_cgroups_blockio_throttledevice_policy, __OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAX, tbtd, blobmsg_data(cur), blobmsg_len(cur)); 463 464 if (!tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAJOR] || 465 !tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MINOR] || 466 !tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_RATE]) 467 return ENODATA; 468 469 l = get_iomax_line(&iomax, 470 blobmsg_cast_u64(tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAJOR]), 471 blobmsg_cast_u64(tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MINOR])); 472 473 l->wbps = blobmsg_cast_u64(tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_RATE]); 474 } 475 476 blobmsg_for_each_attr(cur, tb[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEREADIOPSDEVICE], rem) { 477 struct iomax_line *l; 478 479 blobmsg_parse(oci_linux_cgroups_blockio_throttledevice_policy, __OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAX, tbtd, blobmsg_data(cur), blobmsg_len(cur)); 480 481 if (!tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAJOR] || 482 !tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MINOR] || 483 !tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_RATE]) 484 return ENODATA; 485 486 l = get_iomax_line(&iomax, 487 blobmsg_cast_u64(tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAJOR]), 488 blobmsg_cast_u64(tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MINOR])); 489 490 l->riops = blobmsg_cast_u64(tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_RATE]); 491 } 492 493 blobmsg_for_each_attr(cur, tb[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEWRITEIOPSDEVICE], rem) { 494 struct iomax_line *l; 495 496 blobmsg_parse(oci_linux_cgroups_blockio_throttledevice_policy, __OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAX, tbtd, blobmsg_data(cur), blobmsg_len(cur)); 497 498 if (!tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAJOR] || 499 !tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MINOR] || 500 !tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_RATE]) 501 return ENODATA; 502 503 l = get_iomax_line(&iomax, 504 blobmsg_cast_u64(tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAJOR]), 505 blobmsg_cast_u64(tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MINOR])); 506 507 l->wiops = blobmsg_cast_u64(tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_RATE]); 508 } 509 510 avl_for_each_element(&iomax, curiomax, avl) 511 ++numiomaxstrs; 512 513 if (!numiomaxstrs) 514 return 0; 515 516 iomaxstrs = calloc(numiomaxstrs + 1, sizeof(char *)); 517 if (!iomaxstrs) 518 exit(ENOMEM); 519 520 numiomaxstrs = 0; 521 522 avl_for_each_element(&iomax, curiomax, avl) { 523 char iomaxlstr[160]; 524 char lstr[32]; 525 526 sprintf(iomaxlstr, "%" PRIu64 ":%" PRIu64 " ", curiomax->dev.major, curiomax->dev.minor); 527 528 if (curiomax->rbps != -1) { 529 sprintf(lstr, "rbps=%" PRIu64 " ", curiomax->rbps); 530 strcat(iomaxlstr, lstr); 531 } 532 if (curiomax->wbps != -1) { 533 sprintf(lstr, "wbps=%" PRIu64 " ", curiomax->wbps); 534 strcat(iomaxlstr, lstr); 535 } 536 if (curiomax->riops != -1) { 537 sprintf(lstr, "riops=%" PRIu64 " ", curiomax->riops); 538 strcat(iomaxlstr, lstr); 539 } 540 if (curiomax->wiops != -1) { 541 sprintf(lstr, "wiops=%" PRIu64 " ", curiomax->wiops); 542 strcat(iomaxlstr, lstr); 543 } 544 545 iomaxstrs[numiomaxstrs++] = strdup(iomaxlstr); 546 } 547 548 avl_for_each_element_safe(&iomax, curiomax, avl, tmp) { 549 avl_delete(&iomax, &curiomax->avl); 550 free(curiomax); 551 } 552 553 strtotlen = 1; /* 1 accounts for \0 at end of string */ 554 if (numiomaxstrs) { 555 curstr = iomaxstrs; 556 while (*curstr) 557 strtotlen += strlen(*(curstr++)) + 1; /* +1 accounts for \n at end of line */ 558 559 iomaxstr = calloc(strtotlen, sizeof(char)); 560 if (!iomaxstr) 561 exit(ENOMEM); 562 563 curstr = iomaxstrs; 564 565 while (*curstr) { 566 strcat(iomaxstr, *curstr); 567 strcat(iomaxstr, "\n"); 568 free(*(curstr++)); 569 } 570 571 cgroups_set("io.max", iomaxstr); 572 free(iomaxstr); 573 }; 574 575 free(iomaxstrs); 576 577 return 0; 578 } 579 580 581 enum { 582 OCI_LINUX_CGROUPS_CPU_SHARES, 583 OCI_LINUX_CGROUPS_CPU_PERIOD, 584 OCI_LINUX_CGROUPS_CPU_QUOTA, 585 OCI_LINUX_CGROUPS_CPU_REALTIMERUNTIME, 586 OCI_LINUX_CGROUPS_CPU_REALTIMEPERIOD, 587 OCI_LINUX_CGROUPS_CPU_CPUS, 588 OCI_LINUX_CGROUPS_CPU_MEMS, 589 __OCI_LINUX_CGROUPS_CPU_MAX, 590 }; 591 592 static const struct blobmsg_policy oci_linux_cgroups_cpu_policy[] = { 593 [OCI_LINUX_CGROUPS_CPU_SHARES] = { "shares", BLOBMSG_CAST_INT64 }, 594 [OCI_LINUX_CGROUPS_CPU_PERIOD] = { "period", BLOBMSG_CAST_INT64 }, 595 [OCI_LINUX_CGROUPS_CPU_QUOTA] = { "quota", BLOBMSG_CAST_INT64 }, /* signed int64! */ 596 [OCI_LINUX_CGROUPS_CPU_REALTIMEPERIOD] = { "realtimePeriod", BLOBMSG_CAST_INT64 }, 597 [OCI_LINUX_CGROUPS_CPU_REALTIMERUNTIME] = { "realtimeRuntime", BLOBMSG_CAST_INT64 }, 598 [OCI_LINUX_CGROUPS_CPU_CPUS] = { "cpus", BLOBMSG_TYPE_STRING }, 599 [OCI_LINUX_CGROUPS_CPU_MEMS] = { "mems", BLOBMSG_TYPE_STRING }, 600 }; 601 602 static int parseOCIlinuxcgroups_legacy_cpu(struct blob_attr *msg) 603 { 604 struct blob_attr *tb[__OCI_LINUX_CGROUPS_CPU_MAX]; 605 uint64_t shares, period = 0; 606 int64_t quota = -2; /* unset */ 607 char tmp[32] = { 0 }; 608 609 blobmsg_parse(oci_linux_cgroups_cpu_policy, __OCI_LINUX_CGROUPS_CPU_MAX, tb, blobmsg_data(msg), blobmsg_len(msg)); 610 611 if (tb[OCI_LINUX_CGROUPS_CPU_REALTIMEPERIOD] || 612 tb[OCI_LINUX_CGROUPS_CPU_REALTIMERUNTIME]) 613 return ENOTSUP; /* no equivalent in cgroup2 */ 614 615 if (tb[OCI_LINUX_CGROUPS_CPU_SHARES]) { 616 shares = blobmsg_cast_u64(tb[OCI_LINUX_CGROUPS_CPU_SHARES]); 617 if ((shares < 2) || (shares > 262144)) 618 return ERANGE; 619 620 snprintf(tmp, sizeof(tmp), "%" PRIu64, (((uint64_t)1) + ((shares - 2) * 9999) / 262142)); 621 cgroups_set("cpu.weight", tmp); 622 tmp[0] = '\0'; 623 } 624 625 if (tb[OCI_LINUX_CGROUPS_CPU_QUOTA]) 626 quota = blobmsg_cast_s64(tb[OCI_LINUX_CGROUPS_CPU_QUOTA]); 627 628 if (tb[OCI_LINUX_CGROUPS_CPU_PERIOD]) 629 period = blobmsg_cast_u64(tb[OCI_LINUX_CGROUPS_CPU_PERIOD]); 630 631 if (period) { 632 if (quota >= 0) 633 snprintf(tmp, sizeof(tmp), "%" PRId64 " %" PRIu64 , quota, period); 634 else 635 snprintf(tmp, sizeof(tmp), "max %" PRIu64, period); /* assume default */ 636 } else if (quota >= 0) { 637 snprintf(tmp, sizeof(tmp), "%" PRId64, quota); 638 } else if (quota == -1) { 639 strcpy(tmp, "max"); 640 } 641 642 if (tmp[0]) 643 cgroups_set("cpu.max", tmp); 644 645 if (tb[OCI_LINUX_CGROUPS_CPU_CPUS]) 646 cgroups_set("cpuset.cpus", blobmsg_get_string(tb[OCI_LINUX_CGROUPS_CPU_CPUS])); 647 648 if (tb[OCI_LINUX_CGROUPS_CPU_MEMS]) 649 cgroups_set("cpuset.mems", blobmsg_get_string(tb[OCI_LINUX_CGROUPS_CPU_MEMS])); 650 651 return 0; 652 } 653 654 655 enum { 656 OCI_LINUX_CGROUPS_MEMORY_LIMIT, 657 OCI_LINUX_CGROUPS_MEMORY_RESERVATION, 658 OCI_LINUX_CGROUPS_MEMORY_SWAP, 659 OCI_LINUX_CGROUPS_MEMORY_KERNEL, 660 OCI_LINUX_CGROUPS_MEMORY_KERNELTCP, 661 OCI_LINUX_CGROUPS_MEMORY_SWAPPINESS, 662 OCI_LINUX_CGROUPS_MEMORY_DISABLEOOMKILLER, 663 OCI_LINUX_CGROUPS_MEMORY_USEHIERARCHY, 664 __OCI_LINUX_CGROUPS_MEMORY_MAX, 665 }; 666 667 static const struct blobmsg_policy oci_linux_cgroups_memory_policy[] = { 668 [OCI_LINUX_CGROUPS_MEMORY_LIMIT] = { "limit", BLOBMSG_CAST_INT64 }, /* signed int64! */ 669 [OCI_LINUX_CGROUPS_MEMORY_RESERVATION] = { "reservation", BLOBMSG_CAST_INT64 }, /* signed int64! */ 670 [OCI_LINUX_CGROUPS_MEMORY_SWAP] = { "swap", BLOBMSG_CAST_INT64 }, /* signed int64! */ 671 [OCI_LINUX_CGROUPS_MEMORY_KERNEL] = { "kernel", BLOBMSG_CAST_INT64 }, /* signed int64! ignored */ 672 [OCI_LINUX_CGROUPS_MEMORY_KERNELTCP] = { "kernelTCP", BLOBMSG_CAST_INT64 }, /* signed int64! ignored */ 673 [OCI_LINUX_CGROUPS_MEMORY_SWAPPINESS] = { "swappiness", BLOBMSG_CAST_INT64 }, 674 [OCI_LINUX_CGROUPS_MEMORY_DISABLEOOMKILLER] = { "disableOOMKiller", BLOBMSG_TYPE_BOOL }, 675 [OCI_LINUX_CGROUPS_MEMORY_USEHIERARCHY] = { "useHierarchy", BLOBMSG_TYPE_BOOL }, 676 }; 677 678 static int parseOCIlinuxcgroups_legacy_memory(struct blob_attr *msg) 679 { 680 struct blob_attr *tb[__OCI_LINUX_CGROUPS_MEMORY_MAX]; 681 char tmp[32] = { 0 }; 682 int64_t limit = -1, swap, reservation; 683 684 blobmsg_parse(oci_linux_cgroups_memory_policy, __OCI_LINUX_CGROUPS_MEMORY_MAX, tb, blobmsg_data(msg), blobmsg_len(msg)); 685 686 /* 687 * not all properties of the OCI memory section can be mapped to cgroup2 688 * kernel memory accounting is always enabled and included in the set 689 * memory limit, hence these options can be ignored 690 * disableOOMKiller could be emulated using oom_score_adj + seccomp eBPF 691 * preventing self-upgrade (but allow downgrade) 692 * 693 * see also https://github.com/opencontainers/runtime-spec/issues/1005 694 */ 695 if (tb[OCI_LINUX_CGROUPS_MEMORY_SWAPPINESS] || 696 tb[OCI_LINUX_CGROUPS_MEMORY_DISABLEOOMKILLER] || 697 tb[OCI_LINUX_CGROUPS_MEMORY_USEHIERARCHY]) 698 return ENOTSUP; 699 700 701 if (tb[OCI_LINUX_CGROUPS_MEMORY_LIMIT]) { 702 limit = blobmsg_cast_s64(tb[OCI_LINUX_CGROUPS_MEMORY_LIMIT]); 703 if (limit == -1) 704 strcpy(tmp, "max"); 705 else 706 snprintf(tmp, sizeof(tmp), "%" PRId64, limit); 707 708 cgroups_set("memory.max", tmp); 709 } 710 711 if (tb[OCI_LINUX_CGROUPS_MEMORY_RESERVATION]) { 712 reservation = blobmsg_cast_s64(tb[OCI_LINUX_CGROUPS_MEMORY_RESERVATION]); 713 714 if (reservation == -1) 715 strcpy(tmp, "max"); 716 else 717 snprintf(tmp, sizeof(tmp), "%" PRId64, reservation); 718 719 cgroups_set("memory.low", tmp); 720 } 721 722 /* OCI 'swap' acounts for memory+swap */ 723 if (tb[OCI_LINUX_CGROUPS_MEMORY_SWAP]) { 724 swap = blobmsg_cast_s64(tb[OCI_LINUX_CGROUPS_MEMORY_SWAP]); 725 726 if (swap == -1) 727 strcpy(tmp, "max"); 728 else if (limit == -1 || (limit < swap)) 729 snprintf(tmp, sizeof(tmp), "%" PRId64, swap); 730 else 731 snprintf(tmp, sizeof(tmp), "%" PRId64, limit - swap); 732 733 cgroups_set("memory.swap_max", tmp); 734 } 735 736 return 0; 737 } 738 739 740 enum { 741 OCI_LINUX_CGROUPS_PIDS_LIMIT, 742 __OCI_LINUX_CGROUPS_PIDS_MAX, 743 }; 744 745 static const struct blobmsg_policy oci_linux_cgroups_pids_policy[] = { 746 [OCI_LINUX_CGROUPS_PIDS_LIMIT] = { "limit", BLOBMSG_CAST_INT64 }, 747 }; 748 749 static int parseOCIlinuxcgroups_legacy_pids(struct blob_attr *msg) 750 { 751 struct blob_attr *tb[__OCI_LINUX_CGROUPS_MEMORY_MAX]; 752 char tmp[32] = { 0 }; 753 754 blobmsg_parse(oci_linux_cgroups_pids_policy, __OCI_LINUX_CGROUPS_PIDS_MAX, tb, blobmsg_data(msg), blobmsg_len(msg)); 755 756 if (!tb[OCI_LINUX_CGROUPS_PIDS_LIMIT]) 757 return EINVAL; 758 759 snprintf(tmp, sizeof(tmp), "%" PRIu64, blobmsg_cast_u64(tb[OCI_LINUX_CGROUPS_PIDS_LIMIT])); 760 761 cgroups_set("pids.max", tmp); 762 763 return 0; 764 } 765 766 static int parseOCIlinuxcgroups_unified(struct blob_attr *msg) 767 { 768 struct blob_attr *cur; 769 int rem; 770 771 blobmsg_for_each_attr(cur, msg, rem) { 772 if (blobmsg_type(cur) != BLOBMSG_TYPE_STRING) 773 return EINVAL; 774 775 /* restrict keys */ 776 if (strchr(blobmsg_name(cur), '/') || 777 !strcmp(blobmsg_name(cur), "cgroup.subtree_control") || 778 !strcmp(blobmsg_name(cur), "cgroup.procs") || 779 !strcmp(blobmsg_name(cur), "cgroup.threads") || 780 !strcmp(blobmsg_name(cur), "cgroup.freeze")) 781 return EINVAL; 782 783 cgroups_set(blobmsg_name(cur), blobmsg_get_string(cur)); 784 } 785 786 return 0; 787 } 788 789 enum { 790 OCI_LINUX_CGROUPS_BLOCKIO, 791 OCI_LINUX_CGROUPS_CPU, 792 OCI_LINUX_CGROUPS_DEVICES, 793 OCI_LINUX_CGROUPS_HUGEPAGELIMITS, 794 OCI_LINUX_CGROUPS_INTELRDT, 795 OCI_LINUX_CGROUPS_MEMORY, 796 OCI_LINUX_CGROUPS_NETWORK, 797 OCI_LINUX_CGROUPS_PIDS, 798 OCI_LINUX_CGROUPS_RDMA, 799 OCI_LINUX_CGROUPS_UNIFIED, 800 __OCI_LINUX_CGROUPS_MAX, 801 }; 802 803 static const struct blobmsg_policy oci_linux_cgroups_policy[] = { 804 [OCI_LINUX_CGROUPS_BLOCKIO] = { "blockIO", BLOBMSG_TYPE_TABLE }, 805 [OCI_LINUX_CGROUPS_CPU] = { "cpu", BLOBMSG_TYPE_TABLE }, 806 [OCI_LINUX_CGROUPS_DEVICES] = { "devices", BLOBMSG_TYPE_ARRAY }, 807 [OCI_LINUX_CGROUPS_HUGEPAGELIMITS] = { "hugepageLimits", BLOBMSG_TYPE_ARRAY }, 808 [OCI_LINUX_CGROUPS_INTELRDT] = { "intelRdt", BLOBMSG_TYPE_TABLE }, 809 [OCI_LINUX_CGROUPS_MEMORY] = { "memory", BLOBMSG_TYPE_TABLE }, 810 [OCI_LINUX_CGROUPS_NETWORK] = { "network", BLOBMSG_TYPE_TABLE }, 811 [OCI_LINUX_CGROUPS_PIDS] = { "pids", BLOBMSG_TYPE_TABLE }, 812 [OCI_LINUX_CGROUPS_RDMA] = { "rdma", BLOBMSG_TYPE_TABLE }, 813 [OCI_LINUX_CGROUPS_UNIFIED] = { "unified", BLOBMSG_TYPE_TABLE }, 814 }; 815 816 int parseOCIlinuxcgroups(struct blob_attr *msg) 817 { 818 struct blob_attr *tb[__OCI_LINUX_CGROUPS_MAX]; 819 int ret; 820 821 blobmsg_parse(oci_linux_cgroups_policy, __OCI_LINUX_CGROUPS_MAX, tb, blobmsg_data(msg), blobmsg_len(msg)); 822 823 if (tb[OCI_LINUX_CGROUPS_HUGEPAGELIMITS] || 824 tb[OCI_LINUX_CGROUPS_INTELRDT] || 825 tb[OCI_LINUX_CGROUPS_NETWORK] || 826 tb[OCI_LINUX_CGROUPS_RDMA]) 827 return ENOTSUP; 828 829 if (tb[OCI_LINUX_CGROUPS_BLOCKIO]) { 830 ret = parseOCIlinuxcgroups_legacy_blockio(tb[OCI_LINUX_CGROUPS_BLOCKIO]); 831 if (ret) 832 return ret; 833 } 834 835 if (tb[OCI_LINUX_CGROUPS_CPU]) { 836 ret = parseOCIlinuxcgroups_legacy_cpu(tb[OCI_LINUX_CGROUPS_CPU]); 837 if (ret) 838 return ret; 839 } 840 841 if (tb[OCI_LINUX_CGROUPS_DEVICES]) { 842 ret = parseOCIlinuxcgroups_devices(tb[OCI_LINUX_CGROUPS_DEVICES]); 843 if (ret) 844 return ret; 845 } 846 847 if (tb[OCI_LINUX_CGROUPS_MEMORY]) { 848 ret = parseOCIlinuxcgroups_legacy_memory(tb[OCI_LINUX_CGROUPS_MEMORY]); 849 if (ret) 850 return ret; 851 } 852 853 if (tb[OCI_LINUX_CGROUPS_PIDS]) { 854 ret = parseOCIlinuxcgroups_legacy_pids(tb[OCI_LINUX_CGROUPS_PIDS]); 855 if (ret) 856 return ret; 857 } 858 859 if (tb[OCI_LINUX_CGROUPS_UNIFIED]) { 860 ret = parseOCIlinuxcgroups_unified(tb[OCI_LINUX_CGROUPS_UNIFIED]); 861 if (ret) 862 return ret; 863 } 864 865 return 0; 866 } 867
This page was automatically generated by LXR 0.3.1. • OpenWrt