1 /* 2 * Copyright (C) 2020 Daniel Golle <daniel@makrotopia.org> 3 * 4 * This program is free software; you can redistribute it and/or modify 5 * it under the terms of the GNU Lesser General Public License version 2.1 6 * as published by the Free Software Foundation 7 * 8 * This program is distributed in the hope that it will be useful, 9 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 * GNU General Public License for more details. 12 * 13 * reads unified cgroup config as proposed in 14 * https://github.com/opencontainers/runtime-spec/pull/1040 15 * attempt conversion from cgroup1 -> cgroup2 16 * https://github.com/containers/crun/blob/0.14.1/crun.1.md#cgroup-v2 17 * 18 * ToDo: 19 * - convert cgroup1 net_prio and net_cls to eBPF program 20 * - rdma (anyone?) intelrdt (anyone?) 21 */ 22 23 #define _GNU_SOURCE 24 25 #include <errno.h> 26 #include <fcntl.h> 27 #include <stdlib.h> 28 #include <stdio.h> 29 #include <string.h> 30 #include <sys/stat.h> 31 #include <sys/mman.h> 32 #include <unistd.h> 33 #include <libgen.h> 34 #include <inttypes.h> 35 36 #include <libubox/avl.h> 37 #include <libubox/avl-cmp.h> 38 #include <libubox/blobmsg.h> 39 #include <libubox/list.h> 40 #include <libubox/utils.h> 41 42 #include "log.h" 43 #include "cgroups.h" 44 #include "cgroups-bpf.h" 45 46 #define CGROUP_ROOT "/sys/fs/cgroup/" 47 #define CGROUP_IO_WEIGHT_MAX 10000 48 49 struct cgval { 50 struct avl_node avl; 51 char *val; 52 }; 53 54 struct avl_tree cgvals; 55 static char *cgroup_path; 56 static bool initialized; 57 58 void cgroups_prepare(void) { 59 initialized = false; 60 } 61 62 void cgroups_init(const char *p) { 63 avl_init(&cgvals, avl_strcmp, false, NULL); 64 cgroup_path = strdup(p); 65 initialized = true; 66 } 67 68 static void cgroups_set(const char *key, const char *val) 69 { 70 struct cgval *valp; 71 72 valp = avl_find_element(&cgvals, key, valp, avl); 73 if (!valp) { 74 valp = 
malloc(sizeof(struct cgval)); 75 if (!valp) 76 exit(ENOMEM); 77 78 valp->avl.key = strdup(key); 79 avl_insert(&cgvals, &valp->avl); 80 } else { 81 DEBUG("overwriting previous cgroup2 assignment %s=\"%s\"!\n", key, valp->val); 82 free(valp->val); 83 } 84 85 valp->val = strdup(val); 86 } 87 88 void cgroups_free(void) 89 { 90 struct cgval *valp, *tmp; 91 92 if (initialized) { 93 avl_remove_all_elements(&cgvals, valp, avl, tmp) { 94 free((void *)(valp->avl.key)); 95 free(valp->val); 96 free(valp); 97 } 98 free(cgroup_path); 99 } 100 } 101 102 void cgroups_apply(pid_t pid) 103 { 104 struct cgval *valp; 105 char *cdir, *ent; 106 int fd; 107 size_t maxlen = strlen("cgroup.subtree_control"); 108 109 bool cpuset = false, 110 cpu = false, 111 hugetlb = false, 112 io = false, 113 memory = false, 114 pids = false, 115 rdma = false; 116 117 char subtree_control[64] = { 0 }; 118 119 DEBUG("using cgroup path %s\n", cgroup_path); 120 mkdir_p(cgroup_path, 0700); 121 122 /* find which controllers need to be enabled */ 123 avl_for_each_element(&cgvals, valp, avl) { 124 ent = (char *)valp->avl.key; 125 if (strlen(ent) > maxlen) 126 maxlen = strlen(ent); 127 128 if (!strncmp("cpuset.", ent, 7)) 129 cpuset = true; 130 else if (!strncmp("cpu.", ent, 4)) 131 cpu = true; 132 else if (!strncmp("hugetlb.", ent, 8)) 133 hugetlb = true; 134 else if (!strncmp("io.", ent, 3)) 135 io = true; 136 else if (!strncmp("memory.", ent, 7)) 137 memory = true; 138 else if (!strncmp("pids.", ent, 5)) 139 pids = true; 140 else if (!strncmp("rdma.", ent, 5)) 141 rdma = true; 142 } 143 144 maxlen += strlen(cgroup_path) + 2; 145 146 if (cpuset) 147 strcat(subtree_control, "+cpuset "); 148 149 if (cpu) 150 strcat(subtree_control, "+cpu "); 151 152 if (hugetlb) 153 strcat(subtree_control, "+hugetlb "); 154 155 if (io) 156 strcat(subtree_control, "+io "); 157 158 if (memory) 159 strcat(subtree_control, "+memory "); 160 161 if (pids) 162 strcat(subtree_control, "+pids "); 163 164 if (rdma) 165 
strcat(subtree_control, "+rdma "); 166 167 /* remove trailing space */ 168 ent = strchr(subtree_control, '\0') - 1; 169 *ent = '\0'; 170 171 ent = malloc(maxlen); 172 if (!ent) 173 exit(ENOMEM); 174 175 DEBUG("recursively applying cgroup.subtree_control = \"%s\"\n", subtree_control); 176 cdir = &cgroup_path[strlen(CGROUP_ROOT) - 2]; 177 while ((cdir = strchr(cdir + 1, '/'))) { 178 *cdir = '\0'; 179 snprintf(ent, maxlen, "%s/cgroup.subtree_control", cgroup_path); 180 DEBUG(" * %s\n", ent); 181 if ((fd = open(ent, O_WRONLY)) < 0) { 182 ERROR("can't open %s: %m\n", ent); 183 continue; 184 } 185 186 if (write(fd, subtree_control, strlen(subtree_control)) == -1) { 187 ERROR("can't write to %s: %m\n", ent); 188 close(fd); 189 continue; 190 } 191 192 close(fd); 193 *cdir = '/'; 194 } 195 196 avl_for_each_element(&cgvals, valp, avl) { 197 DEBUG("applying cgroup2 %s=\"%s\"\n", (char *)valp->avl.key, valp->val); 198 snprintf(ent, maxlen, "%s/%s", cgroup_path, (char *)valp->avl.key); 199 fd = open(ent, O_WRONLY); 200 if (fd < 0) { 201 ERROR("can't open %s: %m\n", ent); 202 continue; 203 } 204 if (dprintf(fd, "%s", valp->val) < 0) { 205 ERROR("can't write to %s: %m\n", ent); 206 }; 207 close(fd); 208 } 209 210 int dirfd = open(cgroup_path, O_DIRECTORY); 211 if (dirfd < 0) { 212 ERROR("can't open %s: %m\n", cgroup_path); 213 } else { 214 attach_cgroups_ebpf(dirfd); 215 close(dirfd); 216 } 217 218 snprintf(ent, maxlen, "%s/%s", cgroup_path, "cgroup.procs"); 219 fd = open(ent, O_WRONLY); 220 if (fd < 0) { 221 ERROR("can't open %s: %m\n", cgroup_path); 222 } else { 223 dprintf(fd, "%d", pid); 224 close(fd); 225 } 226 227 free(ent); 228 } 229 230 enum { 231 OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MAJOR, 232 OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MINOR, 233 OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_WEIGHT, 234 OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_LEAFWEIGHT, 235 __OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MAX, 236 }; 237 238 static const struct blobmsg_policy 
oci_linux_cgroups_blockio_weightdevice_policy[] = { 239 [OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MAJOR] = { "major", BLOBMSG_CAST_INT64 }, 240 [OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MINOR] = { "minor", BLOBMSG_CAST_INT64 }, 241 [OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_WEIGHT] = { "weight", BLOBMSG_TYPE_INT32 }, 242 [OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_LEAFWEIGHT] = { "leafWeight", BLOBMSG_TYPE_INT32 }, 243 }; 244 245 enum { 246 OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAJOR, 247 OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MINOR, 248 OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_RATE, 249 __OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAX, 250 }; 251 252 static const struct blobmsg_policy oci_linux_cgroups_blockio_throttledevice_policy[] = { 253 [OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAJOR] = { "major", BLOBMSG_CAST_INT64 }, 254 [OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MINOR] = { "minor", BLOBMSG_CAST_INT64 }, 255 [OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_RATE] = { "rate", BLOBMSG_CAST_INT64 }, 256 }; 257 258 enum { 259 OCI_LINUX_CGROUPS_BLOCKIO_WEIGHT, 260 OCI_LINUX_CGROUPS_BLOCKIO_LEAFWEIGHT, 261 OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE, 262 OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEREADBPSDEVICE, 263 OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEWRITEBPSDEVICE, 264 OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEREADIOPSDEVICE, 265 OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEWRITEIOPSDEVICE, 266 __OCI_LINUX_CGROUPS_BLOCKIO_MAX, 267 }; 268 269 static const struct blobmsg_policy oci_linux_cgroups_blockio_policy[] = { 270 [OCI_LINUX_CGROUPS_BLOCKIO_WEIGHT] = { "weight", BLOBMSG_TYPE_INT32 }, 271 [OCI_LINUX_CGROUPS_BLOCKIO_LEAFWEIGHT] = { "leafWeight", BLOBMSG_TYPE_INT32 }, 272 [OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE] = { "weightDevice", BLOBMSG_TYPE_ARRAY }, 273 [OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEREADBPSDEVICE] = { "throttleReadBpsDevice", BLOBMSG_TYPE_ARRAY }, 274 [OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEWRITEBPSDEVICE] = { "throttleWriteBpsDevice", BLOBMSG_TYPE_ARRAY }, 275 
[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEREADIOPSDEVICE] = { "throttleReadIOPSDevice", BLOBMSG_TYPE_ARRAY }, 276 [OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEWRITEIOPSDEVICE] = { "throttleWriteIOPSDevice", BLOBMSG_TYPE_ARRAY }, 277 }; 278 279 struct posix_dev { 280 uint64_t major; 281 uint64_t minor; 282 }; 283 284 struct iomax_line { 285 struct avl_node avl; 286 struct posix_dev dev; 287 uint64_t rbps; 288 uint64_t wbps; 289 uint64_t riops; 290 uint64_t wiops; 291 }; 292 293 static int avl_devcmp(const void *k1, const void *k2, void *ptr) 294 { 295 struct posix_dev *d1 = (struct posix_dev *)k1, *d2 = (struct posix_dev *)k2; 296 297 if (d1->major < d2->major) 298 return -1; 299 300 if (d1->major > d2->major) 301 return 1; 302 303 if (d1->minor < d2->minor) 304 return -1; 305 306 if (d1->minor > d2->minor) 307 return 1; 308 309 return 0; 310 } 311 312 static struct iomax_line *get_iomax_line(struct avl_tree *iomax, uint64_t major, uint64_t minor) 313 { 314 struct iomax_line *l; 315 struct posix_dev d; 316 d.major = major; 317 d.minor = minor; 318 l = avl_find_element(iomax, &d, l, avl); 319 if (!l) { 320 l = malloc(sizeof(struct iomax_line)); 321 if (!l) 322 exit(ENOMEM); 323 324 l->dev.major = d.major; 325 l->dev.minor = d.minor; 326 l->avl.key = &l->dev; 327 l->rbps = -1; 328 l->wbps = -1; 329 l->riops = -1; 330 l->wiops = -1; 331 avl_insert(iomax, &l->avl); 332 } 333 334 return l; 335 } 336 337 static int parseOCIlinuxcgroups_legacy_blockio(struct blob_attr *msg) 338 { 339 struct blob_attr *tb[__OCI_LINUX_CGROUPS_BLOCKIO_MAX], 340 *tbwd[__OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MAX], 341 *tbtd[__OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAX], 342 *cur; 343 int rem; 344 int weight = -1, leafweight = -1; 345 size_t numweightstrs = 0, numiomaxstrs = 0, strtotlen = 1; 346 char **weightstrs = NULL, **iomaxstrs = NULL, **curstr; 347 char *weightstr, *iomaxstr; 348 struct avl_tree iomax; 349 struct iomax_line *curiomax, *tmp; 350 351 blobmsg_parse(oci_linux_cgroups_blockio_policy, 
__OCI_LINUX_CGROUPS_BLOCKIO_MAX, tb, blobmsg_data(msg), blobmsg_len(msg)); 352 353 if (tb[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHT]) { 354 weight = blobmsg_get_u32(tb[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHT]); 355 ++numweightstrs; 356 } 357 358 if (weight > CGROUP_IO_WEIGHT_MAX) 359 return ERANGE; 360 361 if (tb[OCI_LINUX_CGROUPS_BLOCKIO_LEAFWEIGHT]) 362 leafweight = blobmsg_get_u32(tb[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHT]); 363 364 if (leafweight > CGROUP_IO_WEIGHT_MAX) 365 return ERANGE; 366 367 blobmsg_for_each_attr(cur, tb[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE], rem) 368 ++numweightstrs; 369 370 weightstrs = calloc(numweightstrs + 1, sizeof(char *)); 371 if (!weightstrs) 372 exit(ENOMEM); 373 374 numweightstrs = 0; 375 376 if (weight > -1) 377 if (asprintf(&weightstrs[numweightstrs++], "default %d", weight) < 0) 378 return ENOMEM; 379 380 blobmsg_for_each_attr(cur, tb[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE], rem) { 381 uint64_t major, minor; 382 int devweight = weight, devleafweight = leafweight; 383 384 blobmsg_parse(oci_linux_cgroups_blockio_weightdevice_policy, __OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MAX, tbwd, blobmsg_data(cur), blobmsg_len(cur)); 385 if (!tbwd[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MAJOR] || 386 !tbwd[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MINOR]) 387 return ENODATA; 388 389 if (!tbwd[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_WEIGHT] && 390 !tbwd[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_LEAFWEIGHT]) 391 return ENODATA; 392 393 if (tbwd[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_WEIGHT]) 394 devweight = blobmsg_get_u32(tbwd[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_WEIGHT]); 395 396 if (devweight > CGROUP_IO_WEIGHT_MAX) 397 return ERANGE; 398 399 if (tbwd[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_LEAFWEIGHT]) 400 devleafweight = blobmsg_get_u32(tbwd[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_LEAFWEIGHT]); 401 402 if (devleafweight > CGROUP_IO_WEIGHT_MAX) 403 return ERANGE; 404 405 if (tbwd[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_LEAFWEIGHT]) 406 return ENOTSUP; 407 408 major = 
blobmsg_cast_u64(tbwd[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MAJOR]); 409 minor = blobmsg_cast_u64(tbwd[OCI_LINUX_CGROUPS_BLOCKIO_WEIGHTDEVICE_MINOR]); 410 411 if (asprintf(&weightstrs[numweightstrs++], "%" PRIu64 ":%" PRIu64 " %u", major, minor, devweight) < 0) 412 return ENOMEM; 413 } 414 415 if (numweightstrs) { 416 curstr = weightstrs; 417 while (*curstr) 418 strtotlen += strlen(*(curstr++)) + 1; 419 420 weightstr = calloc(strtotlen, sizeof(char)); 421 if (!weightstr) 422 exit(ENOMEM); 423 424 curstr = weightstrs; 425 while (*curstr) { 426 strcat(weightstr, *curstr); 427 strcat(weightstr, "\n"); 428 free(*(curstr++)); 429 } 430 431 cgroups_set("io.bfq.weight", weightstr); 432 free(weightstr); 433 }; 434 435 free(weightstrs); 436 437 avl_init(&iomax, avl_devcmp, false, NULL); 438 439 blobmsg_for_each_attr(cur, tb[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEREADBPSDEVICE], rem) { 440 struct iomax_line *l; 441 442 blobmsg_parse(oci_linux_cgroups_blockio_throttledevice_policy, __OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAX, tbtd, blobmsg_data(cur), blobmsg_len(cur)); 443 444 if (!tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAJOR] || 445 !tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MINOR] || 446 !tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_RATE]) 447 return ENODATA; 448 449 l = get_iomax_line(&iomax, 450 blobmsg_cast_u64(tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAJOR]), 451 blobmsg_cast_u64(tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MINOR])); 452 453 l->rbps = blobmsg_cast_u64(tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_RATE]); 454 } 455 456 blobmsg_for_each_attr(cur, tb[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEWRITEBPSDEVICE], rem) { 457 struct iomax_line *l; 458 459 blobmsg_parse(oci_linux_cgroups_blockio_throttledevice_policy, __OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAX, tbtd, blobmsg_data(cur), blobmsg_len(cur)); 460 461 if (!tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAJOR] || 462 !tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MINOR] || 463 
!tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_RATE]) 464 return ENODATA; 465 466 l = get_iomax_line(&iomax, 467 blobmsg_cast_u64(tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAJOR]), 468 blobmsg_cast_u64(tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MINOR])); 469 470 l->wbps = blobmsg_cast_u64(tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_RATE]); 471 } 472 473 blobmsg_for_each_attr(cur, tb[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEREADIOPSDEVICE], rem) { 474 struct iomax_line *l; 475 476 blobmsg_parse(oci_linux_cgroups_blockio_throttledevice_policy, __OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAX, tbtd, blobmsg_data(cur), blobmsg_len(cur)); 477 478 if (!tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAJOR] || 479 !tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MINOR] || 480 !tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_RATE]) 481 return ENODATA; 482 483 l = get_iomax_line(&iomax, 484 blobmsg_cast_u64(tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAJOR]), 485 blobmsg_cast_u64(tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MINOR])); 486 487 l->riops = blobmsg_cast_u64(tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_RATE]); 488 } 489 490 blobmsg_for_each_attr(cur, tb[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEWRITEIOPSDEVICE], rem) { 491 struct iomax_line *l; 492 493 blobmsg_parse(oci_linux_cgroups_blockio_throttledevice_policy, __OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAX, tbtd, blobmsg_data(cur), blobmsg_len(cur)); 494 495 if (!tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAJOR] || 496 !tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MINOR] || 497 !tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_RATE]) 498 return ENODATA; 499 500 l = get_iomax_line(&iomax, 501 blobmsg_cast_u64(tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MAJOR]), 502 blobmsg_cast_u64(tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_MINOR])); 503 504 l->wiops = blobmsg_cast_u64(tbtd[OCI_LINUX_CGROUPS_BLOCKIO_THROTTLEDEVICE_RATE]); 505 } 506 507 avl_for_each_element(&iomax, curiomax, avl) 508 ++numiomaxstrs; 509 510 if 
(!numiomaxstrs) 511 return 0; 512 513 iomaxstrs = calloc(numiomaxstrs + 1, sizeof(char *)); 514 if (!iomaxstrs) 515 exit(ENOMEM); 516 517 numiomaxstrs = 0; 518 519 avl_for_each_element(&iomax, curiomax, avl) { 520 char iomaxlstr[160]; 521 char lstr[32]; 522 523 sprintf(iomaxlstr, "%" PRIu64 ":%" PRIu64 " ", curiomax->dev.major, curiomax->dev.minor); 524 525 if (curiomax->rbps != -1) { 526 sprintf(lstr, "rbps=%" PRIu64 " ", curiomax->rbps); 527 strcat(iomaxlstr, lstr); 528 } 529 if (curiomax->wbps != -1) { 530 sprintf(lstr, "wbps=%" PRIu64 " ", curiomax->wbps); 531 strcat(iomaxlstr, lstr); 532 } 533 if (curiomax->riops != -1) { 534 sprintf(lstr, "riops=%" PRIu64 " ", curiomax->riops); 535 strcat(iomaxlstr, lstr); 536 } 537 if (curiomax->wiops != -1) { 538 sprintf(lstr, "wiops=%" PRIu64 " ", curiomax->wiops); 539 strcat(iomaxlstr, lstr); 540 } 541 542 iomaxstrs[numiomaxstrs++] = strdup(iomaxlstr); 543 } 544 545 avl_for_each_element_safe(&iomax, curiomax, avl, tmp) { 546 avl_delete(&iomax, &curiomax->avl); 547 free(curiomax); 548 } 549 550 strtotlen = 1; /* 1 accounts for \0 at end of string */ 551 if (numiomaxstrs) { 552 curstr = iomaxstrs; 553 while (*curstr) 554 strtotlen += strlen(*(curstr++)) + 1; /* +1 accounts for \n at end of line */ 555 556 iomaxstr = calloc(strtotlen, sizeof(char)); 557 if (!iomaxstr) 558 exit(ENOMEM); 559 560 curstr = iomaxstrs; 561 562 while (*curstr) { 563 strcat(iomaxstr, *curstr); 564 strcat(iomaxstr, "\n"); 565 free(*(curstr++)); 566 } 567 568 cgroups_set("io.max", iomaxstr); 569 free(iomaxstr); 570 }; 571 572 free(iomaxstrs); 573 574 return 0; 575 } 576 577 578 enum { 579 OCI_LINUX_CGROUPS_CPU_SHARES, 580 OCI_LINUX_CGROUPS_CPU_PERIOD, 581 OCI_LINUX_CGROUPS_CPU_QUOTA, 582 OCI_LINUX_CGROUPS_CPU_REALTIMERUNTIME, 583 OCI_LINUX_CGROUPS_CPU_REALTIMEPERIOD, 584 OCI_LINUX_CGROUPS_CPU_CPUS, 585 OCI_LINUX_CGROUPS_CPU_MEMS, 586 __OCI_LINUX_CGROUPS_CPU_MAX, 587 }; 588 589 static const struct blobmsg_policy oci_linux_cgroups_cpu_policy[] = { 590 
[OCI_LINUX_CGROUPS_CPU_SHARES] = { "shares", BLOBMSG_CAST_INT64 }, 591 [OCI_LINUX_CGROUPS_CPU_PERIOD] = { "period", BLOBMSG_CAST_INT64 }, 592 [OCI_LINUX_CGROUPS_CPU_QUOTA] = { "quota", BLOBMSG_CAST_INT64 }, /* signed int64! */ 593 [OCI_LINUX_CGROUPS_CPU_REALTIMEPERIOD] = { "realtimePeriod", BLOBMSG_CAST_INT64 }, 594 [OCI_LINUX_CGROUPS_CPU_REALTIMERUNTIME] = { "realtimeRuntime", BLOBMSG_CAST_INT64 }, 595 [OCI_LINUX_CGROUPS_CPU_CPUS] = { "cpus", BLOBMSG_TYPE_STRING }, 596 [OCI_LINUX_CGROUPS_CPU_MEMS] = { "mems", BLOBMSG_TYPE_STRING }, 597 }; 598 599 static int parseOCIlinuxcgroups_legacy_cpu(struct blob_attr *msg) 600 { 601 struct blob_attr *tb[__OCI_LINUX_CGROUPS_CPU_MAX]; 602 uint64_t shares, period = 0; 603 int64_t quota = -2; /* unset */ 604 char tmp[32] = { 0 }; 605 606 blobmsg_parse(oci_linux_cgroups_cpu_policy, __OCI_LINUX_CGROUPS_CPU_MAX, tb, blobmsg_data(msg), blobmsg_len(msg)); 607 608 if (tb[OCI_LINUX_CGROUPS_CPU_REALTIMEPERIOD] || 609 tb[OCI_LINUX_CGROUPS_CPU_REALTIMERUNTIME]) 610 return ENOTSUP; /* no equivalent in cgroup2 */ 611 612 if (tb[OCI_LINUX_CGROUPS_CPU_SHARES]) { 613 shares = blobmsg_cast_u64(tb[OCI_LINUX_CGROUPS_CPU_SHARES]); 614 if ((shares < 2) || (shares > 262144)) 615 return ERANGE; 616 617 snprintf(tmp, sizeof(tmp), "%" PRIu64, (((uint64_t)1) + ((shares - 2) * 9999) / 262142)); 618 cgroups_set("cpu.weight", tmp); 619 tmp[0] = '\0'; 620 } 621 622 if (tb[OCI_LINUX_CGROUPS_CPU_QUOTA]) 623 quota = blobmsg_cast_s64(tb[OCI_LINUX_CGROUPS_CPU_QUOTA]); 624 625 if (tb[OCI_LINUX_CGROUPS_CPU_PERIOD]) 626 period = blobmsg_cast_u64(tb[OCI_LINUX_CGROUPS_CPU_PERIOD]); 627 628 if (period) { 629 if (quota >= 0) 630 snprintf(tmp, sizeof(tmp), "%" PRId64 " %" PRIu64 , quota, period); 631 else 632 snprintf(tmp, sizeof(tmp), "max %" PRIu64, period); /* assume default */ 633 } else if (quota >= 0) { 634 snprintf(tmp, sizeof(tmp), "%" PRId64, quota); 635 } else if (quota == -1) { 636 strcpy(tmp, "max"); 637 } 638 639 if (tmp[0]) 640 cgroups_set("cpu.max", tmp); 
641 642 if (tb[OCI_LINUX_CGROUPS_CPU_CPUS]) 643 cgroups_set("cpuset.cpus", blobmsg_get_string(tb[OCI_LINUX_CGROUPS_CPU_CPUS])); 644 645 if (tb[OCI_LINUX_CGROUPS_CPU_MEMS]) 646 cgroups_set("cpuset.mems", blobmsg_get_string(tb[OCI_LINUX_CGROUPS_CPU_MEMS])); 647 648 return 0; 649 } 650 651 652 enum { 653 OCI_LINUX_CGROUPS_MEMORY_LIMIT, 654 OCI_LINUX_CGROUPS_MEMORY_RESERVATION, 655 OCI_LINUX_CGROUPS_MEMORY_SWAP, 656 OCI_LINUX_CGROUPS_MEMORY_KERNEL, 657 OCI_LINUX_CGROUPS_MEMORY_KERNELTCP, 658 OCI_LINUX_CGROUPS_MEMORY_SWAPPINESS, 659 OCI_LINUX_CGROUPS_MEMORY_DISABLEOOMKILLER, 660 OCI_LINUX_CGROUPS_MEMORY_USEHIERARCHY, 661 __OCI_LINUX_CGROUPS_MEMORY_MAX, 662 }; 663 664 static const struct blobmsg_policy oci_linux_cgroups_memory_policy[] = { 665 [OCI_LINUX_CGROUPS_MEMORY_LIMIT] = { "limit", BLOBMSG_CAST_INT64 }, /* signed int64! */ 666 [OCI_LINUX_CGROUPS_MEMORY_RESERVATION] = { "reservation", BLOBMSG_CAST_INT64 }, /* signed int64! */ 667 [OCI_LINUX_CGROUPS_MEMORY_SWAP] = { "swap", BLOBMSG_CAST_INT64 }, /* signed int64! */ 668 [OCI_LINUX_CGROUPS_MEMORY_KERNEL] = { "kernel", BLOBMSG_CAST_INT64 }, /* signed int64! ignored */ 669 [OCI_LINUX_CGROUPS_MEMORY_KERNELTCP] = { "kernelTCP", BLOBMSG_CAST_INT64 }, /* signed int64! 
ignored */ 670 [OCI_LINUX_CGROUPS_MEMORY_SWAPPINESS] = { "swappiness", BLOBMSG_CAST_INT64 }, 671 [OCI_LINUX_CGROUPS_MEMORY_DISABLEOOMKILLER] = { "disableOOMKiller", BLOBMSG_TYPE_BOOL }, 672 [OCI_LINUX_CGROUPS_MEMORY_USEHIERARCHY] { "useHierarchy", BLOBMSG_TYPE_BOOL }, 673 }; 674 675 static int parseOCIlinuxcgroups_legacy_memory(struct blob_attr *msg) 676 { 677 struct blob_attr *tb[__OCI_LINUX_CGROUPS_MEMORY_MAX]; 678 char tmp[32] = { 0 }; 679 int64_t limit = -1, swap, reservation; 680 681 blobmsg_parse(oci_linux_cgroups_memory_policy, __OCI_LINUX_CGROUPS_MEMORY_MAX, tb, blobmsg_data(msg), blobmsg_len(msg)); 682 683 /* 684 * not all properties of the OCI memory section can be mapped to cgroup2 685 * kernel memory accounting is always enabled and included in the set 686 * memory limit, hence these options can be ignored 687 * disableOOMKiller could be emulated using oom_score_adj + seccomp eBPF 688 * preventing self-upgrade (but allow downgrade) 689 * 690 * see also https://github.com/opencontainers/runtime-spec/issues/1005 691 */ 692 if (tb[OCI_LINUX_CGROUPS_MEMORY_SWAPPINESS] || 693 tb[OCI_LINUX_CGROUPS_MEMORY_DISABLEOOMKILLER] || 694 tb[OCI_LINUX_CGROUPS_MEMORY_USEHIERARCHY]) 695 return ENOTSUP; 696 697 698 if (tb[OCI_LINUX_CGROUPS_MEMORY_LIMIT]) { 699 limit = blobmsg_cast_s64(tb[OCI_LINUX_CGROUPS_MEMORY_LIMIT]); 700 if (limit == -1) 701 strcpy(tmp, "max"); 702 else 703 snprintf(tmp, sizeof(tmp), "%" PRId64, limit); 704 705 cgroups_set("memory.max", tmp); 706 } 707 708 if (tb[OCI_LINUX_CGROUPS_MEMORY_RESERVATION]) { 709 reservation = blobmsg_cast_s64(tb[OCI_LINUX_CGROUPS_MEMORY_RESERVATION]); 710 711 if (reservation == -1) 712 strcpy(tmp, "max"); 713 else 714 snprintf(tmp, sizeof(tmp), "%" PRId64, reservation); 715 716 cgroups_set("memory.low", tmp); 717 } 718 719 /* OCI 'swap' acounts for memory+swap */ 720 if (tb[OCI_LINUX_CGROUPS_MEMORY_SWAP]) { 721 swap = blobmsg_cast_s64(tb[OCI_LINUX_CGROUPS_MEMORY_SWAP]); 722 723 if (swap == -1) 724 strcpy(tmp, "max"); 725 
else if (limit == -1 || (limit < swap)) 726 snprintf(tmp, sizeof(tmp), "%" PRId64, swap); 727 else 728 snprintf(tmp, sizeof(tmp), "%" PRId64, limit - swap); 729 730 cgroups_set("memory.swap_max", tmp); 731 } 732 733 return 0; 734 } 735 736 737 enum { 738 OCI_LINUX_CGROUPS_PIDS_LIMIT, 739 __OCI_LINUX_CGROUPS_PIDS_MAX, 740 }; 741 742 static const struct blobmsg_policy oci_linux_cgroups_pids_policy[] = { 743 [OCI_LINUX_CGROUPS_PIDS_LIMIT] = { "limit", BLOBMSG_CAST_INT64 }, 744 }; 745 746 static int parseOCIlinuxcgroups_legacy_pids(struct blob_attr *msg) 747 { 748 struct blob_attr *tb[__OCI_LINUX_CGROUPS_MEMORY_MAX]; 749 char tmp[32] = { 0 }; 750 751 blobmsg_parse(oci_linux_cgroups_pids_policy, __OCI_LINUX_CGROUPS_PIDS_MAX, tb, blobmsg_data(msg), blobmsg_len(msg)); 752 753 if (!tb[OCI_LINUX_CGROUPS_PIDS_LIMIT]) 754 return EINVAL; 755 756 snprintf(tmp, sizeof(tmp), "%" PRIu64, blobmsg_cast_u64(tb[OCI_LINUX_CGROUPS_PIDS_LIMIT])); 757 758 cgroups_set("pids.max", tmp); 759 760 return 0; 761 } 762 763 static int parseOCIlinuxcgroups_unified(struct blob_attr *msg) 764 { 765 struct blob_attr *cur; 766 int rem; 767 768 blobmsg_for_each_attr(cur, msg, rem) { 769 if (blobmsg_type(cur) != BLOBMSG_TYPE_STRING) 770 return EINVAL; 771 772 /* restrict keys */ 773 if (strchr(blobmsg_name(cur), '/') || 774 !strcmp(blobmsg_name(cur), "cgroup.subtree_control") || 775 !strcmp(blobmsg_name(cur), "cgroup.procs") || 776 !strcmp(blobmsg_name(cur), "cgroup.threads") || 777 !strcmp(blobmsg_name(cur), "cgroup.freeze")) 778 return EINVAL; 779 780 cgroups_set(blobmsg_name(cur), blobmsg_get_string(cur)); 781 } 782 783 return 0; 784 } 785 786 enum { 787 OCI_LINUX_CGROUPS_BLOCKIO, 788 OCI_LINUX_CGROUPS_CPU, 789 OCI_LINUX_CGROUPS_DEVICES, 790 OCI_LINUX_CGROUPS_HUGEPAGELIMITS, 791 OCI_LINUX_CGROUPS_INTELRDT, 792 OCI_LINUX_CGROUPS_MEMORY, 793 OCI_LINUX_CGROUPS_NETWORK, 794 OCI_LINUX_CGROUPS_PIDS, 795 OCI_LINUX_CGROUPS_RDMA, 796 OCI_LINUX_CGROUPS_UNIFIED, 797 __OCI_LINUX_CGROUPS_MAX, 798 }; 799 800 
static const struct blobmsg_policy oci_linux_cgroups_policy[] = { 801 [OCI_LINUX_CGROUPS_BLOCKIO] = { "blockIO", BLOBMSG_TYPE_TABLE }, 802 [OCI_LINUX_CGROUPS_CPU] = { "cpu", BLOBMSG_TYPE_TABLE }, 803 [OCI_LINUX_CGROUPS_DEVICES] = { "devices", BLOBMSG_TYPE_ARRAY }, 804 [OCI_LINUX_CGROUPS_HUGEPAGELIMITS] = { "hugepageLimits", BLOBMSG_TYPE_ARRAY }, 805 [OCI_LINUX_CGROUPS_INTELRDT] = { "intelRdt", BLOBMSG_TYPE_TABLE }, 806 [OCI_LINUX_CGROUPS_MEMORY] = { "memory", BLOBMSG_TYPE_TABLE }, 807 [OCI_LINUX_CGROUPS_NETWORK] = { "network", BLOBMSG_TYPE_TABLE }, 808 [OCI_LINUX_CGROUPS_PIDS] = { "pids", BLOBMSG_TYPE_TABLE }, 809 [OCI_LINUX_CGROUPS_RDMA] = { "rdma", BLOBMSG_TYPE_TABLE }, 810 [OCI_LINUX_CGROUPS_UNIFIED] = { "unified", BLOBMSG_TYPE_TABLE }, 811 }; 812 813 int parseOCIlinuxcgroups(struct blob_attr *msg) 814 { 815 struct blob_attr *tb[__OCI_LINUX_CGROUPS_MAX]; 816 int ret; 817 818 blobmsg_parse(oci_linux_cgroups_policy, __OCI_LINUX_CGROUPS_MAX, tb, blobmsg_data(msg), blobmsg_len(msg)); 819 820 if (tb[OCI_LINUX_CGROUPS_HUGEPAGELIMITS] || 821 tb[OCI_LINUX_CGROUPS_INTELRDT] || 822 tb[OCI_LINUX_CGROUPS_NETWORK] || 823 tb[OCI_LINUX_CGROUPS_RDMA]) 824 return ENOTSUP; 825 826 if (tb[OCI_LINUX_CGROUPS_BLOCKIO]) { 827 ret = parseOCIlinuxcgroups_legacy_blockio(tb[OCI_LINUX_CGROUPS_BLOCKIO]); 828 if (ret) 829 return ret; 830 } 831 832 if (tb[OCI_LINUX_CGROUPS_CPU]) { 833 ret = parseOCIlinuxcgroups_legacy_cpu(tb[OCI_LINUX_CGROUPS_CPU]); 834 if (ret) 835 return ret; 836 } 837 838 if (tb[OCI_LINUX_CGROUPS_DEVICES]) { 839 ret = parseOCIlinuxcgroups_devices(tb[OCI_LINUX_CGROUPS_DEVICES]); 840 if (ret) 841 return ret; 842 } 843 844 if (tb[OCI_LINUX_CGROUPS_MEMORY]) { 845 ret = parseOCIlinuxcgroups_legacy_memory(tb[OCI_LINUX_CGROUPS_MEMORY]); 846 if (ret) 847 return ret; 848 } 849 850 if (tb[OCI_LINUX_CGROUPS_PIDS]) { 851 ret = parseOCIlinuxcgroups_legacy_pids(tb[OCI_LINUX_CGROUPS_PIDS]); 852 if (ret) 853 return ret; 854 } 855 856 if (tb[OCI_LINUX_CGROUPS_UNIFIED]) { 857 ret = 
parseOCIlinuxcgroups_unified(tb[OCI_LINUX_CGROUPS_UNIFIED]); 858 if (ret) 859 return ret; 860 } 861 862 return 0; 863 } 864
/* This page was automatically generated by LXR 0.3.1. - OpenWrt */