1 /* 2 * Copyright (C) 2021 Daniel Golle <daniel@makrotopia.org> 3 * 4 * This program is free software; you can redistribute it and/or modify 5 * it under the terms of the GNU Lesser General Public License version 2.1 6 * as published by the Free Software Foundation 7 * 8 * This program is distributed in the hope that it will be useful, 9 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 * GNU General Public License for more details. 12 * 13 * somehow emulate devices.allow/devices.deny using eBPF 14 * 15 * OCI run-time spec defines the syntax for allowing/denying access 16 * to devices according to the definition of cgroup-v1 in the Kernel 17 * as described in Documentation/admin-guide/cgroup-v1. 18 */ 19 20 #include <assert.h> 21 #include <linux/bpf.h> 22 #ifdef __GLIBC__ 23 #include <sys/cdefs.h> 24 #else 25 #include <sys/reg.h> 26 #endif 27 #include <sys/syscall.h> 28 29 #include <libubox/blobmsg.h> 30 #include <libubox/blobmsg_json.h> 31 #include <libubox/list.h> 32 33 #include "cgroups.h" 34 #include "cgroups-bpf.h" 35 #include "log.h" 36 37 static struct bpf_insn *program = NULL; 38 static int bpf_total_insn = 0; 39 static const char *license = "GPL"; 40 41 static int 42 syscall_bpf (int cmd, union bpf_attr *attr, unsigned int size) 43 { 44 return (int) syscall (__NR_bpf, cmd, attr, size); 45 } 46 47 /* from crun/src/libcrun/ebpf.c */ 48 #define BPF_ALU32_IMM(OP, DST, IMM) \ 49 ((struct bpf_insn){ .code = BPF_ALU | BPF_OP (OP) | BPF_K, .dst_reg = DST, .src_reg = 0, .off = 0, .imm = IMM }) 50 51 #define BPF_LDX_MEM(SIZE, DST, SRC, OFF) \ 52 ((struct bpf_insn){ \ 53 .code = BPF_LDX | BPF_SIZE (SIZE) | BPF_MEM, .dst_reg = DST, .src_reg = SRC, .off = OFF, .imm = 0 }) 54 55 #define BPF_MOV64_REG(DST, SRC) \ 56 ((struct bpf_insn){ .code = BPF_ALU64 | BPF_MOV | BPF_X, .dst_reg = DST, .src_reg = SRC, .off = 0, .imm = 0 }) 57 58 #define BPF_JMP_A(OFF) \ 59 ((struct bpf_insn){ .code = 
BPF_JMP | BPF_JA, .dst_reg = 0, .src_reg = 0, .off = OFF, .imm = 0 }) 60 61 #define BPF_JMP_IMM(OP, DST, IMM, OFF) \ 62 ((struct bpf_insn){ .code = BPF_JMP | BPF_OP (OP) | BPF_K, .dst_reg = DST, .src_reg = 0, .off = OFF, .imm = IMM }) 63 64 #define BPF_JMP_REG(OP, DST, SRC, OFF) \ 65 ((struct bpf_insn){ .code = BPF_JMP | BPF_OP (OP) | BPF_X, .dst_reg = DST, .src_reg = SRC, .off = OFF, .imm = 0 }) 66 67 #define BPF_MOV64_IMM(DST, IMM) \ 68 ((struct bpf_insn){ .code = BPF_ALU64 | BPF_MOV | BPF_K, .dst_reg = DST, .src_reg = 0, .off = 0, .imm = IMM }) 69 70 #define BPF_MOV32_REG(DST, SRC) \ 71 ((struct bpf_insn){ .code = BPF_ALU | BPF_MOV | BPF_X, .dst_reg = DST, .src_reg = SRC, .off = 0, .imm = 0 }) 72 73 #define BPF_EXIT_INSN() \ 74 ((struct bpf_insn){ .code = BPF_JMP | BPF_EXIT, .dst_reg = 0, .src_reg = 0, .off = 0, .imm = 0 }) 75 76 /* taken from systemd. */ 77 static const struct bpf_insn pre_insn[] = { 78 /* type -> R2. */ 79 BPF_LDX_MEM (BPF_W, BPF_REG_2, BPF_REG_1, 0), 80 BPF_ALU32_IMM (BPF_AND, BPF_REG_2, 0xFFFF), 81 /* access -> R3. */ 82 BPF_LDX_MEM (BPF_W, BPF_REG_3, BPF_REG_1, 0), 83 BPF_ALU32_IMM (BPF_RSH, BPF_REG_3, 16), 84 /* major -> R4. */ 85 BPF_LDX_MEM (BPF_W, BPF_REG_4, BPF_REG_1, 4), 86 /* minor -> R5. 
*/ 87 BPF_LDX_MEM (BPF_W, BPF_REG_5, BPF_REG_1, 8), 88 }; 89 90 enum { 91 OCI_LINUX_CGROUPS_DEVICES_ALLOW, 92 OCI_LINUX_CGROUPS_DEVICES_TYPE, 93 OCI_LINUX_CGROUPS_DEVICES_MAJOR, 94 OCI_LINUX_CGROUPS_DEVICES_MINOR, 95 OCI_LINUX_CGROUPS_DEVICES_ACCESS, 96 __OCI_LINUX_CGROUPS_DEVICES_MAX, 97 }; 98 99 static const struct blobmsg_policy oci_linux_cgroups_devices_policy[] = { 100 [OCI_LINUX_CGROUPS_DEVICES_ALLOW] = { "allow", BLOBMSG_TYPE_BOOL }, 101 [OCI_LINUX_CGROUPS_DEVICES_TYPE] = { "type", BLOBMSG_TYPE_STRING }, 102 [OCI_LINUX_CGROUPS_DEVICES_MAJOR] = { "major", BLOBMSG_CAST_INT64 }, 103 [OCI_LINUX_CGROUPS_DEVICES_MINOR] = { "minor", BLOBMSG_CAST_INT64 }, 104 [OCI_LINUX_CGROUPS_DEVICES_ACCESS] = { "access", BLOBMSG_TYPE_STRING }, 105 }; 106 107 /* 108 * cgroup-v1 devices got a (default) behaviour and a list of exceptions. 109 * define datatypes similar to the legacy kernel code. 110 */ 111 #define DEVCG_DEV_ALL (BPF_DEVCG_DEV_BLOCK | BPF_DEVCG_DEV_CHAR) 112 #define DEVCG_ACC_ALL (BPF_DEVCG_ACC_READ | BPF_DEVCG_ACC_WRITE | BPF_DEVCG_ACC_MKNOD) 113 114 enum devcg_behavior { 115 DEVCG_DEFAULT_NONE, 116 DEVCG_DEFAULT_ALLOW, 117 DEVCG_DEFAULT_DENY, 118 }; 119 120 struct dev_exception_item { 121 uint32_t major, minor; 122 short type; 123 short access; 124 struct list_head list; 125 bool allow; 126 }; 127 128 /* 129 * add a bunch of default rules 130 */ 131 static int add_default_exceptions(struct list_head *exceptions) 132 { 133 int i, ret = 0; 134 struct dev_exception_item *cur; 135 /* from crun/src/libcrun/cgroup.c */ 136 const struct dev_exception_item defrules[] = { 137 /* always allow mknod */ 138 { .allow = true, .type = BPF_DEVCG_DEV_CHAR, .major = ~0, .minor = ~0, .access = BPF_DEVCG_ACC_MKNOD }, 139 { .allow = true, .type = BPF_DEVCG_DEV_BLOCK, .major = ~0, .minor = ~0, .access = BPF_DEVCG_ACC_MKNOD }, 140 /* /dev/null */ 141 { .allow = true, .type = BPF_DEVCG_DEV_CHAR, .major = 1, .minor = 3, .access = DEVCG_ACC_ALL }, 142 /* /dev/random */ 143 { .allow = true, 
.type = BPF_DEVCG_DEV_CHAR, .major = 1, .minor = 8, .access = DEVCG_ACC_ALL }, 144 /* /dev/full */ 145 { .allow = true, .type = BPF_DEVCG_DEV_CHAR, .major = 1, .minor = 7, .access = DEVCG_ACC_ALL }, 146 /* /dev/tty */ 147 { .allow = true, .type = BPF_DEVCG_DEV_CHAR, .major = 5, .minor = 0, .access = DEVCG_ACC_ALL }, 148 /* /dev/zero */ 149 { .allow = true, .type = BPF_DEVCG_DEV_CHAR, .major = 1, .minor = 5, .access = DEVCG_ACC_ALL }, 150 /* /dev/urandom */ 151 { .allow = true, .type = BPF_DEVCG_DEV_CHAR, .major = 1, .minor = 9, .access = DEVCG_ACC_ALL }, 152 /* /dev/console */ 153 { .allow = true, .type = BPF_DEVCG_DEV_CHAR, .major = 5, .minor = 1, .access = DEVCG_ACC_ALL }, 154 /* /dev/pts/[0-255] */ 155 { .allow = true, .type = BPF_DEVCG_DEV_CHAR, .major = 136, .minor = ~0, .access = DEVCG_ACC_ALL }, 156 /* /dev/ptmx */ 157 { .allow = true, .type = BPF_DEVCG_DEV_CHAR, .major = 5, .minor = 2, .access = DEVCG_ACC_ALL }, 158 /* /dev/net/tun */ 159 { .allow = true, .type = BPF_DEVCG_DEV_CHAR, .major = 10, .minor = 200, .access = DEVCG_ACC_ALL }, 160 }; 161 162 for (i = 0; i < (sizeof(defrules) / sizeof(struct dev_exception_item)); ++i) { 163 cur = malloc(sizeof(struct dev_exception_item)); 164 if (!cur) { 165 ret = ENOMEM; 166 break; 167 } 168 /* add defaults to list in reverse order (last item will be first in list) */ 169 memcpy(cur, &defrules[i], sizeof(struct dev_exception_item)); 170 list_add(&cur->list, exceptions); 171 } 172 173 return ret; 174 } 175 176 /* 177 * free all exceptions in the list 178 */ 179 static void flush_exceptions(struct list_head *freelist) 180 { 181 struct dev_exception_item *dl, *dln; 182 183 if (!list_empty(freelist)) 184 list_for_each_entry_safe(dl, dln, freelist, list) { 185 list_del(&dl->list); 186 free(dl); 187 } 188 } 189 190 /* 191 * parse OCI cgroups devices and translate into cgroups-v2 eBPF program 192 */ 193 int parseOCIlinuxcgroups_devices(struct blob_attr *msg) 194 { 195 struct blob_attr *tb[__OCI_LINUX_CGROUPS_DEVICES_MAX]; 
196 struct blob_attr *cur; 197 int rem, ret = 0; 198 int bpf_type, bpf_access; 199 unsigned char acidx; 200 bool allow = false, 201 has_access = false, 202 has_type = false, 203 has_major = false, 204 has_minor = false; 205 int total_ins = 0, 206 cur_ins = 0, 207 pre_insn_len = sizeof(pre_insn) / sizeof(struct bpf_insn), 208 next_ins; 209 char *access, *devtype; 210 uint32_t devmajor, devminor; 211 struct dev_exception_item *dl; 212 struct list_head exceptions; 213 enum devcg_behavior behavior = DEVCG_DEFAULT_ALLOW; 214 INIT_LIST_HEAD(&exceptions); 215 216 /* parse according to OCI spec */ 217 blobmsg_for_each_attr(cur, msg, rem) { 218 blobmsg_parse(oci_linux_cgroups_devices_policy, __OCI_LINUX_CGROUPS_DEVICES_MAX, 219 tb, blobmsg_data(cur), blobmsg_len(cur)); 220 221 if (!tb[OCI_LINUX_CGROUPS_DEVICES_ALLOW]) { 222 ret = EINVAL; 223 goto out; 224 } 225 226 allow = blobmsg_get_bool(tb[OCI_LINUX_CGROUPS_DEVICES_ALLOW]); 227 228 bpf_access = 0; 229 if (tb[OCI_LINUX_CGROUPS_DEVICES_ACCESS]) { 230 access = blobmsg_get_string(tb[OCI_LINUX_CGROUPS_DEVICES_ACCESS]); 231 if ((strlen(access) > 3) || (strlen(access) == 0)) { 232 ret = EINVAL; 233 goto out; 234 } 235 236 for (acidx = 0; acidx < strlen(access); ++acidx) { 237 switch (access[acidx]) { 238 case 'r': 239 bpf_access |= BPF_DEVCG_ACC_READ; 240 break; 241 case 'w': 242 bpf_access |= BPF_DEVCG_ACC_WRITE; 243 break; 244 case 'm': 245 bpf_access |= BPF_DEVCG_ACC_MKNOD; 246 break; 247 default: 248 ret = EINVAL; 249 goto out; 250 } 251 } 252 } 253 254 if (!bpf_access) 255 bpf_access = DEVCG_ACC_ALL; 256 257 bpf_type = 0; 258 if (tb[OCI_LINUX_CGROUPS_DEVICES_TYPE]) { 259 devtype = blobmsg_get_string(tb[OCI_LINUX_CGROUPS_DEVICES_TYPE]); 260 261 switch (devtype[0]) { 262 case 'c': 263 bpf_type = BPF_DEVCG_DEV_CHAR; 264 break; 265 case 'b': 266 bpf_type = BPF_DEVCG_DEV_BLOCK; 267 break; 268 case 'a': 269 bpf_type = DEVCG_DEV_ALL; 270 break; 271 default: 272 ret = EINVAL; 273 goto out; 274 } 275 } 276 277 if (!bpf_type) 278 
bpf_type = DEVCG_DEV_ALL; 279 280 if (tb[OCI_LINUX_CGROUPS_DEVICES_MAJOR]) 281 devmajor = blobmsg_cast_u64(tb[OCI_LINUX_CGROUPS_DEVICES_MAJOR]); 282 else 283 devmajor = ~0; 284 285 if (tb[OCI_LINUX_CGROUPS_DEVICES_MINOR]) 286 devminor = blobmsg_cast_u64(tb[OCI_LINUX_CGROUPS_DEVICES_MINOR]); 287 else 288 devminor = ~0; 289 290 if (bpf_type == DEVCG_DEV_ALL) { 291 /* wildcard => change default policy and flush all existing rules */ 292 flush_exceptions(&exceptions); 293 behavior = allow?DEVCG_DEFAULT_ALLOW:DEVCG_DEFAULT_DENY; 294 } else { 295 /* allocate and populate record for exception */ 296 dl = malloc(sizeof(struct dev_exception_item)); 297 if (!dl) { 298 ret = ENOSPC; 299 break; 300 } 301 dl->allow = allow; 302 dl->type = bpf_type; 303 dl->access = bpf_access; 304 dl->major = devmajor; 305 dl->minor = devminor; 306 307 /* push to exceptions list, last goes first */ 308 list_add(&dl->list, &exceptions); 309 } 310 } 311 if (ret) 312 goto out; 313 314 /* add default rules */ 315 ret = add_default_exceptions(&exceptions); 316 if (ret) 317 goto out; 318 319 /* calculate number of instructions to allocate */ 320 list_for_each_entry(dl, &exceptions, list) { 321 has_access = dl->access != DEVCG_ACC_ALL; 322 has_type = dl->type != DEVCG_DEV_ALL; 323 has_major = dl->major != ~0; 324 has_minor = dl->minor != ~0; 325 326 total_ins += (has_type ? 1 : 0) + (has_access ? 3 : 0) + (has_major ? 1 : 0) + (has_minor ? 
1 : 0) + 2; 327 } 328 329 /* acccount for loader instructions */ 330 total_ins += pre_insn_len; 331 332 /* final accept/deny block */ 333 total_ins += 2; 334 335 /* allocate memory for eBPF program */ 336 program = calloc(total_ins, sizeof(struct bpf_insn)); 337 if (!program) { 338 ret = ENOMEM; 339 goto out; 340 } 341 342 /* copy program loader instructions */ 343 memcpy(program, &pre_insn, sizeof(pre_insn)); 344 cur_ins = pre_insn_len; 345 346 /* generate eBPF program */ 347 list_for_each_entry(dl, &exceptions, list) { 348 has_access = dl->access != DEVCG_ACC_ALL; 349 has_type = dl->type != DEVCG_DEV_ALL; 350 has_major = dl->major != ~0; 351 has_minor = dl->minor != ~0; 352 353 next_ins = (has_type ? 1 : 0) + (has_access ? 3 : 0) + (has_major ? 1 : 0) + (has_minor ? 1 : 0) + 1; 354 355 if (has_type) { 356 program[cur_ins++] = BPF_JMP_IMM(BPF_JNE, BPF_REG_2, dl->type, next_ins); 357 --next_ins; 358 } 359 360 if (has_access) { 361 program[cur_ins++] = BPF_MOV32_REG(BPF_REG_1, BPF_REG_3); 362 program[cur_ins++] = BPF_ALU32_IMM(BPF_AND, BPF_REG_1, dl->access); 363 program[cur_ins++] = BPF_JMP_REG(BPF_JNE, BPF_REG_1, BPF_REG_3, next_ins - 2); 364 next_ins -= 3; 365 } 366 367 if (has_major) { 368 program[cur_ins++] = BPF_JMP_IMM(BPF_JNE, BPF_REG_4, dl->major, next_ins); 369 --next_ins; 370 } 371 372 if (has_minor) { 373 program[cur_ins++] = BPF_JMP_IMM(BPF_JNE, BPF_REG_5, dl->minor, next_ins); 374 --next_ins; 375 } 376 377 program[cur_ins++] = BPF_MOV64_IMM(BPF_REG_0, dl->allow ? 
1 : 0); 378 program[cur_ins++] = BPF_EXIT_INSN(); 379 } 380 381 /* default behavior */ 382 program[cur_ins++] = BPF_MOV64_IMM(BPF_REG_0, (behavior == DEVCG_DEFAULT_ALLOW)?1:0); 383 program[cur_ins++] = BPF_EXIT_INSN(); 384 385 if (debug) { 386 fprintf(stderr, "cgroup devices:\na > devices.%s\n", 387 (behavior == DEVCG_DEFAULT_ALLOW)?"allow":"deny"); 388 389 list_for_each_entry(dl, &exceptions, list) 390 fprintf(stderr, "%c %d:%d %s%s%s > devices.%s\n", 391 (dl->type == DEVCG_DEV_ALL)?'a': 392 (dl->type == BPF_DEVCG_DEV_CHAR)?'c':'b', 393 (dl->major == ~0)?-1:dl->major, 394 (dl->minor == ~0)?-1:dl->minor, 395 (dl->access & BPF_DEVCG_ACC_READ)?"r":"", 396 (dl->access & BPF_DEVCG_ACC_WRITE)?"w":"", 397 (dl->access & BPF_DEVCG_ACC_MKNOD)?"m":"", 398 (dl->allow)?"allow":"deny"); 399 400 fprintf(stderr, "generated cgroup-devices eBPF program:\n"); 401 fprintf(stderr, " [idx]\tcode\t dest\t src\t off\t imm\n"); 402 for (cur_ins=0; cur_ins<total_ins; cur_ins++) 403 fprintf(stderr, " [%03d]\t%02hhx\t%3hhu\t%3hhu\t%04hx\t%d\n", cur_ins, 404 program[cur_ins].code, 405 program[cur_ins].dst_reg, 406 program[cur_ins].src_reg, 407 program[cur_ins].off, 408 program[cur_ins].imm); 409 } 410 411 assert(cur_ins == total_ins); 412 bpf_total_insn = total_ins; 413 ret = 0; 414 415 out: 416 flush_exceptions(&exceptions); 417 return ret; 418 } 419 420 /* 421 * attach eBPF program to cgroup 422 */ 423 int attach_cgroups_ebpf(int cgroup_dirfd) { 424 int prog_fd; 425 #if ( __WORDSIZE == 64 ) 426 uint64_t program_ptr = (uint64_t)program; 427 uint64_t license_ptr = (uint64_t)license; 428 #elif ( __WORDSIZE == 32 ) 429 uint32_t program_ptr = (uint32_t)program; 430 uint32_t license_ptr = (uint32_t)license; 431 #else 432 #error 433 #endif 434 union bpf_attr load_attr = { 435 .prog_type = BPF_PROG_TYPE_CGROUP_DEVICE, 436 .license = license_ptr, 437 .insns = program_ptr, 438 .insn_cnt = bpf_total_insn, 439 }; 440 441 if (!program) 442 return 0; 443 444 prog_fd = syscall_bpf(BPF_PROG_LOAD, 
&load_attr, sizeof(load_attr)); 445 if (prog_fd < 0) 446 return EIO; 447 448 union bpf_attr attach_attr = { 449 .attach_type = BPF_CGROUP_DEVICE, 450 .target_fd = cgroup_dirfd, 451 .attach_bpf_fd = prog_fd, 452 }; 453 454 return syscall_bpf(BPF_PROG_ATTACH, &attach_attr, sizeof (attach_attr)); 455 } 456
/* This page was automatically generated by LXR 0.3.1. • OpenWrt */