1 /* 2 * parse and setup OCI seccomp filter 3 * Copyright (c) 2020 Daniel Golle <daniel@makrotopia.org> 4 * seccomp example with syscall reporting 5 * Copyright (c) 2012 The Chromium OS Authors <chromium-os-dev@chromium.org> 6 * Authors: 7 * Kees Cook <keescook@chromium.org> 8 * Will Drewry <wad@chromium.org> 9 * 10 * Use of this source code is governed by a BSD-style license that can be 11 * found in the LICENSE file. 12 * 13 * BPF control flow 14 * 15 * (check_arch)<t>---(check_syscall)<f>---+----[...]<f>---(return default_action) 16 * |<f> |<t> | 17 * KILL (check_argument)<f>--+ 18 * |<t> 19 * [...] 20 * |<t> 21 * (return action) 22 */ 23 #define _GNU_SOURCE 1 24 #include <assert.h> 25 #include <stddef.h> 26 #include <stdlib.h> 27 #include <unistd.h> 28 29 #include <libubox/utils.h> 30 #include <libubox/blobmsg.h> 31 #include <libubox/blobmsg_json.h> 32 33 #include "log.h" 34 #include "seccomp-bpf.h" 35 #include "seccomp-oci.h" 36 #include "../syscall-names.h" 37 #include "seccomp-syscalls-helpers.h" 38 39 static uint32_t resolve_action(char *actname) 40 { 41 if (!strcmp(actname, "SCMP_ACT_KILL")) 42 return SECCOMP_RET_KILL; 43 else if (!strcmp(actname, "SCMP_ACT_KILL_PROCESS")) 44 return SECCOMP_RET_KILLPROCESS; 45 else if (!strcmp(actname, "SCMP_ACT_TRAP")) 46 return SECCOMP_RET_TRAP; 47 else if (!strcmp(actname, "SCMP_ACT_ERRNO")) 48 return SECCOMP_RET_ERRNO; 49 else if (!strcmp(actname, "SCMP_ACT_ERROR")) 50 return SECCOMP_RET_ERRNO; 51 else if (!strcmp(actname, "SCMP_ACT_TRACE")) 52 return SECCOMP_RET_TRACE; 53 else if (!strcmp(actname, "SCMP_ACT_ALLOW")) 54 return SECCOMP_RET_ALLOW; 55 else if (!strcmp(actname, "SCMP_ACT_LOG")) 56 return SECCOMP_RET_LOGALLOW; 57 else { 58 ERROR("unknown seccomp action %s\n", actname); 59 return SECCOMP_RET_KILL; 60 } 61 } 62 63 static uint8_t resolve_op_ins(const char *op) 64 { 65 if (!strcmp(op, "SCMP_CMP_NE")) /* invert EQ */ 66 return BPF_JEQ; 67 else if (!strcmp(op, "SCMP_CMP_LT")) /* invert GE */ 68 return BPF_JGE; 69 
else if (!strcmp(op, "SCMP_CMP_LE")) /* invert GT */ 70 return BPF_JGT; 71 else if (!strcmp(op, "SCMP_CMP_EQ")) 72 return BPF_JEQ; 73 else if (!strcmp(op, "SCMP_CMP_GE")) 74 return BPF_JGE; 75 else if (!strcmp(op, "SCMP_CMP_GT")) 76 return BPF_JGT; 77 else if (!strcmp(op, "SCMP_CMP_MASKED_EQ")) 78 return BPF_JEQ; 79 else { 80 ERROR("unknown seccomp op %s\n", op); 81 return 0; 82 } 83 } 84 85 static bool resolve_op_is_masked(const char *op) 86 { 87 if (!strcmp(op, "SCMP_CMP_MASKED_EQ")) 88 return true; 89 90 return false; 91 } 92 93 static bool resolve_op_inv(const char *op) 94 { 95 if (!strcmp(op, "SCMP_CMP_NE") || 96 !strcmp(op, "SCMP_CMP_LT") || 97 !strcmp(op, "SCMP_CMP_LE")) 98 return true; 99 100 return false; 101 } 102 103 static uint32_t resolve_architecture(char *archname) 104 { 105 if (!archname) 106 return 0; 107 108 if (!strcmp(archname, "SCMP_ARCH_X86")) 109 return AUDIT_ARCH_I386; 110 else if (!strcmp(archname, "SCMP_ARCH_X86_64")) 111 return AUDIT_ARCH_X86_64; 112 else if (!strcmp(archname, "SCMP_ARCH_X32")) 113 /* 114 * return AUDIT_ARCH_X86_64; 115 * 32-bit userland on 64-bit kernel is not supported yet 116 */ 117 return 0; 118 else if (!strcmp(archname, "SCMP_ARCH_ARM")) 119 return AUDIT_ARCH_ARM; 120 else if (!strcmp(archname, "SCMP_ARCH_AARCH64")) 121 return AUDIT_ARCH_AARCH64; 122 else if (!strcmp(archname, "SCMP_ARCH_LOONGARCH64")) 123 return AUDIT_ARCH_LOONGARCH64; 124 else if (!strcmp(archname, "SCMP_ARCH_MIPS")) 125 return AUDIT_ARCH_MIPS; 126 else if (!strcmp(archname, "SCMP_ARCH_MIPS64")) 127 return AUDIT_ARCH_MIPS64; 128 else if (!strcmp(archname, "SCMP_ARCH_MIPS64N32")) 129 return AUDIT_ARCH_MIPS64N32; 130 else if (!strcmp(archname, "SCMP_ARCH_MIPSEL")) 131 return AUDIT_ARCH_MIPSEL; 132 else if (!strcmp(archname, "SCMP_ARCH_MIPSEL64")) 133 return AUDIT_ARCH_MIPSEL64; 134 else if (!strcmp(archname, "SCMP_ARCH_MIPSEL64N32")) 135 return AUDIT_ARCH_MIPSEL64N32; 136 else if (!strcmp(archname, "SCMP_ARCH_PPC")) 137 return AUDIT_ARCH_PPC; 138 
else if (!strcmp(archname, "SCMP_ARCH_PPC64")) 139 return AUDIT_ARCH_PPC64; 140 else if (!strcmp(archname, "SCMP_ARCH_PPC64LE")) 141 return AUDIT_ARCH_PPC64LE; 142 else if (!strcmp(archname, "SCMP_ARCH_S390")) 143 return AUDIT_ARCH_S390; 144 else if (!strcmp(archname, "SCMP_ARCH_S390X")) 145 return AUDIT_ARCH_S390X; 146 else if (!strcmp(archname, "SCMP_ARCH_PARISC")) 147 return AUDIT_ARCH_PARISC; 148 else if (!strcmp(archname, "SCMP_ARCH_PARISC64")) 149 return AUDIT_ARCH_PARISC64; 150 else { 151 ERROR("unknown seccomp architecture %s\n", archname); 152 return 0; 153 } 154 } 155 156 enum { 157 OCI_LINUX_SECCOMP_DEFAULTACTION, 158 OCI_LINUX_SECCOMP_ARCHITECTURES, 159 OCI_LINUX_SECCOMP_FLAGS, 160 OCI_LINUX_SECCOMP_SYSCALLS, 161 __OCI_LINUX_SECCOMP_MAX, 162 }; 163 164 static const struct blobmsg_policy oci_linux_seccomp_policy[] = { 165 [OCI_LINUX_SECCOMP_DEFAULTACTION] = { "defaultAction", BLOBMSG_TYPE_STRING }, 166 [OCI_LINUX_SECCOMP_ARCHITECTURES] = { "architectures", BLOBMSG_TYPE_ARRAY }, 167 [OCI_LINUX_SECCOMP_FLAGS] = { "flags", BLOBMSG_TYPE_ARRAY }, 168 [OCI_LINUX_SECCOMP_SYSCALLS] = { "syscalls", BLOBMSG_TYPE_ARRAY }, 169 }; 170 171 enum { 172 OCI_LINUX_SECCOMP_SYSCALLS_NAMES, 173 OCI_LINUX_SECCOMP_SYSCALLS_ACTION, 174 OCI_LINUX_SECCOMP_SYSCALLS_ERRNORET, 175 OCI_LINUX_SECCOMP_SYSCALLS_ARGS, 176 __OCI_LINUX_SECCOMP_SYSCALLS_MAX 177 }; 178 179 static const struct blobmsg_policy oci_linux_seccomp_syscalls_policy[] = { 180 [OCI_LINUX_SECCOMP_SYSCALLS_NAMES] = { "names", BLOBMSG_TYPE_ARRAY }, 181 [OCI_LINUX_SECCOMP_SYSCALLS_ERRNORET] = { "errnoRet", BLOBMSG_TYPE_INT32 }, 182 [OCI_LINUX_SECCOMP_SYSCALLS_ARGS] = { "args", BLOBMSG_TYPE_ARRAY }, 183 [OCI_LINUX_SECCOMP_SYSCALLS_ACTION] = { "action", BLOBMSG_TYPE_STRING }, 184 }; 185 186 enum { 187 OCI_LINUX_SECCOMP_SYSCALLS_ARGS_INDEX, 188 OCI_LINUX_SECCOMP_SYSCALLS_ARGS_VALUE, 189 OCI_LINUX_SECCOMP_SYSCALLS_ARGS_VALUETWO, 190 OCI_LINUX_SECCOMP_SYSCALLS_ARGS_OP, 191 __OCI_LINUX_SECCOMP_SYSCALLS_ARGS_MAX 192 }; 193 194 
static const struct blobmsg_policy oci_linux_seccomp_syscalls_args_policy[] = { 195 [OCI_LINUX_SECCOMP_SYSCALLS_ARGS_INDEX] = { "index", BLOBMSG_TYPE_INT32 }, 196 [OCI_LINUX_SECCOMP_SYSCALLS_ARGS_VALUE] = { "value", BLOBMSG_CAST_INT64 }, 197 [OCI_LINUX_SECCOMP_SYSCALLS_ARGS_VALUETWO] = { "valueTwo", BLOBMSG_CAST_INT64 }, 198 [OCI_LINUX_SECCOMP_SYSCALLS_ARGS_OP] = { "op", BLOBMSG_TYPE_STRING }, 199 }; 200 201 struct sock_fprog *parseOCIlinuxseccomp(struct blob_attr *msg) 202 { 203 struct blob_attr *tb[__OCI_LINUX_SECCOMP_MAX]; 204 struct blob_attr *tbn[__OCI_LINUX_SECCOMP_SYSCALLS_MAX]; 205 struct blob_attr *tba[__OCI_LINUX_SECCOMP_SYSCALLS_ARGS_MAX]; 206 struct blob_attr *cur, *curn, *curarg; 207 int rem, remn, remargs, sc; 208 struct sock_filter *filter; 209 struct sock_fprog *prog; 210 int sz = 4, idx = 0; 211 uint32_t default_policy = 0; 212 uint32_t seccomp_arch; 213 bool arch_matched; 214 char *op_str; 215 216 blobmsg_parse(oci_linux_seccomp_policy, __OCI_LINUX_SECCOMP_MAX, 217 tb, blobmsg_data(msg), blobmsg_len(msg)); 218 219 if (!tb[OCI_LINUX_SECCOMP_DEFAULTACTION]) { 220 ERROR("seccomp: no default action set\n"); 221 return NULL; 222 } 223 224 default_policy = resolve_action(blobmsg_get_string(tb[OCI_LINUX_SECCOMP_DEFAULTACTION])); 225 226 /* verify architecture while ignoring the x86_64 anomaly for now */ 227 if (tb[OCI_LINUX_SECCOMP_ARCHITECTURES]) { 228 arch_matched = false; 229 blobmsg_for_each_attr(cur, tb[OCI_LINUX_SECCOMP_ARCHITECTURES], rem) { 230 seccomp_arch = resolve_architecture(blobmsg_get_string(cur)); 231 if (ARCH_NR == seccomp_arch) { 232 arch_matched = true; 233 break; 234 } 235 } 236 if (!arch_matched) { 237 ERROR("seccomp architecture doesn't match system\n"); 238 return NULL; 239 } 240 } 241 242 blobmsg_for_each_attr(cur, tb[OCI_LINUX_SECCOMP_SYSCALLS], rem) { 243 sz += 2; /* load and return */ 244 245 blobmsg_parse(oci_linux_seccomp_syscalls_policy, 246 __OCI_LINUX_SECCOMP_SYSCALLS_MAX, 247 tbn, blobmsg_data(cur), blobmsg_len(cur)); 248 
blobmsg_for_each_attr(curn, tbn[OCI_LINUX_SECCOMP_SYSCALLS_NAMES], remn) { 249 sc = find_syscall(blobmsg_get_string(curn)); 250 if (sc == -1) { 251 DEBUG("unknown syscall '%s'\n", blobmsg_get_string(curn)); 252 /* TODO: support run.oci.seccomp_fail_unknown_syscall=1 annotation */ 253 continue; 254 } 255 ++sz; 256 } 257 258 if (tbn[OCI_LINUX_SECCOMP_SYSCALLS_ARGS]) { 259 blobmsg_for_each_attr(curarg, tbn[OCI_LINUX_SECCOMP_SYSCALLS_ARGS], remargs) { 260 sz += 2; /* load and compare */ 261 262 blobmsg_parse(oci_linux_seccomp_syscalls_args_policy, 263 __OCI_LINUX_SECCOMP_SYSCALLS_ARGS_MAX, 264 tba, blobmsg_data(curarg), blobmsg_len(curarg)); 265 if (!tba[OCI_LINUX_SECCOMP_SYSCALLS_ARGS_INDEX] || 266 !tba[OCI_LINUX_SECCOMP_SYSCALLS_ARGS_VALUE] || 267 !tba[OCI_LINUX_SECCOMP_SYSCALLS_ARGS_OP]) 268 return NULL; 269 270 if (blobmsg_get_u32(tba[OCI_LINUX_SECCOMP_SYSCALLS_ARGS_INDEX]) > 5) 271 return NULL; 272 273 op_str = blobmsg_get_string(tba[OCI_LINUX_SECCOMP_SYSCALLS_ARGS_OP]); 274 if (!resolve_op_ins(op_str)) 275 return NULL; 276 277 if (resolve_op_is_masked(op_str)) 278 ++sz; /* SCMP_CMP_MASKED_EQ needs an extra BPF_AND op */ 279 } 280 } 281 } 282 283 if (sz < 6) 284 return NULL; 285 286 prog = malloc(sizeof(struct sock_fprog)); 287 if (!prog) 288 return NULL; 289 290 filter = calloc(sz, sizeof(struct sock_filter)); 291 if (!filter) { 292 ERROR("failed to allocate memory for seccomp filter\n"); 293 goto errout2; 294 } 295 296 /* validate arch */ 297 set_filter(&filter[idx++], BPF_LD + BPF_W + BPF_ABS, 0, 0, arch_nr); 298 set_filter(&filter[idx++], BPF_JMP + BPF_JEQ + BPF_K, 1, 0, ARCH_NR); 299 set_filter(&filter[idx++], BPF_RET + BPF_K, 0, 0, SECCOMP_RET_KILL); 300 301 blobmsg_for_each_attr(cur, tb[OCI_LINUX_SECCOMP_SYSCALLS], rem) { 302 uint32_t action; 303 uint32_t op_idx; 304 uint8_t op_ins; 305 bool op_inv, op_masked; 306 uint64_t op_val, op_val2; 307 int start_rule_idx; 308 int next_rule_idx; 309 310 blobmsg_parse(oci_linux_seccomp_syscalls_policy, 311 
__OCI_LINUX_SECCOMP_SYSCALLS_MAX, 312 tbn, blobmsg_data(cur), blobmsg_len(cur)); 313 action = resolve_action(blobmsg_get_string( 314 tbn[OCI_LINUX_SECCOMP_SYSCALLS_ACTION])); 315 if (tbn[OCI_LINUX_SECCOMP_SYSCALLS_ERRNORET]) { 316 if (action != SECCOMP_RET_ERRNO) 317 goto errout1; 318 319 action = SECCOMP_RET_ERROR(blobmsg_get_u32( 320 tbn[OCI_LINUX_SECCOMP_SYSCALLS_ERRNORET])); 321 } else if (action == SECCOMP_RET_ERRNO) 322 action = SECCOMP_RET_ERROR(EPERM); 323 324 /* load syscall */ 325 set_filter(&filter[idx++], BPF_LD + BPF_W + BPF_ABS, 0, 0, syscall_nr); 326 327 /* get number of syscall names */ 328 next_rule_idx = idx; 329 blobmsg_for_each_attr(curn, tbn[OCI_LINUX_SECCOMP_SYSCALLS_NAMES], remn) { 330 if (find_syscall(blobmsg_get_string(curn)) == -1) 331 continue; 332 333 ++next_rule_idx; 334 } 335 start_rule_idx = next_rule_idx; 336 337 /* calculate length of argument filter rules */ 338 blobmsg_for_each_attr(curn, tbn[OCI_LINUX_SECCOMP_SYSCALLS_ARGS], remn) { 339 blobmsg_parse(oci_linux_seccomp_syscalls_args_policy, 340 __OCI_LINUX_SECCOMP_SYSCALLS_ARGS_MAX, 341 tba, blobmsg_data(curn), blobmsg_len(curn)); 342 next_rule_idx += 2; 343 op_str = blobmsg_get_string(tba[OCI_LINUX_SECCOMP_SYSCALLS_ARGS_OP]); 344 if (resolve_op_is_masked(op_str)) 345 ++next_rule_idx; 346 } 347 348 ++next_rule_idx; /* account for return action */ 349 350 blobmsg_for_each_attr(curn, tbn[OCI_LINUX_SECCOMP_SYSCALLS_NAMES], remn) { 351 sc = find_syscall(blobmsg_get_string(curn)); 352 if (sc == -1) 353 continue; 354 /* 355 * check syscall, skip other syscall checks if match is found. 
356 * if no match is found, jump to next section 357 */ 358 set_filter(&filter[idx], BPF_JMP + BPF_JEQ + BPF_K, 359 start_rule_idx - (idx + 1), 360 ((idx + 1) == start_rule_idx)?(next_rule_idx - (idx + 1)):0, 361 sc); 362 ++idx; 363 } 364 365 assert(idx = start_rule_idx); 366 367 /* generate argument filter rules */ 368 blobmsg_for_each_attr(curn, tbn[OCI_LINUX_SECCOMP_SYSCALLS_ARGS], remn) { 369 blobmsg_parse(oci_linux_seccomp_syscalls_args_policy, 370 __OCI_LINUX_SECCOMP_SYSCALLS_ARGS_MAX, 371 tba, blobmsg_data(curn), blobmsg_len(curn)); 372 373 op_str = blobmsg_get_string(tba[OCI_LINUX_SECCOMP_SYSCALLS_ARGS_OP]); 374 op_ins = resolve_op_ins(op_str); 375 op_inv = resolve_op_inv(op_str); 376 op_masked = resolve_op_is_masked(op_str); 377 op_idx = blobmsg_get_u32(tba[OCI_LINUX_SECCOMP_SYSCALLS_ARGS_INDEX]); 378 op_val = blobmsg_cast_u64(tba[OCI_LINUX_SECCOMP_SYSCALLS_ARGS_VALUE]); 379 if (tba[OCI_LINUX_SECCOMP_SYSCALLS_ARGS_VALUETWO]) 380 op_val2 = blobmsg_cast_u64(tba[OCI_LINUX_SECCOMP_SYSCALLS_ARGS_VALUETWO]); 381 else 382 op_val2 = 0; 383 384 /* load argument */ 385 set_filter(&filter[idx++], BPF_LD + BPF_W + BPF_ABS, 0, 0, syscall_arg(op_idx)); 386 387 /* apply mask */ 388 if (op_masked) 389 set_filter(&filter[idx++], BPF_ALU + BPF_K + BPF_AND, 0, 0, op_val); 390 391 set_filter(&filter[idx], BPF_JMP + op_ins + BPF_K, 392 op_inv?(next_rule_idx - (idx + 1)):0, 393 op_inv?0:(next_rule_idx - (idx + 1)), 394 op_masked?op_val2:op_val); 395 ++idx; 396 } 397 398 /* if we have reached until here, all conditions were met and we can return */ 399 set_filter(&filter[idx++], BPF_RET + BPF_K, 0, 0, action); 400 401 assert(idx == next_rule_idx); 402 } 403 404 set_filter(&filter[idx++], BPF_RET + BPF_K, 0, 0, default_policy); 405 406 assert(idx == sz); 407 408 prog->len = (unsigned short) idx; 409 prog->filter = filter; 410 411 DEBUG("generated seccomp-bpf program:\n"); 412 if (debug) { 413 fprintf(stderr, " [idx]\tcode\t jt\t jf\tk\n"); 414 for (idx=0; idx<sz; idx++) 415 
fprintf(stderr, " [%03d]\t%04hx\t%3hhu\t%3hhu\t%08x\n", idx, 416 filter[idx].code, 417 filter[idx].jt, 418 filter[idx].jf, 419 filter[idx].k); 420 } 421 422 return prog; 423 424 errout1: 425 free(filter); 426 errout2: 427 free(prog); 428 return NULL; 429 } 430 431 432 int applyOCIlinuxseccomp(struct sock_fprog *prog) 433 { 434 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { 435 ERROR("prctl(PR_SET_NO_NEW_PRIVS) failed: %m\n"); 436 goto errout; 437 } 438 439 if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, prog)) { 440 ERROR("prctl(PR_SET_SECCOMP) failed: %m\n"); 441 goto errout; 442 } 443 free(prog); 444 445 return 0; 446 447 errout: 448 free(prog->filter); 449 free(prog); 450 return errno; 451 } 452
/* This page was automatically generated by LXR 0.3.1. • OpenWrt */