• source navigation  • diff markup  • identifier search  • freetext search  • 

Sources/procd/jail/seccomp-oci.c

  1 /*
  2  * parse and setup OCI seccomp filter
  3  * Copyright (c) 2020 Daniel Golle <daniel@makrotopia.org>
  4  * seccomp example with syscall reporting
  5  * Copyright (c) 2012 The Chromium OS Authors <chromium-os-dev@chromium.org>
  6  * Authors:
  7  *  Kees Cook <keescook@chromium.org>
  8  *  Will Drewry <wad@chromium.org>
  9  *
 10  * Use of this source code is governed by a BSD-style license that can be
 11  * found in the LICENSE file.
 12  *
 13  * BPF control flow
 14  *
 15  * (check_arch)<t>---(check_syscall)<f>---+----[...]<f>---(return default_action)
 16  *       |<f>                |<t>         |
 17  *      KILL         (check_argument)<f>--+
 18  *                           |<t>
 19  *                         [...]
 20  *                           |<t>
 21  *                    (return action)
 22  */
 23 #define _GNU_SOURCE 1
 24 #include <assert.h>
 25 #include <stddef.h>
 26 #include <stdlib.h>
 27 #include <unistd.h>
 28 
 29 #include <libubox/utils.h>
 30 #include <libubox/blobmsg.h>
 31 #include <libubox/blobmsg_json.h>
 32 
 33 #include "log.h"
 34 #include "seccomp-bpf.h"
 35 #include "seccomp-oci.h"
 36 #include "../syscall-names.h"
 37 #include "seccomp-syscalls-helpers.h"
 38 
 39 static uint32_t resolve_action(char *actname)
 40 {
 41         if (!strcmp(actname, "SCMP_ACT_KILL"))
 42                 return SECCOMP_RET_KILL;
 43         else if (!strcmp(actname, "SCMP_ACT_KILL_PROCESS"))
 44                 return SECCOMP_RET_KILLPROCESS;
 45         else if (!strcmp(actname, "SCMP_ACT_TRAP"))
 46                 return SECCOMP_RET_TRAP;
 47         else if (!strcmp(actname, "SCMP_ACT_ERRNO"))
 48                 return SECCOMP_RET_ERRNO;
 49         else if (!strcmp(actname, "SCMP_ACT_ERROR"))
 50                 return SECCOMP_RET_ERRNO;
 51         else if (!strcmp(actname, "SCMP_ACT_TRACE"))
 52                 return SECCOMP_RET_TRACE;
 53         else if (!strcmp(actname, "SCMP_ACT_ALLOW"))
 54                 return SECCOMP_RET_ALLOW;
 55         else if (!strcmp(actname, "SCMP_ACT_LOG"))
 56                 return SECCOMP_RET_LOGALLOW;
 57         else {
 58                 ERROR("unknown seccomp action %s\n", actname);
 59                 return SECCOMP_RET_KILL;
 60         }
 61 }
 62 
 63 static uint8_t resolve_op_ins(const char *op)
 64 {
 65         if (!strcmp(op, "SCMP_CMP_NE")) /* invert EQ */
 66                 return BPF_JEQ;
 67         else if (!strcmp(op, "SCMP_CMP_LT")) /* invert GE */
 68                 return BPF_JGE;
 69         else if (!strcmp(op, "SCMP_CMP_LE")) /* invert GT */
 70                 return BPF_JGT;
 71         else if (!strcmp(op, "SCMP_CMP_EQ"))
 72                 return BPF_JEQ;
 73         else if (!strcmp(op, "SCMP_CMP_GE"))
 74                 return BPF_JGE;
 75         else if (!strcmp(op, "SCMP_CMP_GT"))
 76                 return BPF_JGT;
 77         else if (!strcmp(op, "SCMP_CMP_MASKED_EQ"))
 78                 return BPF_JEQ;
 79         else {
 80                 ERROR("unknown seccomp op %s\n", op);
 81                 return 0;
 82         }
 83 }
 84 
 85 static bool resolve_op_is_masked(const char *op)
 86 {
 87         if (!strcmp(op, "SCMP_CMP_MASKED_EQ"))
 88                 return true;
 89 
 90         return false;
 91 }
 92 
 93 static bool resolve_op_inv(const char *op)
 94 {
 95         if (!strcmp(op, "SCMP_CMP_NE") ||
 96             !strcmp(op, "SCMP_CMP_LT") ||
 97             !strcmp(op, "SCMP_CMP_LE"))
 98                 return true;
 99 
100         return false;
101 }
102 
103 static uint32_t resolve_architecture(char *archname)
104 {
105         if (!archname)
106                 return 0;
107 
108         if (!strcmp(archname, "SCMP_ARCH_X86"))
109                 return AUDIT_ARCH_I386;
110         else if (!strcmp(archname, "SCMP_ARCH_X86_64"))
111                 return AUDIT_ARCH_X86_64;
112         else if (!strcmp(archname, "SCMP_ARCH_X32"))
113                 /*
114                  * return AUDIT_ARCH_X86_64;
115                  * 32-bit userland on 64-bit kernel is not supported yet
116                  */
117                 return 0;
118         else if (!strcmp(archname, "SCMP_ARCH_ARM"))
119                 return AUDIT_ARCH_ARM;
120         else if (!strcmp(archname, "SCMP_ARCH_AARCH64"))
121                 return AUDIT_ARCH_AARCH64;
122         else if (!strcmp(archname, "SCMP_ARCH_MIPS"))
123                 return AUDIT_ARCH_MIPS;
124         else if (!strcmp(archname, "SCMP_ARCH_MIPS64"))
125                 return AUDIT_ARCH_MIPS64;
126         else if (!strcmp(archname, "SCMP_ARCH_MIPS64N32"))
127                 return AUDIT_ARCH_MIPS64N32;
128         else if (!strcmp(archname, "SCMP_ARCH_MIPSEL"))
129                 return AUDIT_ARCH_MIPSEL;
130         else if (!strcmp(archname, "SCMP_ARCH_MIPSEL64"))
131                 return AUDIT_ARCH_MIPSEL64;
132         else if (!strcmp(archname, "SCMP_ARCH_MIPSEL64N32"))
133                 return AUDIT_ARCH_MIPSEL64N32;
134         else if (!strcmp(archname, "SCMP_ARCH_PPC"))
135                 return AUDIT_ARCH_PPC;
136         else if (!strcmp(archname, "SCMP_ARCH_PPC64"))
137                 return AUDIT_ARCH_PPC64;
138         else if (!strcmp(archname, "SCMP_ARCH_PPC64LE"))
139                 return AUDIT_ARCH_PPC64LE;
140         else if (!strcmp(archname, "SCMP_ARCH_S390"))
141                 return AUDIT_ARCH_S390;
142         else if (!strcmp(archname, "SCMP_ARCH_S390X"))
143                 return AUDIT_ARCH_S390X;
144         else if (!strcmp(archname, "SCMP_ARCH_PARISC"))
145                 return AUDIT_ARCH_PARISC;
146         else if (!strcmp(archname, "SCMP_ARCH_PARISC64"))
147                 return AUDIT_ARCH_PARISC64;
148         else {
149                 ERROR("unknown seccomp architecture %s\n", archname);
150                 return 0;
151         }
152 }
153 
154 enum {
155         OCI_LINUX_SECCOMP_DEFAULTACTION,
156         OCI_LINUX_SECCOMP_ARCHITECTURES,
157         OCI_LINUX_SECCOMP_FLAGS,
158         OCI_LINUX_SECCOMP_SYSCALLS,
159         __OCI_LINUX_SECCOMP_MAX,
160 };
161 
162 static const struct blobmsg_policy oci_linux_seccomp_policy[] = {
163         [OCI_LINUX_SECCOMP_DEFAULTACTION] = { "defaultAction", BLOBMSG_TYPE_STRING },
164         [OCI_LINUX_SECCOMP_ARCHITECTURES] = { "architectures", BLOBMSG_TYPE_ARRAY },
165         [OCI_LINUX_SECCOMP_FLAGS] = { "flags", BLOBMSG_TYPE_ARRAY },
166         [OCI_LINUX_SECCOMP_SYSCALLS] = { "syscalls", BLOBMSG_TYPE_ARRAY },
167 };
168 
169 enum {
170         OCI_LINUX_SECCOMP_SYSCALLS_NAMES,
171         OCI_LINUX_SECCOMP_SYSCALLS_ACTION,
172         OCI_LINUX_SECCOMP_SYSCALLS_ERRNORET,
173         OCI_LINUX_SECCOMP_SYSCALLS_ARGS,
174         __OCI_LINUX_SECCOMP_SYSCALLS_MAX
175 };
176 
177 static const struct blobmsg_policy oci_linux_seccomp_syscalls_policy[] = {
178         [OCI_LINUX_SECCOMP_SYSCALLS_NAMES] = { "names", BLOBMSG_TYPE_ARRAY },
179         [OCI_LINUX_SECCOMP_SYSCALLS_ERRNORET] = { "errnoRet", BLOBMSG_TYPE_INT32 },
180         [OCI_LINUX_SECCOMP_SYSCALLS_ARGS] = { "args", BLOBMSG_TYPE_ARRAY },
181         [OCI_LINUX_SECCOMP_SYSCALLS_ACTION] = { "action", BLOBMSG_TYPE_STRING },
182 };
183 
184 enum {
185         OCI_LINUX_SECCOMP_SYSCALLS_ARGS_INDEX,
186         OCI_LINUX_SECCOMP_SYSCALLS_ARGS_VALUE,
187         OCI_LINUX_SECCOMP_SYSCALLS_ARGS_VALUETWO,
188         OCI_LINUX_SECCOMP_SYSCALLS_ARGS_OP,
189         __OCI_LINUX_SECCOMP_SYSCALLS_ARGS_MAX
190 };
191 
192 static const struct blobmsg_policy oci_linux_seccomp_syscalls_args_policy[] = {
193         [OCI_LINUX_SECCOMP_SYSCALLS_ARGS_INDEX] = { "index", BLOBMSG_TYPE_INT32 },
194         [OCI_LINUX_SECCOMP_SYSCALLS_ARGS_VALUE] = { "value", BLOBMSG_CAST_INT64 },
195         [OCI_LINUX_SECCOMP_SYSCALLS_ARGS_VALUETWO] = { "valueTwo", BLOBMSG_CAST_INT64 },
196         [OCI_LINUX_SECCOMP_SYSCALLS_ARGS_OP] = { "op", BLOBMSG_TYPE_STRING },
197 };
198 
199 struct sock_fprog *parseOCIlinuxseccomp(struct blob_attr *msg)
200 {
201         struct blob_attr *tb[__OCI_LINUX_SECCOMP_MAX];
202         struct blob_attr *tbn[__OCI_LINUX_SECCOMP_SYSCALLS_MAX];
203         struct blob_attr *tba[__OCI_LINUX_SECCOMP_SYSCALLS_ARGS_MAX];
204         struct blob_attr *cur, *curn, *curarg;
205         int rem, remn, remargs, sc;
206         struct sock_filter *filter;
207         struct sock_fprog *prog;
208         int sz = 4, idx = 0;
209         uint32_t default_policy = 0;
210         uint32_t seccomp_arch;
211         bool arch_matched;
212         char *op_str;
213 
214         blobmsg_parse(oci_linux_seccomp_policy, __OCI_LINUX_SECCOMP_MAX,
215                       tb, blobmsg_data(msg), blobmsg_len(msg));
216 
217         if (!tb[OCI_LINUX_SECCOMP_DEFAULTACTION]) {
218                 ERROR("seccomp: no default action set\n");
219                 return NULL;
220         }
221 
222         default_policy = resolve_action(blobmsg_get_string(tb[OCI_LINUX_SECCOMP_DEFAULTACTION]));
223 
224         /* verify architecture while ignoring the x86_64 anomaly for now */
225         if (tb[OCI_LINUX_SECCOMP_ARCHITECTURES]) {
226                 arch_matched = false;
227                 blobmsg_for_each_attr(cur, tb[OCI_LINUX_SECCOMP_ARCHITECTURES], rem) {
228                         seccomp_arch = resolve_architecture(blobmsg_get_string(cur));
229                         if (ARCH_NR == seccomp_arch) {
230                                 arch_matched = true;
231                                 break;
232                         }
233                 }
234                 if (!arch_matched) {
235                         ERROR("seccomp architecture doesn't match system\n");
236                         return NULL;
237                 }
238         }
239 
240         blobmsg_for_each_attr(cur, tb[OCI_LINUX_SECCOMP_SYSCALLS], rem) {
241                 sz += 2; /* load and return */
242 
243                 blobmsg_parse(oci_linux_seccomp_syscalls_policy,
244                               __OCI_LINUX_SECCOMP_SYSCALLS_MAX,
245                               tbn, blobmsg_data(cur), blobmsg_len(cur));
246                 blobmsg_for_each_attr(curn, tbn[OCI_LINUX_SECCOMP_SYSCALLS_NAMES], remn) {
247                         sc = find_syscall(blobmsg_get_string(curn));
248                         if (sc == -1) {
249                                 DEBUG("unknown syscall '%s'\n", blobmsg_get_string(curn));
250                                 /* TODO: support run.oci.seccomp_fail_unknown_syscall=1 annotation */
251                                 continue;
252                         }
253                         ++sz;
254                 }
255 
256                 if (tbn[OCI_LINUX_SECCOMP_SYSCALLS_ARGS]) {
257                         blobmsg_for_each_attr(curarg, tbn[OCI_LINUX_SECCOMP_SYSCALLS_ARGS], remargs) {
258                                 sz += 2; /* load and compare */
259 
260                                 blobmsg_parse(oci_linux_seccomp_syscalls_args_policy,
261                                               __OCI_LINUX_SECCOMP_SYSCALLS_ARGS_MAX,
262                                               tba, blobmsg_data(curarg), blobmsg_len(curarg));
263                                 if (!tba[OCI_LINUX_SECCOMP_SYSCALLS_ARGS_INDEX] ||
264                                     !tba[OCI_LINUX_SECCOMP_SYSCALLS_ARGS_VALUE] ||
265                                     !tba[OCI_LINUX_SECCOMP_SYSCALLS_ARGS_OP])
266                                         return NULL;
267 
268                                 if (blobmsg_get_u32(tba[OCI_LINUX_SECCOMP_SYSCALLS_ARGS_INDEX]) > 5)
269                                         return NULL;
270 
271                                 op_str = blobmsg_get_string(tba[OCI_LINUX_SECCOMP_SYSCALLS_ARGS_OP]);
272                                 if (!resolve_op_ins(op_str))
273                                         return NULL;
274 
275                                 if (resolve_op_is_masked(op_str))
276                                         ++sz; /* SCMP_CMP_MASKED_EQ needs an extra BPF_AND op */
277                         }
278                 }
279         }
280 
281         if (sz < 6)
282                 return NULL;
283 
284         prog = malloc(sizeof(struct sock_fprog));
285         if (!prog)
286                 return NULL;
287 
288         filter = calloc(sz, sizeof(struct sock_filter));
289         if (!filter) {
290                 ERROR("failed to allocate memory for seccomp filter\n");
291                 goto errout2;
292         }
293 
294         /* validate arch */
295         set_filter(&filter[idx++], BPF_LD + BPF_W + BPF_ABS, 0, 0, arch_nr);
296         set_filter(&filter[idx++], BPF_JMP + BPF_JEQ + BPF_K, 1, 0, ARCH_NR);
297         set_filter(&filter[idx++], BPF_RET + BPF_K, 0, 0, SECCOMP_RET_KILL);
298 
299         blobmsg_for_each_attr(cur, tb[OCI_LINUX_SECCOMP_SYSCALLS], rem) {
300                 uint32_t action;
301                 uint32_t op_idx;
302                 uint8_t op_ins;
303                 bool op_inv, op_masked;
304                 uint64_t op_val, op_val2;
305                 int start_rule_idx;
306                 int next_rule_idx;
307 
308                 blobmsg_parse(oci_linux_seccomp_syscalls_policy,
309                               __OCI_LINUX_SECCOMP_SYSCALLS_MAX,
310                               tbn, blobmsg_data(cur), blobmsg_len(cur));
311                 action = resolve_action(blobmsg_get_string(
312                                 tbn[OCI_LINUX_SECCOMP_SYSCALLS_ACTION]));
313                 if (tbn[OCI_LINUX_SECCOMP_SYSCALLS_ERRNORET]) {
314                         if (action != SECCOMP_RET_ERRNO)
315                                 goto errout1;
316 
317                         action = SECCOMP_RET_ERROR(blobmsg_get_u32(
318                                         tbn[OCI_LINUX_SECCOMP_SYSCALLS_ERRNORET]));
319                 } else if (action == SECCOMP_RET_ERRNO)
320                         action = SECCOMP_RET_ERROR(EPERM);
321 
322                 /* load syscall */
323                 set_filter(&filter[idx++], BPF_LD + BPF_W + BPF_ABS, 0, 0, syscall_nr);
324 
325                 /* get number of syscall names */
326                 next_rule_idx = idx;
327                 blobmsg_for_each_attr(curn, tbn[OCI_LINUX_SECCOMP_SYSCALLS_NAMES], remn) {
328                         if (find_syscall(blobmsg_get_string(curn)) == -1)
329                                 continue;
330 
331                         ++next_rule_idx;
332                 }
333                 start_rule_idx = next_rule_idx;
334 
335                 /* calculate length of argument filter rules */
336                 blobmsg_for_each_attr(curn, tbn[OCI_LINUX_SECCOMP_SYSCALLS_ARGS], remn) {
337                         blobmsg_parse(oci_linux_seccomp_syscalls_args_policy,
338                                       __OCI_LINUX_SECCOMP_SYSCALLS_ARGS_MAX,
339                                       tba, blobmsg_data(curn), blobmsg_len(curn));
340                         next_rule_idx += 2;
341                         op_str = blobmsg_get_string(tba[OCI_LINUX_SECCOMP_SYSCALLS_ARGS_OP]);
342                         if (resolve_op_is_masked(op_str))
343                                 ++next_rule_idx;
344                 }
345 
346                 ++next_rule_idx; /* account for return action */
347 
348                 blobmsg_for_each_attr(curn, tbn[OCI_LINUX_SECCOMP_SYSCALLS_NAMES], remn) {
349                         sc = find_syscall(blobmsg_get_string(curn));
350                         if (sc == -1)
351                                 continue;
352                         /*
353                          * check syscall, skip other syscall checks if match is found.
354                          * if no match is found, jump to next section
355                          */
356                         set_filter(&filter[idx], BPF_JMP + BPF_JEQ + BPF_K,
357                                    start_rule_idx - (idx + 1),
358                                    ((idx + 1) == start_rule_idx)?(next_rule_idx - (idx + 1)):0,
359                                    sc);
360                         ++idx;
361                 }
362 
363                 assert(idx = start_rule_idx);
364 
365                 /* generate argument filter rules */
366                 blobmsg_for_each_attr(curn, tbn[OCI_LINUX_SECCOMP_SYSCALLS_ARGS], remn) {
367                         blobmsg_parse(oci_linux_seccomp_syscalls_args_policy,
368                                       __OCI_LINUX_SECCOMP_SYSCALLS_ARGS_MAX,
369                                       tba, blobmsg_data(curn), blobmsg_len(curn));
370 
371                         op_str = blobmsg_get_string(tba[OCI_LINUX_SECCOMP_SYSCALLS_ARGS_OP]);
372                         op_ins = resolve_op_ins(op_str);
373                         op_inv = resolve_op_inv(op_str);
374                         op_masked = resolve_op_is_masked(op_str);
375                         op_idx = blobmsg_get_u32(tba[OCI_LINUX_SECCOMP_SYSCALLS_ARGS_INDEX]);
376                         op_val = blobmsg_cast_u64(tba[OCI_LINUX_SECCOMP_SYSCALLS_ARGS_VALUE]);
377                         if (tba[OCI_LINUX_SECCOMP_SYSCALLS_ARGS_VALUETWO])
378                                 op_val2 = blobmsg_cast_u64(tba[OCI_LINUX_SECCOMP_SYSCALLS_ARGS_VALUETWO]);
379                         else
380                                 op_val2 = 0;
381 
382                         /* load argument */
383                         set_filter(&filter[idx++], BPF_LD + BPF_W + BPF_ABS, 0, 0, syscall_arg(op_idx));
384 
385                         /* apply mask */
386                         if (op_masked)
387                                 set_filter(&filter[idx++], BPF_ALU + BPF_K + BPF_AND, 0, 0, op_val);
388 
389                         set_filter(&filter[idx], BPF_JMP + op_ins + BPF_K,
390                                    op_inv?(next_rule_idx - (idx + 1)):0,
391                                    op_inv?0:(next_rule_idx - (idx + 1)),
392                                    op_masked?op_val2:op_val);
393                         ++idx;
394                 }
395 
396                 /* if we have reached until here, all conditions were met and we can return */
397                 set_filter(&filter[idx++], BPF_RET + BPF_K, 0, 0, action);
398 
399                 assert(idx == next_rule_idx);
400         }
401 
402         set_filter(&filter[idx++], BPF_RET + BPF_K, 0, 0, default_policy);
403 
404         assert(idx == sz);
405 
406         prog->len = (unsigned short) idx;
407         prog->filter = filter;
408 
409         DEBUG("generated seccomp-bpf program:\n");
410         if (debug) {
411                 fprintf(stderr, " [idx]\tcode\t jt\t jf\tk\n");
412                 for (idx=0; idx<sz; idx++)
413                         fprintf(stderr, " [%03d]\t%04hx\t%3hhu\t%3hhu\t%08x\n", idx,
414                                 filter[idx].code,
415                                 filter[idx].jt,
416                                 filter[idx].jf,
417                                 filter[idx].k);
418         }
419 
420         return prog;
421 
422 errout1:
423         free(prog->filter);
424 errout2:
425         free(prog);
426         return NULL;
427 }
428 
429 
430 int applyOCIlinuxseccomp(struct sock_fprog *prog)
431 {
432         if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
433                 ERROR("prctl(PR_SET_NO_NEW_PRIVS) failed: %m\n");
434                 goto errout;
435         }
436 
437         if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, prog)) {
438                 ERROR("prctl(PR_SET_SECCOMP) failed: %m\n");
439                 goto errout;
440         }
441         free(prog);
442 
443         return 0;
444 
445 errout:
446         free(prog->filter);
447         free(prog);
448         return errno;
449 }
450 

This page was automatically generated by LXR 0.3.1.  •  OpenWrt