• source navigation  • diff markup  • identifier search  • freetext search  • 

Sources/procd/jail/jail.c

  1 /*
  2  * Copyright (C) 2015 John Crispin <blogic@openwrt.org>
  3  *
  4  * This program is free software; you can redistribute it and/or modify
  5  * it under the terms of the GNU Lesser General Public License version 2.1
  6  * as published by the Free Software Foundation
  7  *
  8  * This program is distributed in the hope that it will be useful,
  9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
 10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 11  * GNU General Public License for more details.
 12  */
 13 
 14 #define _GNU_SOURCE
 15 #include <sys/mount.h>
 16 #include <sys/prctl.h>
 17 #include <sys/wait.h>
 18 #include <sys/types.h>
 19 
 20 #include <stdlib.h>
 21 #include <unistd.h>
 22 #include <errno.h>
 23 #include <pwd.h>
 24 #include <grp.h>
 25 #include <string.h>
 26 #include <sys/stat.h>
 27 #include <fcntl.h>
 28 #include <libgen.h>
 29 #include <sched.h>
 30 #include <linux/limits.h>
 31 #include <signal.h>
 32 
 33 #include "capabilities.h"
 34 #include "elf.h"
 35 #include "fs.h"
 36 #include "jail.h"
 37 #include "log.h"
 38 
 39 #include <libubox/uloop.h>
 40 #include <libubus.h>
 41 
 42 #define STACK_SIZE      (1024 * 1024)
 43 #define OPT_ARGS        "S:C:n:h:r:w:d:psulocU:G:NR:fFO:T:Ey"
 44 
 45 static struct {
 46         char *name;
 47         char *hostname;
 48         char **jail_argv;
 49         char *seccomp;
 50         char *capabilities;
 51         char *user;
 52         char *group;
 53         char *extroot;
 54         char *overlaydir;
 55         char *tmpoverlaysize;
 56         int no_new_privs;
 57         int namespace;
 58         int procfs;
 59         int ronly;
 60         int sysfs;
 61         int console;
 62         int pw_uid;
 63         int pw_gid;
 64         int gr_gid;
 65         int require_jail;
 66 } opts;
 67 
 68 
 69 extern int pivot_root(const char *new_root, const char *put_old);
 70 
 71 int debug = 0;
 72 
 73 static char child_stack[STACK_SIZE];
 74 
 75 int console_fd;
 76 
 77 static int mkdir_p(char *dir, mode_t mask)
 78 {
 79         char *l = strrchr(dir, '/');
 80         int ret;
 81 
 82         if (!l)
 83                 return 0;
 84 
 85         *l = '\0';
 86 
 87         if (mkdir_p(dir, mask))
 88                 return -1;
 89 
 90         *l = '/';
 91 
 92         ret = mkdir(dir, mask);
 93         if (ret && errno == EEXIST)
 94                 return 0;
 95 
 96         if (ret)
 97                 ERROR("mkdir(%s, %d) failed: %m\n", dir, mask);
 98 
 99         return ret;
100 }
101 
/*
 * Bind-mount host @path into the jail rooted at @root.
 *
 * @target:   path inside the jail; NULL means "same as @path"
 * @readonly: remount the bind read-only
 * @strict:   remount the bind with noexec, nosuid and nodev
 * @error:    value to return when @path cannot be stat()ed
 *
 * The mount point (a directory, or an empty file for non-directories) is
 * created below @root first. Returns 0 on success, @error when stat()
 * fails and -1 on any other failure.
 */
static int _mount_bind(const char *root, const char *path, const char *target, int readonly, int strict, int error)
{
	const char *inside = target ? target : path;
	const char *mode = readonly ? "ro" : "rw";
	const char *sep = (readonly && strict) ? ", " : "";
	const char *hardening = strict ? "strict" : "";
	char new[PATH_MAX];
	struct stat s;

	if (stat(path, &s)) {
		ERROR("stat(%s) failed: %m\n", path);
		return error;
	}

	snprintf(new, sizeof(new), "%s%s", root, inside);

	if (!S_ISDIR(s.st_mode)) {
		int fd;

		/* dirname() may modify its argument, so rebuild the path afterwards */
		mkdir_p(dirname(new), 0755);
		snprintf(new, sizeof(new), "%s%s", root, inside);

		fd = creat(new, 0644);
		if (fd == -1) {
			ERROR("creat(%s) failed: %m\n", new);
			return -1;
		}
		close(fd);
	} else {
		mkdir_p(new, 0755);
	}

	if (mount(path, new, NULL, MS_BIND, NULL)) {
		ERROR("failed to mount -B %s %s: %m\n", path, new);
		return -1;
	}

	/* extra flags are applied by a second, remounting, mount() call */
	if (strict || readonly) {
		int remount_flags = MS_BIND | MS_REMOUNT;

		if (readonly)
			remount_flags |= MS_RDONLY;
		if (strict)
			remount_flags |= MS_NOEXEC | MS_NOSUID | MS_NODEV;

		if (mount(NULL, new, NULL, remount_flags, NULL)) {
			ERROR("failed to remount (%s%s%s) %s: %m\n", mode,
			      sep, hardening, new);
			return -1;
		}
	}

	DEBUG("mount -B %s %s (%s%s%s)\n", path, new,
	      mode, sep, hardening);

	return 0;
}
151 
152 int mount_bind(const char *root, const char *path, int readonly, int error) {
153         return _mount_bind(root, path, NULL, readonly, 0, error);
154 }
155 
/*
 * Mount an overlay filesystem on top of @jail_root, using @jail_root as
 * the lower layer and "<overlaydir>/upper" and "<overlaydir>/work" as the
 * upper and work directories (both are created if missing).
 *
 * Returns 0 on success, -1 on allocation, mkdir or mount failure.
 */
static int mount_overlay(char *jail_root, char *overlaydir) {
	char *upperdir = NULL, *workdir = NULL, *optsstr = NULL;
	int ret = -1;

	/* asprintf() leaves the pointer unspecified on failure; reset it for free() */
	if (asprintf(&upperdir, "%s%s", overlaydir, "/upper") < 0) {
		upperdir = NULL;
		goto out;
	}

	if (asprintf(&workdir, "%s%s", overlaydir, "/work") < 0) {
		workdir = NULL;
		goto out;
	}

	if (asprintf(&optsstr, "lowerdir=%s,upperdir=%s,workdir=%s",
		     jail_root, upperdir, workdir) < 0) {
		optsstr = NULL;
		goto out;
	}

	if (mkdir_p(upperdir, 0755) || mkdir_p(workdir, 0755))
		goto out;

	DEBUG("mount -t overlay %s %s (%s)\n", jail_root, jail_root, optsstr);

	if (!mount(jail_root, jail_root, "overlay", MS_NOATIME, optsstr))
		ret = 0;

out:
	free(optsstr);
	free(workdir);
	free(upperdir);
	return ret;
}
189 
190 static void pass_console(int console_fd)
191 {
192         struct ubus_context *ctx = ubus_connect(NULL);
193         static struct blob_buf req;
194         uint32_t id;
195 
196         if (!ctx)
197                 return;
198 
199         blob_buf_init(&req, 0);
200         blobmsg_add_string(&req, "name", opts.name);
201 
202         if (ubus_lookup_id(ctx, "container", &id) ||
203             ubus_invoke_fd(ctx, id, "console_set", req.head, NULL, NULL, 3000, console_fd))
204                 INFO("ubus request failed\n");
205         else
206                 close(console_fd);
207 
208         blob_buf_free(&req);
209         ubus_free(ctx);
210 }
211 
212 static int create_dev_console(const char *jail_root)
213 {
214         char *console_fname;
215         char dev_console_path[PATH_MAX];
216         int slave_console_fd;
217 
218         /* Open UNIX/98 virtual console */
219         console_fd = posix_openpt(O_RDWR | O_NOCTTY);
220         if (console_fd == -1)
221                 return -1;
222 
223         console_fname = ptsname(console_fd);
224         DEBUG("got console fd %d and PTS client name %s\n", console_fd, console_fname);
225         if (!console_fname)
226                 goto no_console;
227 
228         grantpt(console_fd);
229         unlockpt(console_fd);
230 
231         /* pass PTY master to procd */
232         pass_console(console_fd);
233 
234         /* mount-bind PTY slave to /dev/console in jail */
235         snprintf(dev_console_path, sizeof(dev_console_path), "%s/dev/console", jail_root);
236         close(creat(dev_console_path, 0620));
237 
238         if (mount(console_fname, dev_console_path, NULL, MS_BIND, NULL))
239                 goto no_console;
240 
241         /* use PTY slave for stdio */
242         slave_console_fd = open(console_fname, O_RDWR); /* | O_NOCTTY */
243         dup2(slave_console_fd, 0);
244         dup2(slave_console_fd, 1);
245         dup2(slave_console_fd, 2);
246         close(slave_console_fd);
247 
248         INFO("using guest console %s\n", console_fname);
249 
250         return 0;
251 
252 no_console:
253         close(console_fd);
254         return 1;
255 }
256 
257 static int build_jail_fs(void)
258 {
259         char jail_root[] = "/tmp/ujail-XXXXXX";
260         char tmpovdir[] = "/tmp/ujail-overlay-XXXXXX";
261         char tmpdevdir[] = "/tmp/ujail-XXXXXX/dev";
262         char tmpdevptsdir[] = "/tmp/ujail-XXXXXX/dev/pts";
263         char *overlaydir = NULL;
264 
265         if (mkdtemp(jail_root) == NULL) {
266                 ERROR("mkdtemp(%s) failed: %m\n", jail_root);
267                 return -1;
268         }
269 
270         /* oldroot can't be MS_SHARED else pivot_root() fails */
271         if (mount("none", "/", NULL, MS_REC|MS_PRIVATE, NULL)) {
272                 ERROR("private mount failed %m\n");
273                 return -1;
274         }
275 
276         if (opts.extroot) {
277                 if (mount(opts.extroot, jail_root, NULL, MS_BIND, NULL)) {
278                         ERROR("extroot mount failed %m\n");
279                         return -1;
280                 }
281         } else {
282                 if (mount("tmpfs", jail_root, "tmpfs", MS_NOATIME, "mode=0755")) {
283                         ERROR("tmpfs mount failed %m\n");
284                         return -1;
285                 }
286         }
287 
288         if (opts.tmpoverlaysize) {
289                 char mountoptsstr[] = "mode=0755,size=XXXXXXXX";
290 
291                 snprintf(mountoptsstr, sizeof(mountoptsstr),
292                          "mode=0755,size=%s", opts.tmpoverlaysize);
293                 if (mkdtemp(tmpovdir) == NULL) {
294                         ERROR("mkdtemp(%s) failed: %m\n", jail_root);
295                         return -1;
296                 }
297                 if (mount("tmpfs", tmpovdir, "tmpfs", MS_NOATIME,
298                           mountoptsstr)) {
299                         ERROR("failed to mount tmpfs for overlay (size=%s)\n", opts.tmpoverlaysize);
300                         return -1;
301                 }
302                 overlaydir = tmpovdir;
303         }
304 
305         if (opts.overlaydir)
306                 overlaydir = opts.overlaydir;
307 
308         if (overlaydir)
309                 mount_overlay(jail_root, overlaydir);
310 
311         if (chdir(jail_root)) {
312                 ERROR("chdir(%s) (jail_root) failed: %m\n", jail_root);
313                 return -1;
314         }
315 
316         snprintf(tmpdevdir, sizeof(tmpdevdir), "%s/dev", jail_root);
317         mkdir_p(tmpdevdir, 0755);
318         if (mount(NULL, tmpdevdir, "tmpfs", MS_NOATIME | MS_NOEXEC | MS_NOSUID, "size=1M"))
319                 return -1;
320 
321         snprintf(tmpdevptsdir, sizeof(tmpdevptsdir), "%s/dev/pts", jail_root);
322         mkdir_p(tmpdevptsdir, 0755);
323         if (mount(NULL, tmpdevptsdir, "devpts", MS_NOATIME | MS_NOEXEC | MS_NOSUID, NULL))
324                 return -1;
325 
326         if (opts.console)
327                 create_dev_console(jail_root);
328 
329         if (mount_all(jail_root)) {
330                 ERROR("mount_all() failed\n");
331                 return -1;
332         }
333 
334         if (opts.namespace & CLONE_NEWNET) {
335                 char hostdir[PATH_MAX], jailetc[PATH_MAX], jaillink[PATH_MAX];
336 
337                 snprintf(hostdir, PATH_MAX, "/tmp/resolv.conf-%s.d", opts.name);
338                 mkdir_p(hostdir, 0755);
339                 _mount_bind(jail_root, hostdir, "/tmp/resolv.conf.d", 1, 1, -1);
340                 snprintf(jailetc, PATH_MAX, "%s/etc", jail_root);
341                 mkdir_p(jailetc, 0755);
342                 snprintf(jaillink, PATH_MAX, "%s/etc/resolv.conf", jail_root);
343                 if (overlaydir)
344                         unlink(jaillink);
345                 symlink("../tmp/resolv.conf.d/resolv.conf.auto", jaillink);
346         }
347 
348         char dirbuf[sizeof(jail_root) + 4];
349         snprintf(dirbuf, sizeof(dirbuf), "%s/old", jail_root);
350         mkdir(dirbuf, 0755);
351 
352         if (pivot_root(jail_root, dirbuf) == -1) {
353                 ERROR("pivot_root(%s, %s) failed: %m\n", jail_root, dirbuf);
354                 return -1;
355         }
356         if (chdir("/")) {
357                 ERROR("chdir(/) (after pivot_root) failed: %m\n");
358                 return -1;
359         }
360 
361         snprintf(dirbuf, sizeof(dirbuf), "/old%s", jail_root);
362         umount2(dirbuf, MNT_DETACH);
363         rmdir(dirbuf);
364         if (opts.tmpoverlaysize) {
365                 char tmpdirbuf[sizeof(tmpovdir) + 4];
366                 snprintf(tmpdirbuf, sizeof(tmpdirbuf), "/old%s", tmpovdir);
367                 umount2(tmpdirbuf, MNT_DETACH);
368                 rmdir(tmpdirbuf);
369         }
370 
371         umount2("/old", MNT_DETACH);
372         rmdir("/old");
373 
374         if (opts.procfs) {
375                 mkdir("/proc", 0755);
376                 mount("proc", "/proc", "proc", MS_NOATIME | MS_NODEV | MS_NOEXEC | MS_NOSUID, 0);
377                 /*
378                  * make /proc/sys read-only while keeping read-write to
379                  * /proc/sys/net if CLONE_NEWNET is set.
380                  */
381                 if (opts.namespace & CLONE_NEWNET)
382                         mount("/proc/sys/net", "/proc/self/net", NULL, MS_BIND, 0);
383 
384                 mount("/proc/sys", "/proc/sys", NULL, MS_BIND, 0);
385                 mount(NULL, "/proc/sys", NULL, MS_REMOUNT | MS_RDONLY, 0);
386                 mount(NULL, "/proc", NULL, MS_REMOUNT, 0);
387 
388                 if (opts.namespace & CLONE_NEWNET)
389                         mount("/proc/self/net", "/proc/sys/net", NULL, MS_MOVE, 0);
390         }
391         if (opts.sysfs) {
392                 mkdir("/sys", 0755);
393                 mount("sysfs", "/sys", "sysfs", MS_NOATIME | MS_NODEV | MS_NOEXEC | MS_NOSUID | MS_RDONLY, 0);
394         }
395         if (opts.ronly)
396                 mount(NULL, "/", NULL, MS_RDONLY | MS_REMOUNT, 0);
397 
398         return 0;
399 }
400 
/*
 * Write a single-entry id mapping for process @child_pid.
 *
 * The line "0 <id> 1" is written to /proc/<child_pid>/gid_map when @gidmap
 * is true and to /proc/<child_pid>/uid_map otherwise; see
 * user_namespaces(7) for the meaning of the three fields.
 *
 * Returns 0 on success, -1 when the file cannot be opened or written.
 */
static int write_uid_gid_map(pid_t child_pid, bool gidmap, int id)
{
	const char *which = gidmap ? "gid_map" : "uid_map";
	char map_path[64];
	int fd, rc;

	if (snprintf(map_path, sizeof(map_path), "/proc/%d/%s",
		     child_pid, which) < 0)
		return -1;

	fd = open(map_path, O_WRONLY);
	if (fd == -1)
		return -1;

	rc = (dprintf(fd, "%d %d %d\n", 0, id, 1) == -1) ? -1 : 0;

	close(fd);
	return rc;
}
421 
/*
 * Write "allow" (when @allow is true) or "deny" to
 * /proc/<child_pid>/setgroups.
 *
 * Returns 0 on success, -1 when the file cannot be opened or written.
 */
static int write_setgroups(pid_t child_pid, bool allow)
{
	const char *policy = allow ? "allow" : "deny";
	char setgroups_path[64];
	int fd, rc;

	if (snprintf(setgroups_path, sizeof(setgroups_path),
		     "/proc/%d/setgroups", child_pid) < 0)
		return -1;

	fd = open(setgroups_path, O_WRONLY);
	if (fd == -1)
		return -1;

	rc = (dprintf(fd, "%s", policy) == -1) ? -1 : 0;

	close(fd);
	return rc;
}
444 
445 static void get_jail_user(int *user, int *user_gid, int *gr_gid)
446 {
447         struct passwd *p = NULL;
448         struct group *g = NULL;
449 
450         if (opts.user) {
451                 p = getpwnam(opts.user);
452                 if (!p) {
453                         ERROR("failed to get uid/gid for user %s: %d (%s)\n",
454                               opts.user, errno, strerror(errno));
455                         exit(EXIT_FAILURE);
456                 }
457                 *user = p->pw_uid;
458                 *user_gid = p->pw_gid;
459         } else {
460                 *user = -1;
461                 *user_gid = -1;
462         }
463 
464         if (opts.group) {
465                 g = getgrnam(opts.group);
466                 if (!g) {
467                         ERROR("failed to get gid for group %s: %m\n", opts.group);
468                         exit(EXIT_FAILURE);
469                 }
470                 *gr_gid = g->gr_gid;
471         } else {
472                 *gr_gid = -1;
473         }
474 };
475 
476 static void set_jail_user(int pw_uid, int user_gid, int gr_gid)
477 {
478         if ((user_gid != -1) && initgroups(opts.user, user_gid)) {
479                 ERROR("failed to initgroups() for user %s: %m\n", opts.user);
480                 exit(EXIT_FAILURE);
481         }
482 
483         if ((gr_gid != -1) && setregid(gr_gid, gr_gid)) {
484                 ERROR("failed to set group id %d: %m\n", gr_gid);
485                 exit(EXIT_FAILURE);
486         }
487 
488         if ((pw_uid != -1) && setreuid(pw_uid, pw_uid)) {
489                 ERROR("failed to set user id %d: %m\n", pw_uid);
490                 exit(EXIT_FAILURE);
491         }
492 }
493 
494 #define MAX_ENVP        8
495 static char** build_envp(const char *seccomp)
496 {
497         static char *envp[MAX_ENVP];
498         static char preload_var[PATH_MAX];
499         static char seccomp_var[PATH_MAX];
500         static char debug_var[] = "LD_DEBUG=all";
501         static char container_var[] = "container=ujail";
502         const char *preload_lib = find_lib("libpreload-seccomp.so");
503         int count = 0;
504 
505         if (seccomp && !preload_lib) {
506                 ERROR("failed to add preload-lib to env\n");
507                 return NULL;
508         }
509         if (seccomp) {
510                 snprintf(seccomp_var, sizeof(seccomp_var), "SECCOMP_FILE=%s", seccomp);
511                 envp[count++] = seccomp_var;
512                 snprintf(preload_var, sizeof(preload_var), "LD_PRELOAD=%s", preload_lib);
513                 envp[count++] = preload_var;
514         }
515 
516         envp[count++] = container_var;
517 
518         if (debug > 1)
519                 envp[count++] = debug_var;
520 
521         return envp;
522 }
523 
/*
 * Print the command-line synopsis, the list of options and a warning
 * about the limits of the jail to stderr.
 */
static void usage(void)
{
	fprintf(stderr, "ujail <options> -- <binary> <params ...>\n");
	fprintf(stderr, "  -d <num>\tshow debug log (increase num to increase verbosity)\n");
	fprintf(stderr, "  -S <file>\tseccomp filter config\n");
	fprintf(stderr, "  -C <file>\tcapabilities drop config\n");
	fprintf(stderr, "  -c\t\tset PR_SET_NO_NEW_PRIVS\n");
	fprintf(stderr, "  -n <name>\tthe name of the jail\n");
	fprintf(stderr, "namespace jail options:\n");
	fprintf(stderr, "  -h <hostname>\tchange the hostname of the jail\n");
	fprintf(stderr, "  -N\t\tjail has network namespace\n");
	fprintf(stderr, "  -f\t\tjail has user namespace\n");
	fprintf(stderr, "  -F\t\tjail has cgroups namespace\n");
	fprintf(stderr, "  -r <file>\treadonly files that should be staged\n");
	fprintf(stderr, "  -w <file>\twriteable files that should be staged\n");
	fprintf(stderr, "  -p\t\tjail has /proc\n");
	fprintf(stderr, "  -s\t\tjail has /sys\n");
	fprintf(stderr, "  -l\t\tjail has /dev/log\n");
	fprintf(stderr, "  -u\t\tjail has a ubus socket\n");
	fprintf(stderr, "  -U <name>\tuser to run jailed process\n");
	fprintf(stderr, "  -G <name>\tgroup to run jailed process\n");
	/* help text used to read "remont" */
	fprintf(stderr, "  -o\t\tremount jail root (/) read only\n");
	fprintf(stderr, "  -R <dir>\texternal jail rootfs (system container)\n");
	fprintf(stderr, "  -O <dir>\tdirectory for r/w overlayfs\n");
	fprintf(stderr, "  -T <size>\tuse tmpfs r/w overlayfs with <size>\n");
	fprintf(stderr, "  -E\t\tfail if jail cannot be setup\n");
	fprintf(stderr, "  -y\t\tprovide jail console\n");
	fprintf(stderr, "\nWarning: by default root inside the jail is the same\n"
			"and he has the same powers as root outside the jail,\n"
			"thus he can escape the jail and/or break stuff.\n"
			"Please use seccomp/capabilities (-S/-C) to restrict his powers\n\n"
			"If you use none of the namespace jail options,\n"
			"ujail will not use namespace/build a jail,\n"
			"and will only drop capabilities/apply seccomp filter.\n\n");
}
559 
560 static int exec_jail(void *pipes_ptr)
561 {
562         int *pipes = (int*)pipes_ptr;
563         char buf[1];
564         int pw_uid, pw_gid, gr_gid;
565 
566         close(pipes[0]);
567         close(pipes[3]);
568 
569         buf[0] = 'i';
570         if (write(pipes[1], buf, 1) < 1) {
571                 ERROR("can't write to parent\n");
572                 exit(EXIT_FAILURE);
573         }
574         if (read(pipes[2], buf, 1) < 1) {
575                 ERROR("can't read from parent\n");
576                 exit(EXIT_FAILURE);
577         }
578         if (buf[0] != 'O') {
579                 ERROR("parent had an error, child exiting\n");
580                 exit(EXIT_FAILURE);
581         }
582 
583         close(pipes[1]);
584         close(pipes[2]);
585 
586         if (opts.namespace & CLONE_NEWUSER) {
587                 if (setgid(0) < 0) {
588                         ERROR("setgid\n");
589                         exit(EXIT_FAILURE);
590                 }
591                 if (setuid(0) < 0) {
592                         ERROR("setuid\n");
593                         exit(EXIT_FAILURE);
594                 }
595 //              if (setgroups(0, NULL) < 0) {
596 //                      ERROR("setgroups\n");
597 //                      exit(EXIT_FAILURE);
598 //              }
599         }
600 
601         if (opts.namespace && opts.hostname && strlen(opts.hostname) > 0
602                         && sethostname(opts.hostname, strlen(opts.hostname))) {
603                 ERROR("sethostname(%s) failed: %m\n", opts.hostname);
604                 exit(EXIT_FAILURE);
605         }
606 
607         if ((opts.namespace & CLONE_NEWNS) && build_jail_fs()) {
608                 ERROR("failed to build jail fs\n");
609                 exit(EXIT_FAILURE);
610         }
611 
612         if (opts.capabilities && drop_capabilities(opts.capabilities))
613                 exit(EXIT_FAILURE);
614 
615         if (opts.no_new_privs && prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
616                 ERROR("prctl(PR_SET_NO_NEW_PRIVS) failed: %m\n");
617                 exit(EXIT_FAILURE);
618         }
619 
620         if (!(opts.namespace & CLONE_NEWUSER)) {
621                 get_jail_user(&pw_uid, &pw_gid, &gr_gid);
622                 set_jail_user(pw_uid, pw_gid, gr_gid);
623         }
624 
625         char **envp = build_envp(opts.seccomp);
626         if (!envp)
627                 exit(EXIT_FAILURE);
628 
629         INFO("exec-ing %s\n", *opts.jail_argv);
630         execve(*opts.jail_argv, opts.jail_argv, envp);
631         /* we get there only if execve fails */
632         ERROR("failed to execve %s: %m\n", *opts.jail_argv);
633         exit(EXIT_FAILURE);
634 }
635 
636 static int jail_running = 1;
637 static int jail_return_code = 0;
638 
639 static void jail_process_timeout_cb(struct uloop_timeout *t);
640 static struct uloop_timeout jail_process_timeout = {
641         .cb = jail_process_timeout_cb,
642 };
643 
644 static void jail_process_handler(struct uloop_process *c, int ret)
645 {
646         uloop_timeout_cancel(&jail_process_timeout);
647         if (WIFEXITED(ret)) {
648                 jail_return_code = WEXITSTATUS(ret);
649                 INFO("jail (%d) exited with exit: %d\n", c->pid, jail_return_code);
650         } else {
651                 jail_return_code = WTERMSIG(ret);
652                 INFO("jail (%d) exited with signal: %d\n", c->pid, jail_return_code);
653         }
654         jail_running = 0;
655         uloop_end();
656 }
657 
658 static struct uloop_process jail_process = {
659         .cb = jail_process_handler,
660 };
661 
662 static void jail_process_timeout_cb(struct uloop_timeout *t)
663 {
664         DEBUG("jail process failed to stop, sending SIGKILL\n");
665         kill(jail_process.pid, SIGKILL);
666 }
667 
668 static void jail_handle_signal(int signo)
669 {
670         DEBUG("forwarding signal %d to the jailed process\n", signo);
671         kill(jail_process.pid, signo);
672 }
673 
/*
 * Open the network namespace handle of process @target_ns.
 *
 * Returns a read-only descriptor for /proc/<target_ns>/ns/net, or -1 with
 * errno set by open() when it cannot be opened. The caller owns the
 * descriptor.
 */
static int netns_open_pid(const pid_t target_ns)
{
	char pid_net_path[PATH_MAX];

	/* pid_t is a signed type, so format it with %d */
	snprintf(pid_net_path, sizeof(pid_net_path), "/proc/%d/ns/net", (int) target_ns);

	return open(pid_net_path, O_RDONLY);
}
682 
683 static void netns_updown(pid_t pid, bool start)
684 {
685         struct ubus_context *ctx = ubus_connect(NULL);
686         static struct blob_buf req;
687         uint32_t id;
688 
689         if (!ctx)
690                 return;
691 
692         blob_buf_init(&req, 0);
693         blobmsg_add_string(&req, "jail", opts.name);
694         blobmsg_add_u32(&req, "pid", pid);
695         blobmsg_add_u8(&req, "start", start);
696 
697         if (ubus_lookup_id(ctx, "network", &id) ||
698             ubus_invoke(ctx, id, "netns_updown", req.head, NULL, NULL, 3000))
699                 INFO("ubus request failed\n");
700 
701         blob_buf_free(&req);
702         ubus_free(ctx);
703 }
704 
705 int main(int argc, char **argv)
706 {
707         sigset_t sigmask;
708         uid_t uid = getuid();
709         char log[] = "/dev/log";
710         char ubus[] = "/var/run/ubus.sock";
711         int ch, i;
712         int pipes[4];
713         char sig_buf[1];
714         int netns_fd;
715 
716         if (uid) {
717                 ERROR("not root, aborting: %m\n");
718                 return EXIT_FAILURE;
719         }
720 
721         umask(022);
722         mount_list_init();
723         init_library_search();
724 
725         while ((ch = getopt(argc, argv, OPT_ARGS)) != -1) {
726                 switch (ch) {
727                 case 'd':
728                         debug = atoi(optarg);
729                         break;
730                 case 'p':
731                         opts.namespace |= CLONE_NEWNS;
732                         opts.procfs = 1;
733                         break;
734                 case 'o':
735                         opts.namespace |= CLONE_NEWNS;
736                         opts.ronly = 1;
737                         break;
738                 case 'f':
739                         opts.namespace |= CLONE_NEWUSER;
740                         break;
741                 case 'F':
742                         opts.namespace |= CLONE_NEWCGROUP;
743                         break;
744                 case 'R':
745                         opts.extroot = optarg;
746                         break;
747                 case 's':
748                         opts.namespace |= CLONE_NEWNS;
749                         opts.sysfs = 1;
750                         break;
751                 case 'S':
752                         opts.seccomp = optarg;
753                         add_mount(optarg, 1, -1);
754                         break;
755                 case 'C':
756                         opts.capabilities = optarg;
757                         break;
758                 case 'c':
759                         opts.no_new_privs = 1;
760                         break;
761                 case 'n':
762                         opts.name = optarg;
763                         break;
764                 case 'N':
765                         opts.namespace |= CLONE_NEWNET;
766                         break;
767                 case 'h':
768                         opts.namespace |= CLONE_NEWUTS;
769                         opts.hostname = optarg;
770                         break;
771                 case 'r':
772                         opts.namespace |= CLONE_NEWNS;
773                         add_path_and_deps(optarg, 1, 0, 0);
774                         break;
775                 case 'w':
776                         opts.namespace |= CLONE_NEWNS;
777                         add_path_and_deps(optarg, 0, 0, 0);
778                         break;
779                 case 'u':
780                         opts.namespace |= CLONE_NEWNS;
781                         add_mount(ubus, 0, -1);
782                         break;
783                 case 'l':
784                         opts.namespace |= CLONE_NEWNS;
785                         add_mount(log, 0, -1);
786                         break;
787                 case 'U':
788                         opts.user = optarg;
789                         break;
790                 case 'G':
791                         opts.group = optarg;
792                         break;
793                 case 'O':
794                         opts.overlaydir = optarg;
795                         break;
796                 case 'T':
797                         opts.tmpoverlaysize = optarg;
798                         break;
799                 case 'E':
800                         opts.require_jail = 1;
801                         break;
802                 case 'y':
803                         opts.console = 1;
804                         break;
805                 }
806         }
807 
808         if (opts.namespace)
809                 opts.namespace |= CLONE_NEWIPC | CLONE_NEWPID;
810 
811         if (opts.tmpoverlaysize && strlen(opts.tmpoverlaysize) > 8) {
812                 ERROR("size parameter too long: \"%s\"\n", opts.tmpoverlaysize);
813                 return -1;
814         }
815 
816         /* no <binary> param found */
817         if (argc - optind < 1) {
818                 usage();
819                 return EXIT_FAILURE;
820         }
821         if (!(opts.namespace||opts.capabilities||opts.seccomp)) {
822                 ERROR("Not using namespaces, capabilities or seccomp !!!\n\n");
823                 usage();
824                 return EXIT_FAILURE;
825         }
826         DEBUG("Using namespaces(0x%08x), capabilities(%d), seccomp(%d)\n",
827                 opts.namespace,
828                 opts.capabilities != 0,
829                 opts.seccomp != 0);
830 
831         opts.jail_argv = &argv[optind];
832 
833         get_jail_user(&opts.pw_uid, &opts.pw_gid, &opts.gr_gid);
834 
835         if (!opts.extroot) {
836                 if (opts.namespace && add_path_and_deps(*opts.jail_argv, 1, -1, 0)) {
837                         ERROR("failed to load dependencies\n");
838                         return -1;
839                 }
840         }
841 
842         if (opts.namespace && opts.seccomp && add_path_and_deps("libpreload-seccomp.so", 1, -1, 1)) {
843                 ERROR("failed to load libpreload-seccomp.so\n");
844                 opts.seccomp = 0;
845                 if (opts.require_jail)
846                         return -1;
847         }
848 
849         if (opts.name)
850                 prctl(PR_SET_NAME, opts.name, NULL, NULL, NULL);
851 
852         uloop_init();
853 
854         sigfillset(&sigmask);
855         for (i = 0; i < _NSIG; i++) {
856                 struct sigaction s = { 0 };
857 
858                 if (!sigismember(&sigmask, i))
859                         continue;
860                 if ((i == SIGCHLD) || (i == SIGPIPE) || (i == SIGSEGV))
861                         continue;
862 
863                 s.sa_handler = jail_handle_signal;
864                 sigaction(i, &s, NULL);
865         }
866 
867         if (opts.namespace) {
868                 if (opts.namespace & CLONE_NEWNS) {
869                         add_mount("/dev/full", 0, -1);
870                         add_mount("/dev/null", 0, -1);
871                         add_mount("/dev/random", 0, -1);
872                         add_mount("/dev/urandom", 0, -1);
873                         add_mount("/dev/zero", 0, -1);
874                         add_mount("/dev/ptmx", 0, -1);
875                         add_mount("/dev/tty", 0, -1);
876 
877                         if (!opts.extroot && (opts.user || opts.group)) {
878                                 add_mount("/etc/passwd", 0, -1);
879                                 add_mount("/etc/group", 0, -1);
880                         }
881 
882 #if defined(__GLIBC__)
883                         if (!opts.extroot)
884                                 add_mount("/etc/nsswitch.conf", 0, -1);
885 #endif
886 
887                         if (!(opts.namespace & CLONE_NEWNET)) {
888                                 add_mount("/etc/resolv.conf", 0, -1);
889                         }
890                 }
891 
892                 if (pipe(&pipes[0]) < 0 || pipe(&pipes[2]) < 0)
893                         return -1;
894 
895                 jail_process.pid = clone(exec_jail, child_stack + STACK_SIZE, SIGCHLD | opts.namespace, &pipes);
896         } else {
897                 jail_process.pid = fork();
898         }
899 
900         if (jail_process.pid > 0) {
901                 seteuid(0);
902                 /* parent process */
903                 close(pipes[1]);
904                 close(pipes[2]);
905                 if (read(pipes[0], sig_buf, 1) < 1) {
906                         ERROR("can't read from child\n");
907                         return -1;
908                 }
909                 close(pipes[0]);
910                 if (opts.namespace & CLONE_NEWUSER) {
911                         bool has_gr = (opts.gr_gid != -1);
912                         if (write_setgroups(jail_process.pid, false)) {
913                                 ERROR("can't write setgroups\n");
914                                 return -1;
915                         }
916                         if (opts.pw_uid != -1) {
917                                 write_uid_gid_map(jail_process.pid, 0, opts.pw_uid);
918                                 write_uid_gid_map(jail_process.pid, 1, has_gr?opts.gr_gid:opts.pw_gid);
919                         } else {
920                                 write_uid_gid_map(jail_process.pid, 0, 65534);
921                                 write_uid_gid_map(jail_process.pid, 1, has_gr?opts.gr_gid:65534);
922                         }
923                 }
924 
925                 if (opts.namespace & CLONE_NEWNET) {
926                         if (!opts.name) {
927                                 ERROR("netns needs a named jail\n");
928                                 return -1;
929                         }
930                         netns_fd = netns_open_pid(jail_process.pid);
931                         netns_updown(jail_process.pid, true);
932                 }
933 
934                 sig_buf[0] = 'O';
935                 if (write(pipes[3], sig_buf, 1) < 0) {
936                         ERROR("can't write to child\n");
937                         return -1;
938                 }
939                 close(pipes[3]);
940                 uloop_process_add(&jail_process);
941                 uloop_run();
942                 if (jail_running) {
943                         DEBUG("uloop interrupted, killing jail process\n");
944                         kill(jail_process.pid, SIGTERM);
945                         uloop_timeout_set(&jail_process_timeout, 1000);
946                         uloop_run();
947                 }
948                 uloop_done();
949                 if (opts.namespace & CLONE_NEWNET) {
950                         setns(netns_fd, CLONE_NEWNET);
951                         netns_updown(getpid(), false);
952                         close(netns_fd);
953                 }
954                 return jail_return_code;
955         } else if (jail_process.pid == 0) {
956                 /* fork child process */
957                 return exec_jail(NULL);
958         } else {
959                 ERROR("failed to clone/fork: %m\n");
960                 return EXIT_FAILURE;
961         }
962 }
963 

This page was automatically generated by LXR 0.3.1.  •  OpenWrt