-- John O'Meara (typed on a cell phone; please excuse autocorrect errors)

On Sat, Jul 15, 2017, 2:48 PM Jesse Young <jlyo_at_jlyo.org> wrote:

> This patch modifies s6-supervise to use the Linux-specific clone()
> system call to enable the child process to become the pid 1 of a new
> pid namespace. To enable it, compile with -DWANT_CLONE_NEWPID and make
> the ./clone-newpid file readable to s6-supervise in the desired service
> directories.
>
> I ask that this be included in s6-supervise.c because doing
> unshare(CLONE_NEWPID) in the child process doesn't change the process's
> pid to 1. Rather, it runs the next spawned child as pid 1. After
> spawning that first process, the parent is prevented from spawning any
> future children; subsequent attempts will fail with ENOMEM. Changing
> s6-supervise to use clone() avoids these limitations as well as avoiding
> extending the supervision chain, which would make exit/signal proxying
> necessary.
>
> To see correct ps output, /proc needs to be remounted. To avoid
> conflicts with the parent pid namespace's /proc, this is done in a new
> mount namespace.
>
> For example:
> #!/bin/execlineb -P
> unshare -m --
> foreground { umount /proc }
> if -- { mount -t proc proc /proc }
> exec ...
>
> The functions added in this patch could be migrated into skalibs or
> libs6, but I wanted to start with this as a PoC without making API
> changes.
> > Jesse > > --- > src/supervision/s6-supervise.c | 87 > +++++++++++++++++++++++++++++++----------- 1 file changed, 65 > insertions(+), 22 deletions(-) > > diff --git a/src/supervision/s6-supervise.c > b/src/supervision/s6-supervise.c index 2e8fa38..7605a82 100644 > --- a/src/supervision/s6-supervise.c > +++ b/src/supervision/s6-supervise.c > _at__at_ -9,6 +9,9 @@ > #include <errno.h> > #include <fcntl.h> > #include <signal.h> > +#ifdef WANT_CLONE_NEWPID > +# include <sched.h> > +#endif > #include <skalibs/allreadwrite.h> > #include <skalibs/bytestr.h> > #include <skalibs/uint.h> > _at__at_ -203,6 +206,67 @@ static int maybesetsid (void) > return 1 ; > } > > +static void exec_run(int p[2], int notifyp[2], int fd) > gccattr_noreturn ; +static void exec_run(int p[2], int notifyp[2], int > fd) +{ > + char const *cargv[2] = { "run", 0 } ; > + PROG = "s6-supervise (child)" ; > + selfpipe_finish() ; > + if (notifyp[0] >= 0) close(notifyp[0]) ; > + close(p[0]) ; > + if (notifyp[1] >= 0 && fd_move(fd, notifyp[1]) < 0) > + { > + failcoe(p[1]) ; > + strerr_diefu1sys(127, "move notification descriptor") ; > + } > + if (!maybesetsid()) > + { > + failcoe(p[1]) ; > + strerr_diefu1sys(127, "access ./nosetsid") ; > + } > + execve("./run", (char *const *)cargv, (char *const *)environ) ; > + failcoe(p[1]) ; > + strerr_dieexec(127, "run") ; > +} > + > +static pid_t spawn_run_fork(int p[2], int notifyp[2], int fd) > +{ > + pid_t pid = fork() ; > + if (!pid) exec_run(p, notifyp, fd) ; > + return pid ; > +} > + > +#ifdef WANT_CLONE_NEWPID > +typedef struct > +{ > + int p[2] ; > + int notifyp[2] ; > + int fd ; > +} exec_run_t ; > + > +static int exec_run_shim(void *ctx) gccattr_noreturn ; > +static int exec_run_shim(void *ctx) > +{ > + exec_run_t *er = (exec_run_t *) ctx ; > + exec_run(er->p, er->notifyp, er->fd) ; > +} > + > +static pid_t spawn_run(int p[2], int notifyp[2], int fd) > +{ > + exec_run_t arg = { { p[0], p[1] }, { notifyp[0], notifyp[1] }, fd } ; > + char 
child_stack[SIGSTKSZ] ; > + if (access("clone-newpid", F_OK) < 0 && errno == ENOENT) > + return spawn_run_fork(p, notifyp, fd) ; > + return (pid_t) clone(&exec_run_shim, child_stack + > sizeof(child_stack), > + CLONE_NEWPID | SIGCHLD, &arg) ; > +} > +#else /* if !defined(WANT_CLONE_NEWPID) */ > +static pid_t spawn_run(int p[2], int notifyp[2], int fd) > +{ > + return spawn_run_fork(p, notifyp, fd) ; > +} > +#endif /* defined(WANT_CLONE_NEWPID) */ > + > static void trystart (void) > { > int p[2] ; > _at__at_ -222,7 +286,7 @@ static void trystart (void) > fd_close(p[1]) ; fd_close(p[0]) ; > return ; > } > - pid = fork() ; > + pid = spawn_run(p, notifyp, (int)fd) ; > if (pid < 0) > { > settimeout(60) ; > _at__at_ -232,27 +296,6 @@ static void trystart (void) > fd_close(p[1]) ; fd_close(p[0]) ; > return ; > } > - else if (!pid) > - { > - char const *cargv[2] = { "run", 0 } ; > - PROG = "s6-supervise (child)" ; > - selfpipe_finish() ; > - if (notifyp[0] >= 0) close(notifyp[0]) ; > - close(p[0]) ; > - if (notifyp[1] >= 0 && fd_move((int)fd, notifyp[1]) < 0) > - { > - failcoe(p[1]) ; > - strerr_diefu1sys(127, "move notification descriptor") ; > - } > - if (!maybesetsid()) > - { > - failcoe(p[1]) ; > - strerr_diefu1sys(127, "access ./nosetsid") ; > - } > - execve("./run", (char *const *)cargv, (char *const *)environ) ; > - failcoe(p[1]) ; > - strerr_dieexec(127, "run") ; > - } > if (notifyp[1] >= 0) fd_close(notifyp[1]) ; > fd_close(p[1]) ; > { > -- > 2.13.1 >Received on Sat Jul 15 2017 - 20:24:25 UTC
This archive was generated by hypermail 2.3.0 : Sun May 09 2021 - 19:38:49 UTC