Re: [PATCH] s6-supervise: Optionally run child in a new pid namespace

From: John O'Meara <john.fr.omeara_at_gmail.com>
Date: Sat, 15 Jul 2017 20:24:25 +0000

You can achieve a PID namespace (and others) using the unshare program from
util-linux without patching s6. Put the following at the top of your run
script:

  unshare -fp --mount-proc

This also has the advantage of clearly showing which services are in their
own namespaces when looking at a ps listing, especially for forest views
("ps f" or "s6-ps -H").

-- 
John O'Meara
(typed on a cell phone; please excuse auto correct errors)
On Sat, Jul 15, 2017, 2:48 PM Jesse Young <jlyo_at_jlyo.org> wrote:
> This patch modifies s6-supervise to use the Linux specific clone()
> system call to enable the child process to become the pid 1 of a new
> pid namespace. To enable it, compile with -DWANT_CLONE_NEWPID and make
> the ./clone-newpid file readable to s6-supervise in the desired service
> directories.
>
> I ask that this be included in s6-supervise.c because doing
> unshare(CLONE_NEWPID) in the child process doesn't change the process's
> pid to 1. Rather, it runs the next spawned child as pid 1. After
> spawning that first process, the parent is prevented from spawning any
> future children; subsequent attempts will fail with ENOMEM. Changing
> s6-supervise to use clone() avoids these limitations as well as avoiding
> extending the supervision chain, which would make exit/signal proxying
> necessary.
>
> To see correct ps output, /proc needs to be remounted. To avoid
> conflicts with the parent pid namespace's /proc, this is done in a new
> mount namespace.
>
> For example:
> #!/bin/execlineb -P
> unshare -m --
> foreground { umount /proc }
> if -- { mount -t proc proc /proc }
> exec ...
>
> The functions added in this patch could be migrated into skalibs or
> libs6, but I wanted to start with this as a PoC without making API
> changes.
>
> Jesse
>
> ---
>  src/supervision/s6-supervise.c | 87 +++++++++++++++++++++++++++++++-----------
>  1 file changed, 65 insertions(+), 22 deletions(-)
>
> diff --git a/src/supervision/s6-supervise.c b/src/supervision/s6-supervise.c
> index 2e8fa38..7605a82 100644
> --- a/src/supervision/s6-supervise.c
> +++ b/src/supervision/s6-supervise.c
> @@ -9,6 +9,9 @@
>  #include <errno.h>
>  #include <fcntl.h>
>  #include <signal.h>
> +#ifdef WANT_CLONE_NEWPID
> +#  include <sched.h>
> +#endif
>  #include <skalibs/allreadwrite.h>
>  #include <skalibs/bytestr.h>
>  #include <skalibs/uint.h>
> @@ -203,6 +206,67 @@ static int maybesetsid (void)
>    return 1 ;
>  }
>
> +static void exec_run(int p[2], int notifyp[2], int fd) gccattr_noreturn ;
> +static void exec_run(int p[2], int notifyp[2], int fd)
> +{
> +  char const *cargv[2] = { "run", 0 } ;
> +  PROG = "s6-supervise (child)" ;
> +  selfpipe_finish() ;
> +  if (notifyp[0] >= 0) close(notifyp[0]) ;
> +  close(p[0]) ;
> +  if (notifyp[1] >= 0 && fd_move(fd, notifyp[1]) < 0)
> +  {
> +    failcoe(p[1]) ;
> +    strerr_diefu1sys(127, "move notification descriptor") ;
> +  }
> +  if (!maybesetsid())
> +  {
> +    failcoe(p[1]) ;
> +    strerr_diefu1sys(127, "access ./nosetsid") ;
> +  }
> +  execve("./run", (char *const *)cargv, (char *const *)environ) ;
> +  failcoe(p[1]) ;
> +  strerr_dieexec(127, "run") ;
> +}
> +
> +static pid_t spawn_run_fork(int p[2], int notifyp[2], int fd)
> +{
> +  pid_t pid = fork() ;
> +  if (!pid) exec_run(p, notifyp, fd) ;
> +  return pid ;
> +}
> +
> +#ifdef WANT_CLONE_NEWPID
> +typedef struct
> +{
> +  int p[2] ;
> +  int notifyp[2] ;
> +  int fd ;
> +} exec_run_t ;
> +
> +static int exec_run_shim(void *ctx) gccattr_noreturn ;
> +static int exec_run_shim(void *ctx)
> +{
> +  exec_run_t *er = (exec_run_t *) ctx ;
> +  exec_run(er->p, er->notifyp, er->fd) ;
> +}
> +
> +static pid_t spawn_run(int p[2], int notifyp[2], int fd)
> +{
> +  exec_run_t arg = { { p[0], p[1] }, { notifyp[0], notifyp[1] }, fd } ;
> +  char child_stack[SIGSTKSZ] ;
> +  if (access("clone-newpid", F_OK) < 0 && errno == ENOENT)
> +    return spawn_run_fork(p, notifyp, fd) ;
> +  return (pid_t) clone(&exec_run_shim, child_stack + sizeof(child_stack),
> +      CLONE_NEWPID | SIGCHLD, &arg) ;
> +}
> +#else /* if !defined(WANT_CLONE_NEWPID) */
> +static pid_t spawn_run(int p[2], int notifyp[2], int fd)
> +{
> +  return spawn_run_fork(p, notifyp, fd) ;
> +}
> +#endif /* defined(WANT_CLONE_NEWPID) */
> +
>  static void trystart (void)
>  {
>    int p[2] ;
> @@ -222,7 +286,7 @@ static void trystart (void)
>      fd_close(p[1]) ; fd_close(p[0]) ;
>      return ;
>    }
> -  pid = fork() ;
> +  pid = spawn_run(p, notifyp, (int)fd) ;
>    if (pid < 0)
>    {
>      settimeout(60) ;
> @@ -232,27 +296,6 @@ static void trystart (void)
>      fd_close(p[1]) ; fd_close(p[0]) ;
>      return ;
>    }
> -  else if (!pid)
> -  {
> -    char const *cargv[2] = { "run", 0 } ;
> -    PROG = "s6-supervise (child)" ;
> -    selfpipe_finish() ;
> -    if (notifyp[0] >= 0) close(notifyp[0]) ;
> -    close(p[0]) ;
> -    if (notifyp[1] >= 0 && fd_move((int)fd, notifyp[1]) < 0)
> -    {
> -      failcoe(p[1]) ;
> -      strerr_diefu1sys(127, "move notification descriptor") ;
> -    }
> -    if (!maybesetsid())
> -    {
> -      failcoe(p[1]) ;
> -      strerr_diefu1sys(127, "access ./nosetsid") ;
> -    }
> -    execve("./run", (char *const *)cargv, (char *const *)environ) ;
> -    failcoe(p[1]) ;
> -    strerr_dieexec(127, "run") ;
> -  }
>    if (notifyp[1] >= 0) fd_close(notifyp[1]) ;
>    fd_close(p[1]) ;
>    {
> --
> 2.13.1
>
Received on Sat Jul 15 2017 - 20:24:25 UTC

This archive was generated by hypermail 2.3.0 : Sun May 09 2021 - 19:38:49 UTC