[PATCH] s6-supervise: Optionally run child in a new pid namespace

From: Jesse Young <jlyo_at_jlyo.org>
Date: Sat, 15 Jul 2017 13:44:28 -0500

This patch modifies s6-supervise to use the Linux-specific clone()
system call so that the child process becomes pid 1 of a new pid
namespace. To enable it, compile with -DWANT_CLONE_NEWPID and create a
./clone-newpid file in the desired service directories (s6-supervise
only checks for the file's existence).

I ask that this be included in s6-supervise.c because doing
unshare(CLONE_NEWPID) in the child process doesn't change the process's
pid to 1. Rather, it runs the next spawned child as pid 1. After
spawning that first process, the parent is prevented from spawning any
future children; subsequent attempts will fail with ENOMEM. Changing
s6-supervise to use clone() avoids these limitations and also avoids
extending the supervision chain, which would make exit/signal proxying
necessary.

To see correct ps output, /proc needs to be remounted. To avoid
conflicts with the parent pid namespace's /proc, this is done in a new
mount namespace.

For example:
#!/bin/execlineb -P
unshare -m --
foreground { umount /proc }
if -- { mount -t proc proc /proc }
exec ...

The functions added in this patch could be migrated into skalibs or
libs6, but I wanted to start with this as a PoC without making API
changes.

Jesse

---
 src/supervision/s6-supervise.c | 87 +++++++++++++++++++++++++++++++-----------
 1 file changed, 65 insertions(+), 22 deletions(-)
diff --git a/src/supervision/s6-supervise.c b/src/supervision/s6-supervise.c
index 2e8fa38..7605a82 100644
--- a/src/supervision/s6-supervise.c
+++ b/src/supervision/s6-supervise.c
@@ -9,6 +9,9 @@
 #include <errno.h>
 #include <fcntl.h>
 #include <signal.h>
+#ifdef WANT_CLONE_NEWPID
+#  include <sched.h>
+#endif
 #include <skalibs/allreadwrite.h>
 #include <skalibs/bytestr.h>
 #include <skalibs/uint.h>
@@ -203,6 +206,67 @@ static int maybesetsid (void)
   return 1 ;
 }
 
+static void exec_run(int p[2], int notifyp[2], int fd) gccattr_noreturn ;
+static void exec_run(int p[2], int notifyp[2], int fd) /* p, notifyp: descriptor pairs from the caller; fd: number passed to fd_move() for notifyp[1] */
+{ /* Child-side setup followed by execve("./run"); declared noreturn above. */
+  char const *cargv[2] = { "run", 0 } ; /* argv for ./run: just argv[0] */
+  PROG = "s6-supervise (child)" ; /* presumably the name the strerr_* messages are prefixed with */
+  selfpipe_finish() ;
+  if (notifyp[0] >= 0) close(notifyp[0]) ; /* negative entries are skipped (presumably "no notification pipe") */
+  close(p[0]) ;
+  if (notifyp[1] >= 0 && fd_move(fd, notifyp[1]) < 0) /* presumably renumbers notifyp[1] to fd; confirm fd_move's argument order */
+  {
+    failcoe(p[1]) ; /* called before every fatal exit here; presumably reports the failure through p[1] */
+    strerr_diefu1sys(127, "move notification descriptor") ;
+  }
+  if (!maybesetsid())
+  {
+    failcoe(p[1]) ;
+    strerr_diefu1sys(127, "access ./nosetsid") ;
+  }
+  execve("./run", (char *const *)cargv, (char *const *)environ) ;
+  failcoe(p[1]) ; /* reached only when execve() failed */
+  strerr_dieexec(127, "run") ;
+}
+
+static pid_t spawn_run_fork(int p[2], int notifyp[2], int fd)
+{ /* fork(); the child continues in exec_run(), the caller gets fork()'s return value */
+  pid_t const child = fork() ;
+  if (child == 0) exec_run(p, notifyp, fd) ; /* child side; exec_run is declared noreturn */
+  return child ; /* the child's pid, or fork()'s negative result on failure */
+}
+
+#ifdef WANT_CLONE_NEWPID
+typedef struct /* exec_run()'s three arguments, bundled so they fit clone()'s single void * argument */
+{
+  int p[2] ; /* copy of spawn_run's p[] */
+  int notifyp[2] ; /* copy of spawn_run's notifyp[] */
+  int fd ; /* spawn_run's fd argument */
+} exec_run_t ; /* field order matters: spawn_run fills this with a positional initializer */
+
+static int exec_run_shim(void *ctx) gccattr_noreturn ;
+static int exec_run_shim(void *ctx) /* clone() entry point; ctx points to an exec_run_t */
+{
+  exec_run_t *args = ctx ; /* void * converts implicitly in C */
+  exec_run(args->p, args->notifyp, args->fd) ; /* declared noreturn, hence no return statement */
+}
+
+static pid_t spawn_run(int p[2], int notifyp[2], int fd)
+{ /* fork() when ./clone-newpid is absent (ENOENT), otherwise clone() with CLONE_NEWPID; returns that call's result */
+  exec_run_t arg = { { p[0], p[1] }, { notifyp[0], notifyp[1] }, fd } ; /* arguments forwarded to exec_run_shim */
+  char child_stack[SIGSTKSZ] ; /* stack for the clone() child; its top address is passed below */
+  if (access("clone-newpid", F_OK) < 0 && errno == ENOENT) /* NOTE(review): access() errors other than ENOENT fall through to clone() */
+    return spawn_run_fork(p, notifyp, fd) ;
+  return (pid_t) clone(&exec_run_shim, child_stack + sizeof(child_stack),
+      CLONE_NEWPID | SIGCHLD, &arg) ; /* SIGCHLD is the child's termination signal; CLONE_VM is not requested */
+}
+#else /* if !defined(WANT_CLONE_NEWPID) */
+static pid_t spawn_run(int p[2], int notifyp[2], int fd)
+{ /* WANT_CLONE_NEWPID not defined: forward unchanged to the fork()-based spawner */
+  return spawn_run_fork(p, notifyp, fd) ;
+}
+#endif /* defined(WANT_CLONE_NEWPID) */
+
 static void trystart (void)
 {
   int p[2] ;
@@ -222,7 +286,7 @@ static void trystart (void)
     fd_close(p[1]) ; fd_close(p[0]) ;
     return ;
   }
-  pid = fork() ;
+  pid = spawn_run(p, notifyp, (int)fd) ;
   if (pid < 0)
   {
     settimeout(60) ;
@@ -232,27 +296,6 @@ static void trystart (void)
     fd_close(p[1]) ; fd_close(p[0]) ;
     return ;
   }
-  else if (!pid)
-  {
-    char const *cargv[2] = { "run", 0 } ;
-    PROG = "s6-supervise (child)" ;
-    selfpipe_finish() ;
-    if (notifyp[0] >= 0) close(notifyp[0]) ;
-    close(p[0]) ;
-    if (notifyp[1] >= 0 && fd_move((int)fd, notifyp[1]) < 0)
-    {
-      failcoe(p[1]) ;
-      strerr_diefu1sys(127, "move notification descriptor") ;
-    }
-    if (!maybesetsid())
-    {
-      failcoe(p[1]) ;
-      strerr_diefu1sys(127, "access ./nosetsid") ;
-    }
-    execve("./run", (char *const *)cargv, (char *const *)environ) ;
-    failcoe(p[1]) ;
-    strerr_dieexec(127, "run") ;
-  }
   if (notifyp[1] >= 0) fd_close(notifyp[1]) ;
   fd_close(p[1]) ;
   {
-- 
2.13.1
Received on Sat Jul 15 2017 - 18:44:28 UTC

This archive was generated by hypermail 2.3.0 : Sun May 09 2021 - 19:38:49 UTC