diff options
author | markus@chromium.org <markus@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2012-01-30 22:17:35 +0000 |
---|---|---|
committer | markus@chromium.org <markus@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2012-01-30 22:17:35 +0000 |
commit | 28af78c4ea3cdeaa959ded5c3bf29d707012774a (patch) | |
tree | 13739bfdb4f5dbbb300abd3448ce89d64ffbc1e9 | |
parent | e3b4a132b40d90b5a60f83d9b3eb415cfc85cd35 (diff) | |
download | chromium_src-28af78c4ea3cdeaa959ded5c3bf29d707012774a.zip chromium_src-28af78c4ea3cdeaa959ded5c3bf29d707012774a.tar.gz chromium_src-28af78c4ea3cdeaa959ded5c3bf29d707012774a.tar.bz2 |
Calling clone(CLONE_NEWPID) results in the new pid namespace getting a new "init" process.
This process is now responsible for reaping all child processes that no longer have a
direct parent process.
Often, failure to do this goes unnoticed, because our sandbox'd processes don't often
fork other processes that then continue to turn into daemon processes. But there is no
reason why they couldn't occasionally do so. And in fact, the seccomp sandbox does do
so for its trusted process.
In the past, this would result in us having lots of uncollected zombie processes that
only disappeared when the browser terminated.
BUG=109944
TEST=Run Chrome with both the suid sandbox and the seccomp sandbox, open and close a few tabs, verify that we don't produce any zombie processes
Review URL: https://chromiumcodereview.appspot.com/9295005
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@119746 0039d316-1c4b-4281-b951-d872f2087c98
-rw-r--r-- | sandbox/linux/suid/init_process.c | 201 | ||||
-rw-r--r-- | sandbox/linux/suid/init_process.h | 11 | ||||
-rw-r--r-- | sandbox/linux/suid/sandbox.c | 180 | ||||
-rw-r--r-- | sandbox/sandbox.gyp | 2 |
4 files changed, 346 insertions, 48 deletions
diff --git a/sandbox/linux/suid/init_process.c b/sandbox/linux/suid/init_process.c new file mode 100644 index 0000000..9ce632d --- /dev/null +++ b/sandbox/linux/suid/init_process.c @@ -0,0 +1,201 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#define _GNU_SOURCE +#include "init_process.h" + +#include <dirent.h> +#include <fcntl.h> +#include <signal.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <unistd.h> + + +static int getProcessStatus(int proc_fd, const char *process, + const char *field) { + int ret = -1; + + // Open "/proc/${process}/status" + char *buf = malloc(strlen(process) + 80); + sprintf(buf, "%s/status", process); + int fd = openat(proc_fd, buf, O_RDONLY); + if (fd >= 0) { + // Only bother to read the first 4kB. All of the fields that we + // are interested in will show up much earlier. + buf = realloc(buf, 4097); + size_t sz = read(fd, buf, 4096); + if (sz > 0) { + // Find a matching "field" + buf[sz] = '\000'; + char *f = malloc(strlen(field) + 4); + sprintf(f, "\n%s:\t", field); + char *ptr = strstr(buf, f); + if (ptr) { + // Extract the numerical value of the "field" + ret = atoi(ptr + strlen(f)); + } + free(f); + } + close(fd); + } + free(buf); + return ret; +} + +static bool hasChildren(int proc_fd, int pid) { + bool ret = false; + + // Open "/proc" + int fd = dup(proc_fd); + lseek(fd, SEEK_SET, 0); + DIR *dir = fd >= 0 ? fdopendir(fd) : NULL; + struct dirent de, *res; + while (dir && !readdir_r(dir, &de, &res) && res) { + // Find numerical entries. Those are processes. + if (res->d_name[0] <= '0' || res->d_name[0] > '9') { + continue; + } + + // For each process, check the parent's pid + int ppid = getProcessStatus(proc_fd, res->d_name, "PPid"); + + if (ppid == pid) { + // We found a child process. 
We can stop searching, now + ret = true; + break; + } + } + closedir(dir); + return ret; +} + +void SystemInitProcess(int init_fd, int child_pid, int proc_fd, int null_fd) { + int ret = 0; + + // CLONE_NEWPID doesn't adjust the contents of the "/proc" file system. + // This is very confusing. And it is even possible the kernel developers + // will consider this a bug and fix it at some point in the future. + // So, to be on the safe side, we explicitly retrieve our process id + // from the "/proc" file system. This should continue to work, even if + // the kernel eventually gets fixed so that "/proc" shows the view from + // inside of the new pid namespace. + pid_t init_pid = getProcessStatus(proc_fd, "self", "Pid"); + if (init_pid <= 0) { + fprintf(stderr, + "Failed to determine real process id of new \"init\" process\n"); + _exit(1); + } + + // Redirect stdio to /dev/null + if (null_fd < 0 || + dup2(null_fd, 0) != 0 || + dup2(null_fd, 1) != 1 || + dup2(null_fd, 2) != 2) { + fprintf(stderr, "Failed to point stdio to a safe place\n"); + _exit(1); + } + close(null_fd); + + // Close all file handles + int fds_fd = openat(proc_fd, "self/fd", O_RDONLY | O_DIRECTORY); + DIR *dir = fds_fd >= 0 ? fdopendir(fds_fd) : NULL; + if (dir == NULL) { + // If we don't know the list of our open file handles, just try closing + // all valid ones. 
+ for (int fd = sysconf(_SC_OPEN_MAX); --fd > 2; ) { + if (fd != init_fd && fd != proc_fd) { + close(fd); + } + } + } else { + // If available, it is much more efficient to just close the file + // handles that show up in "/proc/self/fd/" + struct dirent de, *res; + while (!readdir_r(dir, &de, &res) && res) { + if (res->d_name[0] < '0') + continue; + int fd = atoi(res->d_name); + if (fd > 2 && fd != init_fd && fd != proc_fd && fd != dirfd(dir)) { + close(fd); + } + } + closedir(dir); + } + + // Set up signal handler to catch SIGCHLD, but mask the signal for now + sigset_t mask; + sigemptyset(&mask); + sigaddset(&mask, SIGCHLD); + sigprocmask(SIG_BLOCK, &mask, NULL); + + // Notify other processes that we are done initializing + write(init_fd, " ", 1); + close(init_fd); + + // Handle dying processes that have been re-parented to the "init" process + for (;;) { + // Wait until we receive a SIGCHLD signal. Our signal handler doesn't + // actually need to do anything, though + sigwaitinfo(&mask, NULL); + + bool retry = false; + do { + for (;;) { + // Reap all exit codes of our child processes. This includes both + // processes that originally were our immediate children, and processes + // that have since been re-parented to be our children. + int status; + pid_t pid = waitpid(0, &status, __WALL | WNOHANG); + if (pid <= 0) { + break; + } else { + // We found some newly deceased child processes. Better schedule + // another very thorough inspection of our state. + retry = false; + } + if (pid == child_pid) { + // If our first immediate child died, remember its exit code. That's + // the exit code that we should be reporting to our parent process + if (WIFEXITED(status)) { + ret = WEXITSTATUS(status); + } else if (WIFSIGNALED(status)) { + ret = -WTERMSIG(status); + } + } + } + if (hasChildren(proc_fd, init_pid)) { + // As long as we still have child processes, continue waiting for + // their ultimate demise. 
+ retry = false; + } else { + if (retry) { + // No more child processes. We can exit now. + if (ret < 0) { + // Try to exit with the same signal that our child terminated with + signal(-ret, SIG_DFL); + kill(1, -ret); + ret = 1; + } + // Exit with the same exit code that our child exited with + _exit(ret); + } else { + // There is a little bit of a race condition between getting + // notifications and scanning the "/proc" file system. This is + // particularly true, because scanning "/proc" cannot possibly be + // an atomic operation. + // If we find that we no longer appear to have any children, we check + // one more time whether there are any children we can now reap. + // They might have died while we were scanning "/proc" and if so, + // they should now show up. + retry = true; + } + } + } while (retry); + } +} diff --git a/sandbox/linux/suid/init_process.h b/sandbox/linux/suid/init_process.h new file mode 100644 index 0000000..45bc69a --- /dev/null +++ b/sandbox/linux/suid/init_process.h @@ -0,0 +1,11 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef SANDBOX_LINUX_SUID_INIT_PROCESS_H_ +#define SANDBOX_LINUX_SUID_INIT_PROCESS_H_ + +void SystemInitProcess(int init_fd, int child_pid, int proc_fd, int null_fd) + __attribute__((noreturn)); + +#endif // SANDBOX_LINUX_SUID_INIT_PROCESS_H_ diff --git a/sandbox/linux/suid/sandbox.c b/sandbox/linux/suid/sandbox.c index a545208..475378c 100644 --- a/sandbox/linux/suid/sandbox.c +++ b/sandbox/linux/suid/sandbox.c @@ -1,4 +1,4 @@ -// Copyright (c) 2011 The Chromium Authors. All rights reserved. +// Copyright (c) 2012 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. 
@@ -26,6 +26,7 @@ #include <sys/vfs.h> #include <unistd.h> +#include "init_process.h" #include "linux_util.h" #include "process_util.h" #include "suid_unsafe_environment_variables.h" @@ -56,7 +57,7 @@ static void FatalError(const char *msg, ...) { vfprintf(stderr, msg, ap); fprintf(stderr, ": %s\n", strerror(errno)); fflush(stderr); - exit(1); + _exit(1); } // We will chroot() to the helper's /proc/self directory. Anything there will @@ -70,11 +71,47 @@ static void FatalError(const char *msg, ...) { #define SAFE_DIR "/proc/self/fdinfo" #define SAFE_DIR2 "/proc/self/fd" -static bool SpawnChrootHelper() { +static bool DropRoot() { + if (prctl(PR_SET_DUMPABLE, 0, 0, 0, 0)) { + perror("prctl(PR_SET_DUMPABLE)"); + return false; + } + + if (prctl(PR_GET_DUMPABLE, 0, 0, 0, 0)) { + perror("Still dumpable after prctl(PR_SET_DUMPABLE)"); + return false; + } + + gid_t rgid, egid, sgid; + if (getresgid(&rgid, &egid, &sgid)) { + perror("getresgid"); + return false; + } + + if (setresgid(rgid, rgid, rgid)) { + perror("setresgid"); + return false; + } + + uid_t ruid, euid, suid; + if (getresuid(&ruid, &euid, &suid)) { + perror("getresuid"); + return false; + } + + if (setresuid(ruid, ruid, ruid)) { + perror("setresuid"); + return false; + } + + return true; +} + +static int SpawnChrootHelper() { int sv[2]; if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv) == -1) { perror("socketpair"); - return false; + return -1; } char *safedir = NULL; @@ -86,7 +123,7 @@ static bool SpawnChrootHelper() { safedir = SAFE_DIR2; else { fprintf(stderr, "Could not find %s\n", SAFE_DIR2); - return false; + return -1; } const pid_t pid = syscall( @@ -96,7 +133,7 @@ static bool SpawnChrootHelper() { perror("clone"); close(sv[0]); close(sv[1]); - return false; + return -1; } if (pid == 0) { @@ -124,6 +161,7 @@ static bool SpawnChrootHelper() { FatalError("read"); // do chrooting + errno = 0; if (msg != kMsgChrootMe) FatalError("Unknown message from sandboxed process"); @@ -156,7 +194,7 @@ static bool 
SpawnChrootHelper() { if (close(sv[0])) { close(sv[1]); perror("close"); - return false; + return -1; } // In the parent process, we install an environment variable containing the @@ -165,13 +203,14 @@ static bool SpawnChrootHelper() { int printed = snprintf(desc_str, sizeof(desc_str), "%u", sv[1]); if (printed < 0 || printed >= (int)sizeof(desc_str)) { fprintf(stderr, "Failed to snprintf\n"); - return false; + close(sv[1]); + return -1; } if (setenv(kSandboxDescriptorEnvironmentVarName, desc_str, 1)) { perror("setenv"); close(sv[1]); - return false; + return -1; } // We also install an environment variable containing the pid of the child @@ -179,15 +218,51 @@ static bool SpawnChrootHelper() { printed = snprintf(helper_pid_str, sizeof(helper_pid_str), "%u", pid); if (printed < 0 || printed >= (int)sizeof(helper_pid_str)) { fprintf(stderr, "Failed to snprintf\n"); - return false; + close(sv[1]); + return -1; } if (setenv(kSandboxHelperPidEnvironmentVarName, helper_pid_str, 1)) { perror("setenv"); close(sv[1]); - return false; + return -1; } + return sv[1]; +} + +static bool JailMe() { + int fd = SpawnChrootHelper(); + if (fd < 0) { + return false; + } + if (!DropRoot()) { + close(fd); + return false; + } + ssize_t bytes; + char ch = kMsgChrootMe; + do { + errno = 0; + bytes = write(fd, &ch, 1); + } while (bytes == -1 && errno == EINTR); + if (bytes != 1) { + perror("write"); + close(fd); + return false; + } + do { + errno = 0; + bytes = read(fd, &ch, 1); + } while (bytes == -1 && errno == EINTR); + close(fd); + if (bytes != 1) { + perror("read"); + return false; + } + if (ch != kMsgChrootSuccessful) { + return false; + } return true; } @@ -207,6 +282,51 @@ static bool MoveToNewNamespaces() { _exit(0); if (pid == 0) { + if (syscall(__NR_getpid) == 1) { + int fds[2]; + char ch = 0; + if (pipe(fds)) { + perror("Failed to create pipe"); + _exit(1); + } + pid = fork(); + if (pid > 0) { + // The very first process in the new namespace takes on the + // role of the 
traditional "init" process. It must reap exit + // codes of daemon processes until the namespace is completely + // empty. + // We have to be careful that this "init" process doesn't + // provide a new attack surface. So, we also move it into + // a separate chroot and we drop all privileges. It does + // still need to access "/proc" and "/dev/null", though. So, + // we have to provide it with a file handles to these resources. + // These file handle are not accessible by any other processes in + // the sandbox and thus safe. + close(fds[0]); + int proc_fd = open("/proc", O_RDONLY | O_DIRECTORY); + int null_fd = open("/dev/null", O_RDWR); + if (!JailMe()) { + FatalError("Could not remove privileges from " + "new \"init\" process"); + } + SystemInitProcess(fds[1], pid, proc_fd, null_fd); + } else if (pid != 0) { + perror("Failed to fork"); + _exit(1); + } + // Wait for the "init" process to complete initialization. + close(fds[1]); + errno = 0; + while (read(fds[0], &ch, 1) < 0 && errno == EINTR) { + } + close(fds[0]); + if (ch != ' ') { + // We'll likely never get here. If the "init" process fails, it's + // death typically takes everyone of its children with it. 
+ FatalError("Failed to set up new \"init\" process inside sandbox"); + } + } + if (kCloneExtraFlags[i] & CLONE_NEWPID) { setenv("SBX_PID_NS", "", 1 /* overwrite */); } else { @@ -232,42 +352,6 @@ static bool MoveToNewNamespaces() { return true; } -static bool DropRoot() { - if (prctl(PR_SET_DUMPABLE, 0, 0, 0, 0)) { - perror("prctl(PR_SET_DUMPABLE)"); - return false; - } - - if (prctl(PR_GET_DUMPABLE, 0, 0, 0, 0)) { - perror("Still dumpable after prctl(PR_SET_DUMPABLE)"); - return false; - } - - gid_t rgid, egid, sgid; - if (getresgid(&rgid, &egid, &sgid)) { - perror("getresgid"); - return false; - } - - if (setresgid(rgid, rgid, rgid)) { - perror("setresgid"); - return false; - } - - uid_t ruid, euid, suid; - if (getresuid(&ruid, &euid, &suid)) { - perror("getresuid"); - return false; - } - - if (setresuid(ruid, ruid, ruid)) { - perror("setresuid"); - return false; - } - - return true; -} - static bool SetupChildEnvironment() { unsigned i; @@ -343,7 +427,7 @@ int main(int argc, char **argv) { if (!MoveToNewNamespaces()) return 1; - if (!SpawnChrootHelper()) + if (SpawnChrootHelper() < 0) return 1; if (!DropRoot()) return 1; diff --git a/sandbox/sandbox.gyp b/sandbox/sandbox.gyp index f7ebcd7..4224019 100644 --- a/sandbox/sandbox.gyp +++ b/sandbox/sandbox.gyp @@ -155,6 +155,8 @@ 'target_name': 'chrome_sandbox', 'type': 'executable', 'sources': [ + 'linux/suid/init_process.c', + 'linux/suid/init_process.h', 'linux/suid/linux_util.c', 'linux/suid/linux_util.h', 'linux/suid/process_util.h', |