diff options
-rw-r--r-- | content/browser/zygote_host/zygote_host_impl_linux.cc | 2 | ||||
-rw-r--r-- | content/common/sandbox_init_linux.cc | 561 | ||||
-rw-r--r-- | content/common/sandbox_linux.cc | 100 | ||||
-rw-r--r-- | content/common/sandbox_linux.h | 23 | ||||
-rw-r--r-- | content/common/sandbox_seccomp_bpf_linux.cc | 539 | ||||
-rw-r--r-- | content/common/sandbox_seccomp_bpf_linux.h | 31 | ||||
-rw-r--r-- | content/content_common.gypi | 2 |
7 files changed, 677 insertions, 581 deletions
diff --git a/content/browser/zygote_host/zygote_host_impl_linux.cc b/content/browser/zygote_host/zygote_host_impl_linux.cc index f34df1c..e065273 100644 --- a/content/browser/zygote_host/zygote_host_impl_linux.cc +++ b/content/browser/zygote_host/zygote_host_impl_linux.cc @@ -100,11 +100,13 @@ void ZygoteHostImpl::Init(const std::string& sandbox_cmd) { switches::kVModule, switches::kRegisterPepperPlugins, switches::kDisableSeccompSandbox, + switches::kDisableSeccompFilterSandbox, switches::kEnableSeccompSandbox, // Zygote process needs to know what resources to have loaded when it // becomes a renderer process. switches::kForceDeviceScaleFactor, switches::kLoad2xResources, + switches::kNoSandbox, }; cmd_line.CopySwitchesFrom(browser_command_line, kForwardSwitches, arraysize(kForwardSwitches)); diff --git a/content/common/sandbox_init_linux.cc b/content/common/sandbox_init_linux.cc index b9cafa2..bd2504b 100644 --- a/content/common/sandbox_init_linux.cc +++ b/content/common/sandbox_init_linux.cc @@ -2,574 +2,47 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -#include "content/common/seccomp_sandbox.h" -#include "content/public/common/sandbox_init.h" - -#if defined(__i386__) || defined(__x86_64__) - -// This is an assert for GYP -#if !defined(OS_LINUX) - #error "Linux specific file compiled on non Linux OS!" -#endif - -#include <asm/unistd.h> -#include <dlfcn.h> -#include <errno.h> -#include <fcntl.h> -#include <linux/audit.h> -#include <linux/filter.h> -#include <signal.h> -#include <string.h> -#include <sys/prctl.h> -#include <sys/stat.h> -#include <sys/types.h> -#include <ucontext.h> -#include <unistd.h> - -#include <vector> +#include <string> #include "base/command_line.h" -#include "base/file_util.h" #include "base/logging.h" -#include "base/time.h" #include "content/common/sandbox_linux.h" #include "content/public/common/content_switches.h" -#include "sandbox/linux/seccomp-bpf/sandbox_bpf.h" - -// These are fairly new and not defined in all headers yet. -#if defined(__x86_64__) - -#ifndef __NR_process_vm_readv - #define __NR_process_vm_readv 310 -#endif - -#ifndef __NR_process_vm_writev - #define __NR_process_vm_writev 311 -#endif - -#elif defined(__i386__) - -#ifndef __NR_process_vm_readv - #define __NR_process_vm_readv 347 -#endif - -#ifndef __NR_process_vm_writev - #define __NR_process_vm_writev 348 -#endif - -#endif - -namespace { - -bool IsSingleThreaded() { - // Possibly racy, but it's ok because this is more of a debug check to catch - // new threaded situations arising during development. - int num_threads = - file_util::CountFilesCreatedAfter(FilePath("/proc/self/task"), - base::Time::UnixEpoch()); - - // We pass the test if we don't know ( == 0), because the setuid sandbox - // will prevent /proc access in some contexts. - return num_threads == 1 || num_threads == 0; -} - -inline bool IsChromeOS() { -#if defined(OS_CHROMEOS) - return true; -#else - return false; -#endif -} - -void LogSandboxStarted(const std::string& sandbox_name, - const std::string& process_type) { - const std::string activated_sandbox = - "Activated " + sandbox_name + " sandbox for process type: " + - process_type + "."; - if (IsChromeOS()) { - LOG(WARNING) << activated_sandbox; - } else { - VLOG(1) << activated_sandbox; - } -} - -intptr_t CrashSIGSYS_Handler(const struct arch_seccomp_data& args, void* aux) { - int syscall = args.nr; - if (syscall >= 1024) - syscall = 0; - // Encode 8-bits of the 1st two arguments too, so we can discern which socket - // type, which fcntl, ... etc., without being likely to hit a mapped - // address. - // Do not encode more bits here without thinking about increasing the - // likelihood of collision with mapped pages. - syscall |= ((args.args[0] & 0xffUL) << 12); - syscall |= ((args.args[1] & 0xffUL) << 20); - // Purposefully dereference the syscall as an address so it'll show up very - // clearly and easily in crash dumps. - volatile char* addr = reinterpret_cast<volatile char*>(syscall); - *addr = '\0'; - // In case we hit a mapped address, hit the null page with just the syscall, - // for paranoia. - syscall &= 0xfffUL; - addr = reinterpret_cast<volatile char*>(syscall); - *addr = '\0'; - for (;;) - _exit(1); -} - -// TODO(jln) we need to restrict the first parameter! -bool IsKillSyscall(int sysno) { - switch (sysno) { - case __NR_kill: - case __NR_tkill: - case __NR_tgkill: - return true; - default: - return false; - } -} - -bool IsGettimeSyscall(int sysno) { - switch (sysno) { - case __NR_clock_gettime: - case __NR_gettimeofday: - case __NR_time: - return true; - default: - return false; - } -} - -bool IsFileSystemSyscall(int sysno) { - switch (sysno) { - case __NR_open: - case __NR_openat: - case __NR_execve: - case __NR_access: - case __NR_mkdir: - case __NR_mkdirat: - case __NR_readlink: - case __NR_readlinkat: - case __NR_stat: - case __NR_lstat: - case __NR_chdir: - case __NR_mknod: - case __NR_mknodat: - return true; - default: - return false; - } -} - -bool IsAcceleratedVideoDecodeEnabled() { - // Accelerated video decode is currently enabled on Chrome OS, - // but not on Linux: crbug.com/137247. - bool is_enabled = IsChromeOS(); - - const CommandLine& command_line = *CommandLine::ForCurrentProcess(); - is_enabled = is_enabled && - !command_line.HasSwitch(switches::kDisableAcceleratedVideoDecode); - - return is_enabled; -} - -static const char kDriRcPath[] = "/etc/drirc"; - -// TODO(jorgelo): limited to /etc/drirc for now, extend this to cover -// other sandboxed file access cases. -int OpenWithCache(const char* pathname, int flags) { - static int drircfd = -1; - static bool do_open = true; - int res = -1; - - if (strcmp(pathname, kDriRcPath) == 0 && flags == O_RDONLY) { - if (do_open) { - drircfd = open(pathname, flags); - do_open = false; - res = drircfd; - } else { - // dup() man page: - // "After a successful return from one of these system calls, - // the old and new file descriptors may be used interchangeably. - // They refer to the same open file description and thus share - // file offset and file status flags; for example, if the file offset - // is modified by using lseek(2) on one of the descriptors, - // the offset is also changed for the other." - // Since |drircfd| can be dup()'ed and read many times, we need to - // lseek() it to the beginning of the file before returning. - // We assume the caller will not keep more than one fd open at any - // one time. Intel driver code in Mesa that parses /etc/drirc does - // open()/read()/close() in the same function. - if (drircfd < 0) { - errno = ENOENT; - return -1; - } - int newfd = dup(drircfd); - if (newfd < 0) { - errno = ENOMEM; - return -1; - } - if (lseek(newfd, 0, SEEK_SET) == static_cast<off_t>(-1)) { - (void) HANDLE_EINTR(close(newfd)); - errno = ENOMEM; - return -1; - } - res = newfd; - } - } else { - res = open(pathname, flags); - } - - return res; -} - -// We allow the GPU process to open /etc/drirc because it's needed by Mesa. -// OpenWithCache() has been called before enabling the sandbox, and has cached -// a file descriptor for /etc/drirc. -intptr_t GpuOpenSIGSYS_Handler(const struct arch_seccomp_data& args, - void* aux) { - uint64_t arg0 = args.args[0]; - uint64_t arg1 = args.args[1]; - const char* pathname = reinterpret_cast<const char*>(arg0); - int flags = static_cast<int>(arg1); - - if (strcmp(pathname, kDriRcPath) == 0) { - int ret = OpenWithCache(pathname, flags); - return (ret == -1) ? -errno : ret; - } else { - return -ENOENT; - } -} - -#if defined(__x86_64__) -// x86_64 only because it references system calls that are multiplexed on IA32. -playground2::Sandbox::ErrorCode GpuProcessPolicy_x86_64(int sysno) { - switch(sysno) { - case __NR_read: - case __NR_ioctl: - case __NR_poll: - case __NR_epoll_wait: - case __NR_recvfrom: - case __NR_write: - case __NR_writev: - case __NR_gettid: - case __NR_sched_yield: // Nvidia binary driver. - - case __NR_futex: - case __NR_madvise: - case __NR_sendmsg: - case __NR_recvmsg: - case __NR_eventfd2: - case __NR_pipe: - case __NR_mmap: - case __NR_mprotect: - case __NR_clone: // TODO(jln) restrict flags. - case __NR_set_robust_list: - case __NR_getuid: - case __NR_geteuid: - case __NR_getgid: - case __NR_getegid: - case __NR_epoll_create: - case __NR_fcntl: - case __NR_socketpair: - case __NR_epoll_ctl: - case __NR_prctl: - case __NR_fstat: - case __NR_close: - case __NR_restart_syscall: - case __NR_rt_sigreturn: - case __NR_brk: - case __NR_rt_sigprocmask: - case __NR_munmap: - case __NR_dup: - case __NR_mlock: - case __NR_munlock: - case __NR_exit: - case __NR_exit_group: - case __NR_lseek: - case __NR_getpid: // Nvidia binary driver. - case __NR_getppid: // ATI binary driver. - case __NR_shutdown: // Virtual driver. - case __NR_rt_sigaction: // Breakpad signal handler. - return playground2::Sandbox::SB_ALLOWED; - case __NR_socket: - return EACCES; // Nvidia binary driver. - case __NR_fchmod: - return EPERM; // ATI binary driver. - case __NR_open: - // Accelerated video decode is enabled by default only on Chrome OS. - if (IsAcceleratedVideoDecodeEnabled()) { - // Accelerated video decode needs to open /dev/dri/card0, and - // dup()'ing an already open file descriptor does not work. - // Allow open() even though it severely weakens the sandbox, - // to test the sandboxing mechanism in general. - // TODO(jorgelo): remove this once we solve the libva issue. - return playground2::Sandbox::SB_ALLOWED; - } else { - // Hook open() in the GPU process to allow opening /etc/drirc, - // needed by Mesa. - // The hook needs dup(), lseek(), and close() to be allowed. - return playground2::Sandbox::ErrorCode(GpuOpenSIGSYS_Handler, NULL); - } - default: - if (IsGettimeSyscall(sysno) || - IsKillSyscall(sysno)) { // GPU watchdog. - return playground2::Sandbox::SB_ALLOWED; - } - // Generally, filename-based syscalls will fail with ENOENT to behave - // similarly to a possible future setuid sandbox. - if (IsFileSystemSyscall(sysno)) { - return ENOENT; - } - // In any other case crash the program with our SIGSYS handler - return playground2::Sandbox::ErrorCode(CrashSIGSYS_Handler, NULL); - } -} - -// x86_64 only because it references system calls that are multiplexed on IA32. -playground2::Sandbox::ErrorCode FlashProcessPolicy_x86_64(int sysno) { - switch (sysno) { - case __NR_futex: - case __NR_write: - case __NR_epoll_wait: - case __NR_read: - case __NR_times: - case __NR_clone: // TODO(jln): restrict flags. - case __NR_set_robust_list: - case __NR_getuid: - case __NR_geteuid: - case __NR_getgid: - case __NR_getegid: - case __NR_epoll_create: - case __NR_fcntl: - case __NR_socketpair: - case __NR_pipe: - case __NR_epoll_ctl: - case __NR_gettid: - case __NR_prctl: - case __NR_fstat: - case __NR_sendmsg: - case __NR_mmap: - case __NR_munmap: - case __NR_mprotect: - case __NR_madvise: - case __NR_rt_sigaction: - case __NR_rt_sigprocmask: - case __NR_wait4: - case __NR_exit_group: - case __NR_exit: - case __NR_rt_sigreturn: - case __NR_restart_syscall: - case __NR_close: - case __NR_recvmsg: - case __NR_lseek: - case __NR_brk: - case __NR_sched_yield: - case __NR_shutdown: - case __NR_sched_getaffinity: - case __NR_sched_setscheduler: - case __NR_dup: // Flash Access. - // These are under investigation, and hopefully not here for the long term. - case __NR_shmctl: - case __NR_shmat: - case __NR_shmdt: - return playground2::Sandbox::SB_ALLOWED; - case __NR_ioctl: - return ENOTTY; // Flash Access. - case __NR_socket: - return EACCES; - default: - if (IsGettimeSyscall(sysno) || - IsKillSyscall(sysno)) { - return playground2::Sandbox::SB_ALLOWED; - } - if (IsFileSystemSyscall(sysno)) { - return ENOENT; - } - // In any other case crash the program with our SIGSYS handler. - return playground2::Sandbox::ErrorCode(CrashSIGSYS_Handler, NULL); - } -} -#endif - -playground2::Sandbox::ErrorCode BlacklistPtracePolicy(int sysno) { - if (sysno < static_cast<int>(MIN_SYSCALL) || - sysno > static_cast<int>(MAX_SYSCALL)) { - // TODO(jln) we should not have to do that in a trivial policy. - return ENOSYS; - } - switch (sysno) { - case __NR_ptrace: - case __NR_process_vm_readv: - case __NR_process_vm_writev: - case __NR_migrate_pages: - case __NR_move_pages: - return playground2::Sandbox::ErrorCode(CrashSIGSYS_Handler, NULL); - default: - return playground2::Sandbox::SB_ALLOWED; - } -} - -// Allow all syscalls. -// This will still deny x32 or IA32 calls in 64 bits mode or -// 64 bits system calls in compatibility mode. -playground2::Sandbox::ErrorCode AllowAllPolicy(int sysno) { - if (sysno < static_cast<int>(MIN_SYSCALL) || - sysno > static_cast<int>(MAX_SYSCALL)) { - // TODO(jln) we should not have to do that in a trivial policy. - return ENOSYS; - } else { - return playground2::Sandbox::SB_ALLOWED; - } -} - -// Warms up/preloads resources needed by the policies. -void WarmupPolicy(playground2::Sandbox::EvaluateSyscall policy) { -#if defined(__x86_64__) - if (policy == GpuProcessPolicy_x86_64) { - OpenWithCache(kDriRcPath, O_RDONLY); - // Accelerated video decode dlopen()'s this shared object - // inside the sandbox, so preload it now. - // TODO(jorgelo): generalize this to other platforms. - if (IsAcceleratedVideoDecodeEnabled()) { - const char kI965DrvVideoPath_64[] = - "/usr/lib64/va/drivers/i965_drv_video.so"; - dlopen(kI965DrvVideoPath_64, RTLD_NOW|RTLD_GLOBAL|RTLD_NODELETE); - } - } -#endif -} - -// Is the sandbox fully disabled for this process? -bool ShouldDisableBpfSandbox(const CommandLine& command_line, - const std::string& process_type) { - if (command_line.HasSwitch(switches::kNoSandbox) || - command_line.HasSwitch(switches::kDisableSeccompFilterSandbox)) { - return true; - } - - if (process_type == switches::kGpuProcess) { - // The GPU sandbox is disabled by default in ChromeOS, enabled by default on - // generic Linux. - // TODO(jorgelo): when we feel comfortable, make this a policy decision - // instead. (i.e. move this to GetProcessSyscallPolicy) and return an - // AllowAllPolicy for lack of "--enable-gpu-sandbox". - bool should_disable; - if (IsChromeOS()) { - should_disable = true; - } else { - should_disable = false; - } - - if (command_line.HasSwitch(switches::kEnableGpuSandbox)) - should_disable = false; - if (command_line.HasSwitch(switches::kDisableGpuSandbox)) - should_disable = true; - return should_disable; - } - - return false; -} - -playground2::Sandbox::EvaluateSyscall GetProcessSyscallPolicy( - const CommandLine& command_line, - const std::string& process_type) { -#if defined(__x86_64__) - if (process_type == switches::kGpuProcess) { - return GpuProcessPolicy_x86_64; - } +#include "content/public/common/sandbox_init.h" - if (process_type == switches::kPpapiPluginProcess) { - // TODO(jln): figure out what to do with non-Flash PPAPI - // out-of-process plug-ins. - return FlashProcessPolicy_x86_64; - } +namespace content { - if (process_type == switches::kRendererProcess || - process_type == switches::kWorkerProcess) { - return BlacklistPtracePolicy; - } - NOTREACHED(); - // This will be our default if we need one. - return AllowAllPolicy; -#else - // On IA32, we only have a small blacklist at the moment. - (void) process_type; - return BlacklistPtracePolicy; -#endif // __x86_64__ -} +// TODO(jln): have call sites provide a process / policy type to +// InitializeSandbox(). +void InitializeSandbox() { + bool seccomp_legacy_started = false; + LinuxSandbox* linux_sandbox = LinuxSandbox::GetInstance(); + const std::string process_type = + CommandLine::ForCurrentProcess()->GetSwitchValueASCII( + switches::kProcessType); -// Initialize the seccomp-bpf sandbox. -bool InitializeBpfSandbox_x86(const CommandLine& command_line, - const std::string& process_type) { - if (ShouldDisableBpfSandbox(command_line, process_type)) - return false; - // No matter what, InitializeSandbox() should always be called before threads - // are started. - // Note: IsSingleThreaded() will be true if /proc is not accessible! - if (!IsSingleThreaded()) { + // No matter what, it's always an error to call InitializeSandbox() after + // threads have been created. + if (!linux_sandbox->IsSingleThreaded()) { std::string error_message = "InitializeSandbox() called with multiple " "threads in process " + process_type; // TODO(jln): change this into a CHECK() once we are more comfortable it // does not trigger. - // On non-DEBUG build, we still log an error LOG(ERROR) << error_message; - return false; - } - - // TODO(jln): find a way for the Zygote processes under the setuid sandbox to - // have a /proc fd and pass it here. - // Passing -1 as the /proc fd since we have no special way to have it for - // now. - if (playground2::Sandbox::supportsSeccompSandbox(-1) != - playground2::Sandbox::STATUS_AVAILABLE) { - return false; + return; } - playground2::Sandbox::EvaluateSyscall SyscallPolicy = - GetProcessSyscallPolicy(command_line, process_type); - - // Warms up resources needed by the policy we're about to enable. - WarmupPolicy(SyscallPolicy); - - playground2::Sandbox::setSandboxPolicy(SyscallPolicy, NULL); - playground2::Sandbox::startSandbox(); - - return true; -} - -} // anonymous namespace - -#endif // defined(__i386__) || defined(__x86_64__) - -namespace content { - -void InitializeSandbox() { -#if defined(__i386__) || defined(__x86_64__) - const CommandLine& command_line = *CommandLine::ForCurrentProcess(); - const std::string process_type = - command_line.GetSwitchValueASCII(switches::kProcessType); - bool seccomp_legacy_started = false; - bool seccomp_bpf_started = false; - // First, try to enable seccomp-legacy. - seccomp_legacy_started = - LinuxSandbox::GetInstance()->StartSeccompLegacy(process_type); - if (seccomp_legacy_started) - LogSandboxStarted("seccomp-legacy", process_type); + seccomp_legacy_started = linux_sandbox->StartSeccompLegacy(process_type); // Then, try to enable seccomp-bpf. // If seccomp-legacy is enabled, seccomp-bpf initialization will crash // instead of failing gracefully. // TODO(markus): fix this (crbug.com/139872). if (!seccomp_legacy_started) { - seccomp_bpf_started = - InitializeBpfSandbox_x86(command_line, process_type); + linux_sandbox->StartSeccompBpf(process_type); } - if (seccomp_bpf_started) - LogSandboxStarted("seccomp-bpf", process_type); -#endif } } // namespace content diff --git a/content/common/sandbox_linux.cc b/content/common/sandbox_linux.cc index a0bb49a..33676dd 100644 --- a/content/common/sandbox_linux.cc +++ b/content/common/sandbox_linux.cc @@ -8,20 +8,33 @@ #include "base/command_line.h" #include "base/eintr_wrapper.h" +#include "base/file_util.h" #include "base/logging.h" #include "base/memory/singleton.h" +#include "base/time.h" #include "content/common/sandbox_linux.h" #include "content/common/seccomp_sandbox.h" +#include "content/common/sandbox_seccomp_bpf_linux.h" #include "content/public/common/content_switches.h" #include "content/public/common/sandbox_linux.h" #include "sandbox/linux/suid/client/setuid_sandbox_client.h" -#if defined(SECCOMP_BPF_SANDBOX) -#include "sandbox/linux/seccomp-bpf/sandbox_bpf.h" -#endif - namespace { +void LogSandboxStarted(const std::string& sandbox_name) { + const CommandLine& command_line = *CommandLine::ForCurrentProcess(); + const std::string process_type = + command_line.GetSwitchValueASCII(switches::kProcessType); + const std::string activated_sandbox = + "Activated " + sandbox_name + " sandbox for process type: " + + process_type + "."; +#if defined(OS_CHROMEOS) + LOG(WARNING) << activated_sandbox; +#else + VLOG(1) << activated_sandbox; +#endif +} + // Implement the command line enabling logic for seccomp-legacy. bool IsSeccompLegacyDesired() { #if defined(SECCOMP_SANDBOX) @@ -38,6 +51,17 @@ bool IsSeccompLegacyDesired() { return false; } +// Our "policy" on whether or not to enable seccomp-legacy. Only renderers are +// supported. +bool ShouldEnableSeccompLegacy(const std::string& process_type) { + if (IsSeccompLegacyDesired() && + process_type == switches::kRendererProcess) { + return true; + } else { + return false; + } +} + } // namespace namespace content { @@ -46,6 +70,7 @@ LinuxSandbox::LinuxSandbox() : proc_fd_(-1), pre_initialized_(false), seccomp_legacy_supported_(false), + seccomp_bpf_supported_(false), setuid_sandbox_client_(sandbox::SetuidSandboxClient::Create()) { if (setuid_sandbox_client_ == NULL) { LOG(FATAL) << "Failed to instantiate the setuid sandbox client."; @@ -64,6 +89,7 @@ LinuxSandbox* LinuxSandbox::GetInstance() { void LinuxSandbox::PreinitializeSandboxBegin() { CHECK(!pre_initialized_); seccomp_legacy_supported_ = false; + seccomp_bpf_supported_ = false; #if defined(SECCOMP_SANDBOX) if (IsSeccompLegacyDesired()) { proc_fd_ = open("/proc", O_DIRECTORY | O_RDONLY); @@ -84,15 +110,16 @@ void LinuxSandbox::PreinitializeSandboxBegin() { } } #endif // SECCOMP_SANDBOX -#if defined(SECCOMP_BPF_SANDBOX) // Similarly, we "pre-warm" the code that detects supports for seccomp BPF. // TODO(jln): Use proc_fd_ here too once we're comfortable it does not create // an additional security risk. - if (playground2::Sandbox::supportsSeccompSandbox(-1) != - playground2::Sandbox::STATUS_AVAILABLE) { - VLOG(1) << "Lacking support for seccomp-bpf sandbox."; + if (SandboxSeccompBpf::IsSeccompBpfDesired()) { + if (!SandboxSeccompBpf::SupportsSandbox()) { + VLOG(1) << "Lacking support for seccomp-bpf sandbox."; + } else { + seccomp_bpf_supported_ = true; + } } -#endif // SECCOMP_BPF_SANDBOX pre_initialized_ = true; } @@ -119,7 +146,7 @@ void LinuxSandbox::PreinitializeSandbox(const std::string& process_type) { PreinitializeSandboxFinish(process_type); } -int LinuxSandbox::GetStatus() { +int LinuxSandbox::GetStatus() const { CHECK(pre_initialized_); int sandbox_flags = 0; if (setuid_sandbox_client_->IsSandboxed()) { @@ -129,12 +156,28 @@ int LinuxSandbox::GetStatus() { if (setuid_sandbox_client_->IsInNewNETNamespace()) sandbox_flags |= kSandboxLinuxNetNS; } - if (seccomp_legacy_supported_) { + if (seccomp_legacy_supported() && + ShouldEnableSeccompLegacy(switches::kRendererProcess)) { + // We report whether the sandbox will be activated when renderers go + // through sandbox initialization. sandbox_flags |= kSandboxLinuxSeccomp; } return sandbox_flags; } +bool LinuxSandbox::IsSingleThreaded() const { + // TODO(jln): re-implement this properly and use our proc_fd_ if available. + // Possibly racy, but it's ok because this is more of a debug check to catch + // new threaded situations arising during development. + int num_threads = file_util::CountFilesCreatedAfter( + FilePath("/proc/self/task"), + base::Time::UnixEpoch()); + + // We pass the test if we don't know ( == 0), because the setuid sandbox + // will prevent /proc access in some contexts. + return num_threads == 1 || num_threads == 0; +} + sandbox::SetuidSandboxClient* LinuxSandbox::setuid_sandbox_client() const { return setuid_sandbox_client_.get(); @@ -144,13 +187,14 @@ sandbox::SetuidSandboxClient* bool LinuxSandbox::StartSeccompLegacy(const std::string& process_type) { if (!pre_initialized_) PreinitializeSandbox(process_type); - if (ShouldEnableSeccompLegacy(process_type)) { + if (seccomp_legacy_supported() && ShouldEnableSeccompLegacy(process_type)) { // SupportsSeccompSandbox() returns a cached result, as we already // called it earlier in the PreinitializeSandbox(). Thus, it is OK for us // to not pass in a file descriptor for "/proc". #if defined(SECCOMP_SANDBOX) if (SupportsSeccompSandbox(-1)) { StartSeccompSandbox(); + LogSandboxStarted("seccomp-legacy"); return true; } #endif @@ -158,26 +202,28 @@ bool LinuxSandbox::StartSeccompLegacy(const std::string& process_type) { return false; } -// For seccomp-bpf, we will use the seccomp-bpf policy class. -// TODO(jln): implement this. +// For seccomp-bpf, we use the SandboxSeccompBpf class. bool LinuxSandbox::StartSeccompBpf(const std::string& process_type) { + if (!pre_initialized_) + PreinitializeSandbox(process_type); + bool started_bpf_sandbox = false; + if (seccomp_bpf_supported()) + started_bpf_sandbox = SandboxSeccompBpf::StartSandbox(process_type); + + if (started_bpf_sandbox) + LogSandboxStarted("seccomp-bpf"); + + return started_bpf_sandbox; +} + +bool LinuxSandbox::seccomp_legacy_supported() const { CHECK(pre_initialized_); - NOTREACHED(); - return false; + return seccomp_legacy_supported_; } -// Our "policy" on whether or not to enable seccomp-legacy. Only renderers are -// supported. -bool LinuxSandbox::ShouldEnableSeccompLegacy( - const std::string& process_type) { +bool LinuxSandbox::seccomp_bpf_supported() const { CHECK(pre_initialized_); - if (IsSeccompLegacyDesired() && - seccomp_legacy_supported_ && - process_type == switches::kRendererProcess) { - return true; - } else { - return false; - } + return seccomp_bpf_supported_; } } // namespace content diff --git a/content/common/sandbox_linux.h b/content/common/sandbox_linux.h index 8502dfb..8855c31 100644 --- a/content/common/sandbox_linux.h +++ b/content/common/sandbox_linux.h @@ -5,14 +5,12 @@ #ifndef CONTENT_COMMON_SANDBOX_LINUX_H_ #define CONTENT_COMMON_SANDBOX_LINUX_H_ +#include <string> + +#include "base/basictypes.h" #include "base/memory/scoped_ptr.h" #include "content/public/common/sandbox_linux.h" -// TODO(jln) move this somewhere else. -#if defined(__i386__) || defined(__x86_64__) -#define SECCOMP_BPF_SANDBOX -#endif - template <typename T> struct DefaultSingletonTraits; namespace sandbox { class SetuidSandboxClient; } @@ -60,7 +58,9 @@ class LinuxSandbox { // Since we need to provide the status before the sandboxes are actually // started, this returns what will actually happen once the various Start* // functions are called from inside a renderer. - int GetStatus(); + int GetStatus() const; + // Is the current process single threaded ? + bool IsSingleThreaded() const; // Simple accessor for our instance of the setuid sandbox. Will never return // NULL. @@ -71,21 +71,24 @@ class LinuxSandbox { // Check the policy and eventually start the seccomp-legacy sandbox. bool StartSeccompLegacy(const std::string& process_type); // Check the policy and eventually start the seccomp-bpf sandbox. - // TODO(jln): not implemented at the moment. bool StartSeccompBpf(const std::string& process_type); private: friend struct DefaultSingletonTraits<LinuxSandbox>; - bool ShouldEnableSeccompLegacy(const std::string& process_type); + + // We must have been pre_initialized_ before using either of these. + bool seccomp_legacy_supported() const; + bool seccomp_bpf_supported() const; int proc_fd_; // Have we been through PreinitializeSandbox or PreinitializeSandboxBegin ? bool pre_initialized_; bool seccomp_legacy_supported_; // Accurate if pre_initialized_. + bool seccomp_bpf_supported_; // Accurate if pre_initialized_. scoped_ptr<sandbox::SetuidSandboxClient> setuid_sandbox_client_; - LinuxSandbox(); + ~LinuxSandbox(); - DISALLOW_COPY_AND_ASSIGN(LinuxSandbox); + DISALLOW_IMPLICIT_CONSTRUCTORS(LinuxSandbox); }; } // namespace content diff --git a/content/common/sandbox_seccomp_bpf_linux.cc b/content/common/sandbox_seccomp_bpf_linux.cc new file mode 100644 index 0000000..3468413 --- /dev/null +++ b/content/common/sandbox_seccomp_bpf_linux.cc @@ -0,0 +1,539 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include <asm/unistd.h> +#include <dlfcn.h> +#include <errno.h> +#include <fcntl.h> +#include <linux/audit.h> +#include <linux/filter.h> +#include <signal.h> +#include <string.h> +#include <sys/prctl.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <ucontext.h> +#include <unistd.h> + +#include <vector> + +#include "base/command_line.h" +#include "base/logging.h" +#include "content/common/sandbox_linux.h" +#include "content/common/sandbox_seccomp_bpf_linux.h" +#include "content/public/common/content_switches.h" + +// These are the only architectures supported for now. +#if defined(__i386__) || defined(__x86_64__) +#define SECCOMP_BPF_SANDBOX +#endif + +#if defined(SECCOMP_BPF_SANDBOX) +#include "sandbox/linux/seccomp-bpf/sandbox_bpf.h" + +// These are fairly new and not defined in all headers yet. +#if defined(__x86_64__) + +#ifndef __NR_process_vm_readv + #define __NR_process_vm_readv 310 +#endif + +#ifndef __NR_process_vm_writev + #define __NR_process_vm_writev 311 +#endif + +#elif defined(__i386__) + +#ifndef __NR_process_vm_readv + #define __NR_process_vm_readv 347 +#endif + +#ifndef __NR_process_vm_writev + #define __NR_process_vm_writev 348 +#endif + +#endif + +namespace { + +inline bool IsChromeOS() { +#if defined(OS_CHROMEOS) + return true; +#else + return false; +#endif +} + +void LogSandboxStarted(const std::string& sandbox_name, + const std::string& process_type) { + const std::string activated_sandbox = + "Activated " + sandbox_name + " sandbox for process type: " + + process_type + "."; + if (IsChromeOS()) { + LOG(WARNING) << activated_sandbox; + } else { + VLOG(1) << activated_sandbox; + } +} + +intptr_t CrashSIGSYS_Handler(const struct arch_seccomp_data& args, void* aux) { + int syscall = args.nr; + if (syscall >= 1024) + syscall = 0; + // Encode 8-bits of the 1st two arguments too, so we can discern which socket + // type, which fcntl, ... etc., without being likely to hit a mapped + // address. + // Do not encode more bits here without thinking about increasing the + // likelihood of collision with mapped pages. + syscall |= ((args.args[0] & 0xffUL) << 12); + syscall |= ((args.args[1] & 0xffUL) << 20); + // Purposefully dereference the syscall as an address so it'll show up very + // clearly and easily in crash dumps. + volatile char* addr = reinterpret_cast<volatile char*>(syscall); + *addr = '\0'; + // In case we hit a mapped address, hit the null page with just the syscall, + // for paranoia. + syscall &= 0xfffUL; + addr = reinterpret_cast<volatile char*>(syscall); + *addr = '\0'; + for (;;) + _exit(1); +} + +// TODO(jln) we need to restrict the first parameter! +bool IsKillSyscall(int sysno) { + switch (sysno) { + case __NR_kill: + case __NR_tkill: + case __NR_tgkill: + return true; + default: + return false; + } +} + +bool IsGettimeSyscall(int sysno) { + switch (sysno) { + case __NR_clock_gettime: + case __NR_gettimeofday: + case __NR_time: + return true; + default: + return false; + } +} + +bool IsFileSystemSyscall(int sysno) { + switch (sysno) { + case __NR_open: + case __NR_openat: + case __NR_execve: + case __NR_access: + case __NR_mkdir: + case __NR_mkdirat: + case __NR_readlink: + case __NR_readlinkat: + case __NR_stat: + case __NR_lstat: + case __NR_chdir: + case __NR_mknod: + case __NR_mknodat: + return true; + default: + return false; + } +} + +bool IsAcceleratedVideoDecodeEnabled() { + // Accelerated video decode is currently enabled on Chrome OS, + // but not on Linux: crbug.com/137247. + bool is_enabled = IsChromeOS(); + + const CommandLine& command_line = *CommandLine::ForCurrentProcess(); + is_enabled = is_enabled && + !command_line.HasSwitch(switches::kDisableAcceleratedVideoDecode); + + return is_enabled; +} + +static const char kDriRcPath[] = "/etc/drirc"; + +// TODO(jorgelo): limited to /etc/drirc for now, extend this to cover +// other sandboxed file access cases. +int OpenWithCache(const char* pathname, int flags) { + static int drircfd = -1; + static bool do_open = true; + int res = -1; + + if (strcmp(pathname, kDriRcPath) == 0 && flags == O_RDONLY) { + if (do_open) { + drircfd = open(pathname, flags); + do_open = false; + res = drircfd; + } else { + // dup() man page: + // "After a successful return from one of these system calls, + // the old and new file descriptors may be used interchangeably. + // They refer to the same open file description and thus share + // file offset and file status flags; for example, if the file offset + // is modified by using lseek(2) on one of the descriptors, + // the offset is also changed for the other." + // Since |drircfd| can be dup()'ed and read many times, we need to + // lseek() it to the beginning of the file before returning. + // We assume the caller will not keep more than one fd open at any + // one time. Intel driver code in Mesa that parses /etc/drirc does + // open()/read()/close() in the same function. + if (drircfd < 0) { + errno = ENOENT; + return -1; + } + int newfd = dup(drircfd); + if (newfd < 0) { + errno = ENOMEM; + return -1; + } + if (lseek(newfd, 0, SEEK_SET) == static_cast<off_t>(-1)) { + (void) HANDLE_EINTR(close(newfd)); + errno = ENOMEM; + return -1; + } + res = newfd; + } + } else { + res = open(pathname, flags); + } + + return res; +} + +// We allow the GPU process to open /etc/drirc because it's needed by Mesa. +// OpenWithCache() has been called before enabling the sandbox, and has cached +// a file descriptor for /etc/drirc. +intptr_t GpuOpenSIGSYS_Handler(const struct arch_seccomp_data& args, + void* aux) { + uint64_t arg0 = args.args[0]; + uint64_t arg1 = args.args[1]; + const char* pathname = reinterpret_cast<const char*>(arg0); + int flags = static_cast<int>(arg1); + + if (strcmp(pathname, kDriRcPath) == 0) { + int ret = OpenWithCache(pathname, flags); + return (ret == -1) ? -errno : ret; + } else { + return -ENOENT; + } +} + +#if defined(__x86_64__) +// x86_64 only because it references system calls that are multiplexed on IA32. +playground2::Sandbox::ErrorCode GpuProcessPolicy_x86_64(int sysno) { + switch(sysno) { + case __NR_read: + case __NR_ioctl: + case __NR_poll: + case __NR_epoll_wait: + case __NR_recvfrom: + case __NR_write: + case __NR_writev: + case __NR_gettid: + case __NR_sched_yield: // Nvidia binary driver. + + case __NR_futex: + case __NR_madvise: + case __NR_sendmsg: + case __NR_recvmsg: + case __NR_eventfd2: + case __NR_pipe: + case __NR_mmap: + case __NR_mprotect: + case __NR_clone: // TODO(jln) restrict flags. + case __NR_set_robust_list: + case __NR_getuid: + case __NR_geteuid: + case __NR_getgid: + case __NR_getegid: + case __NR_epoll_create: + case __NR_fcntl: + case __NR_socketpair: + case __NR_epoll_ctl: + case __NR_prctl: + case __NR_fstat: + case __NR_close: + case __NR_restart_syscall: + case __NR_rt_sigreturn: + case __NR_brk: + case __NR_rt_sigprocmask: + case __NR_munmap: + case __NR_dup: + case __NR_mlock: + case __NR_munlock: + case __NR_exit: + case __NR_exit_group: + case __NR_lseek: + case __NR_getpid: // Nvidia binary driver. + case __NR_getppid: // ATI binary driver. + case __NR_shutdown: // Virtual driver. + case __NR_rt_sigaction: // Breakpad signal handler. + return playground2::Sandbox::SB_ALLOWED; + case __NR_socket: + return EACCES; // Nvidia binary driver. + case __NR_fchmod: + return EPERM; // ATI binary driver. + case __NR_open: + // Accelerated video decode is enabled by default only on Chrome OS. + if (IsAcceleratedVideoDecodeEnabled()) { + // Accelerated video decode needs to open /dev/dri/card0, and + // dup()'ing an already open file descriptor does not work. + // Allow open() even though it severely weakens the sandbox, + // to test the sandboxing mechanism in general. + // TODO(jorgelo): remove this once we solve the libva issue. + return playground2::Sandbox::SB_ALLOWED; + } else { + // Hook open() in the GPU process to allow opening /etc/drirc, + // needed by Mesa. + // The hook needs dup(), lseek(), and close() to be allowed. + return playground2::Sandbox::ErrorCode(GpuOpenSIGSYS_Handler, NULL); + } + default: + if (IsGettimeSyscall(sysno) || + IsKillSyscall(sysno)) { // GPU watchdog. + return playground2::Sandbox::SB_ALLOWED; + } + // Generally, filename-based syscalls will fail with ENOENT to behave + // similarly to a possible future setuid sandbox. + if (IsFileSystemSyscall(sysno)) { + return ENOENT; + } + // In any other case crash the program with our SIGSYS handler + return playground2::Sandbox::ErrorCode(CrashSIGSYS_Handler, NULL); + } +} + +// x86_64 only because it references system calls that are multiplexed on IA32. +playground2::Sandbox::ErrorCode FlashProcessPolicy_x86_64(int sysno) { + switch (sysno) { + case __NR_futex: + case __NR_write: + case __NR_epoll_wait: + case __NR_read: + case __NR_times: + case __NR_clone: // TODO(jln): restrict flags. + case __NR_set_robust_list: + case __NR_getuid: + case __NR_geteuid: + case __NR_getgid: + case __NR_getegid: + case __NR_epoll_create: + case __NR_fcntl: + case __NR_socketpair: + case __NR_pipe: + case __NR_epoll_ctl: + case __NR_gettid: + case __NR_prctl: + case __NR_fstat: + case __NR_sendmsg: + case __NR_mmap: + case __NR_munmap: + case __NR_mprotect: + case __NR_madvise: + case __NR_rt_sigaction: + case __NR_rt_sigprocmask: + case __NR_wait4: + case __NR_exit_group: + case __NR_exit: + case __NR_rt_sigreturn: + case __NR_restart_syscall: + case __NR_close: + case __NR_recvmsg: + case __NR_lseek: + case __NR_brk: + case __NR_sched_yield: + case __NR_shutdown: + case __NR_sched_getaffinity: + case __NR_sched_setscheduler: + case __NR_dup: // Flash Access. + // These are under investigation, and hopefully not here for the long term. + case __NR_shmctl: + case __NR_shmat: + case __NR_shmdt: + return playground2::Sandbox::SB_ALLOWED; + case __NR_ioctl: + return ENOTTY; // Flash Access. + case __NR_socket: + return EACCES; + default: + if (IsGettimeSyscall(sysno) || + IsKillSyscall(sysno)) { + return playground2::Sandbox::SB_ALLOWED; + } + if (IsFileSystemSyscall(sysno)) { + return ENOENT; + } + // In any other case crash the program with our SIGSYS handler. + return playground2::Sandbox::ErrorCode(CrashSIGSYS_Handler, NULL); + } +} +#endif + +playground2::Sandbox::ErrorCode BlacklistPtracePolicy(int sysno) { + if (sysno < static_cast<int>(MIN_SYSCALL) || + sysno > static_cast<int>(MAX_SYSCALL)) { + // TODO(jln) we should not have to do that in a trivial policy. + return ENOSYS; + } + switch (sysno) { + case __NR_ptrace: + case __NR_process_vm_readv: + case __NR_process_vm_writev: + case __NR_migrate_pages: + case __NR_move_pages: + return playground2::Sandbox::ErrorCode(CrashSIGSYS_Handler, NULL); + default: + return playground2::Sandbox::SB_ALLOWED; + } +} + +// Allow all syscalls. +// This will still deny x32 or IA32 calls in 64 bits mode or +// 64 bits system calls in compatibility mode. +playground2::Sandbox::ErrorCode AllowAllPolicy(int sysno) { + if (sysno < static_cast<int>(MIN_SYSCALL) || + sysno > static_cast<int>(MAX_SYSCALL)) { + // TODO(jln) we should not have to do that in a trivial policy. + return ENOSYS; + } else { + return playground2::Sandbox::SB_ALLOWED; + } +} + +// Warms up/preloads resources needed by the policies. +void WarmupPolicy(playground2::Sandbox::EvaluateSyscall policy) { +#if defined(__x86_64__) + if (policy == GpuProcessPolicy_x86_64) { + OpenWithCache(kDriRcPath, O_RDONLY); + // Accelerated video decode dlopen()'s this shared object + // inside the sandbox, so preload it now. + // TODO(jorgelo): generalize this to other platforms. + if (IsAcceleratedVideoDecodeEnabled()) { + const char kI965DrvVideoPath_64[] = + "/usr/lib64/va/drivers/i965_drv_video.so"; + dlopen(kI965DrvVideoPath_64, RTLD_NOW|RTLD_GLOBAL|RTLD_NODELETE); + } + } +#endif +} + +// Is the sandbox fully disabled for this process? +bool ShouldDisableBpfSandbox(const CommandLine& command_line, + const std::string& process_type) { + if (process_type == switches::kGpuProcess) { + // The GPU sandbox is disabled by default in ChromeOS, enabled by default on + // generic Linux. + // TODO(jorgelo): when we feel comfortable, make this a policy decision + // instead. (i.e. move this to GetProcessSyscallPolicy) and return an + // AllowAllPolicy for lack of "--enable-gpu-sandbox". + bool should_disable; + if (IsChromeOS()) { + should_disable = true; + } else { + should_disable = false; + } + + if (command_line.HasSwitch(switches::kEnableGpuSandbox)) + should_disable = false; + if (command_line.HasSwitch(switches::kDisableGpuSandbox)) + should_disable = true; + return should_disable; + } + + return false; +} + +playground2::Sandbox::EvaluateSyscall GetProcessSyscallPolicy( + const CommandLine& command_line, + const std::string& process_type) { +#if defined(__x86_64__) + if (process_type == switches::kGpuProcess) { + return GpuProcessPolicy_x86_64; + } + + if (process_type == switches::kPpapiPluginProcess) { + // TODO(jln): figure out what to do with non-Flash PPAPI + // out-of-process plug-ins. + return FlashProcessPolicy_x86_64; + } + + if (process_type == switches::kRendererProcess || + process_type == switches::kWorkerProcess) { + return BlacklistPtracePolicy; + } + NOTREACHED(); + // This will be our default if we need one. + return AllowAllPolicy; +#else + // On IA32, we only have a small blacklist at the moment. + (void) process_type; + return BlacklistPtracePolicy; +#endif // __x86_64__ +} + +// Initialize the seccomp-bpf sandbox. +bool StartBpfSandbox_x86(const CommandLine& command_line, + const std::string& process_type) { + playground2::Sandbox::EvaluateSyscall SyscallPolicy = + GetProcessSyscallPolicy(command_line, process_type); + + // Warms up resources needed by the policy we're about to enable. + WarmupPolicy(SyscallPolicy); + + playground2::Sandbox::setSandboxPolicy(SyscallPolicy, NULL); + playground2::Sandbox::startSandbox(); + + return true; +} + +} // namespace + +#endif // SECCOMP_BPF_SANDBOX + +namespace content { + +// Is seccomp BPF globally enabled? +bool SandboxSeccompBpf::IsSeccompBpfDesired() { + const CommandLine& command_line = *CommandLine::ForCurrentProcess(); + if (!command_line.HasSwitch(switches::kNoSandbox) && + !command_line.HasSwitch(switches::kDisableSeccompFilterSandbox)) { + return true; + } else { + return false; + } +} + +bool SandboxSeccompBpf::SupportsSandbox() { +#if defined(SECCOMP_BPF_SANDBOX) + // TODO(jln): pass the saved proc_fd_ from the LinuxSandbox singleton + // here. + if (playground2::Sandbox::supportsSeccompSandbox(-1) == + playground2::Sandbox::STATUS_AVAILABLE) { + return true; + } +#endif + return false; +} + +bool SandboxSeccompBpf::StartSandbox(const std::string& process_type) { +#if defined(SECCOMP_BPF_SANDBOX) + const CommandLine& command_line = *CommandLine::ForCurrentProcess(); + + if (IsSeccompBpfDesired() && // Global switches policy. + // Process-specific policy. + !ShouldDisableBpfSandbox(command_line, process_type) && + SupportsSandbox()) { + return StartBpfSandbox_x86(command_line, process_type); + } +#endif + return false; +} + +} // namespace content diff --git a/content/common/sandbox_seccomp_bpf_linux.h b/content/common/sandbox_seccomp_bpf_linux.h new file mode 100644 index 0000000..694797d3 --- /dev/null +++ b/content/common/sandbox_seccomp_bpf_linux.h @@ -0,0 +1,31 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef CONTENT_COMMON_SANDBOX_SECCOMP_BPF_LINUX_H_ +#define CONTENT_COMMON_SANDBOX_SECCOMP_BPF_LINUX_H_ + +#include "base/basictypes.h" + +namespace content { + +class SandboxSeccompBpf { + public: + // Is the sandbox globally enabled, can anything use it at all ? + static bool IsSeccompBpfDesired(); + // Should the sandbox be enabled for process_type ? + static bool ShouldEnableSeccompBpf(const std::string& process_type); + // Check if the kernel supports this sandbox. It's useful to "prewarm" + // this, part of the result will be cached. + static bool SupportsSandbox(); + // Start the sandbox and apply the policy for process_type. + static bool StartSandbox(const std::string& process_type); + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(SandboxSeccompBpf); +}; + +} // namespace content + +#endif // CONTENT_COMMON_SANDBOX_SECCOMP_BPF_LINUX_H_ + diff --git a/content/content_common.gypi b/content/content_common.gypi index 62abb78..577a088 100644 --- a/content/content_common.gypi +++ b/content/content_common.gypi @@ -324,6 +324,8 @@ 'common/sandbox_linux.cc', 'common/sandbox_policy.cc', 'common/sandbox_policy.h', + 'common/sandbox_seccomp_bpf_linux.cc', + 'common/sandbox_seccomp_bpf_linux.h', 'common/savable_url_schemes.cc', 'common/savable_url_schemes.h', 'common/set_process_title.cc', |