summaryrefslogtreecommitdiffstats
path: root/sandbox/linux/seccomp/trusted_thread.cc
diff options
context:
space:
mode:
authormarkus@chromium.org <markus@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2010-04-20 18:05:23 +0000
committermarkus@chromium.org <markus@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2010-04-20 18:05:23 +0000
commit00248036bdbbcd913adb34193cf21982a2fdc82a (patch)
tree15007b2ef02f6291e2a3f312c928ab2f6f0229bf /sandbox/linux/seccomp/trusted_thread.cc
parentae654689c8fae6eb523054f073ce8baabf4ec2c5 (diff)
downloadchromium_src-00248036bdbbcd913adb34193cf21982a2fdc82a.zip
chromium_src-00248036bdbbcd913adb34193cf21982a2fdc82a.tar.gz
chromium_src-00248036bdbbcd913adb34193cf21982a2fdc82a.tar.bz2
Make the use of signals inside of the sandbox safe.
We previously assumed that no signals would ever be enabled in the sandbox and thus there was no way to trick the trusted thread into executing potentially untrusted code. In an attempt to lift this restriction, this changelist modifies the trusted thread so that: (1) it has an invalid stack pointer at all times. Any attempt to handle a signal would result in the kernel trying to push a signal stack, which would immediately result in a SEGV and then terminate the application. (2) all signals are blocked while outside of trusted code. If a signal is triggered, it either gets handled on one of the sandboxed threads (for asynchronous signals), or it results in the application getting terminated by the kernel (for synchronous signals). This changelist is difficult not only because eliminating all uses of the stack pointer requires some very careful assembly coding, but more importantly because we have to restore signals after we enter seccomp mode. As sigprocmask() is a restricted system call, the only way to restore the signal mask is by calling sigreturn() with a suitably tweaked signal stack frame. While the first couple of bytes of the signal stack frame are well-defined and unlikely to change, the entire signal stack frame is not documented as part of the stable ABI. The exact format depends on the number of modified CPU registers (e.g. SSE, MMX, floating point, ...). The only way for us to get a valid signal stack frame is to trigger a signal, and to create a (possibly adjusted) copy of the signal frame. We obviously have to do this _before_ we block all signals upon entering trusted code. The two places where this needs to happen are upon start of the sandbox, when launching the initial trusted thread, and upon any call to clone(). BUG=37728 TEST=Run chrome and verify that /proc/$PID/status shows the correct signal mask for trusted threads. The latter can be identified with strace. 
Review URL: http://codereview.chromium.org/1594040 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@45055 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'sandbox/linux/seccomp/trusted_thread.cc')
-rw-r--r--sandbox/linux/seccomp/trusted_thread.cc804
1 files changed, 519 insertions, 285 deletions
diff --git a/sandbox/linux/seccomp/trusted_thread.cc b/sandbox/linux/seccomp/trusted_thread.cc
index c73091c..240e65f 100644
--- a/sandbox/linux/seccomp/trusted_thread.cc
+++ b/sandbox/linux/seccomp/trusted_thread.cc
@@ -21,6 +21,44 @@ void Sandbox::createTrustedThread(int processFdPub, int cloneFdPub,
"mov %0, %%rbp\n" // %rbp = args
"xor %%rbx, %%rbx\n" // initial sequence number
"lea 999f(%%rip), %%r15\n" // continue in same thread
+
+ // Signal handlers are process-wide. This means that for security
+ // reasons, we cannot allow that the trusted thread ever executes any
+ // signal handlers.
+ // We prevent the execution of signal handlers by setting a signal
+ // mask that blocks all signals. In addition, we make sure that the
+ // stack pointer is invalid.
+ // We cannot reset the signal mask until after we have enabled
+ // Seccomp mode. Our sigprocmask() wrapper would normally do this by
+ // raising a signal, modifying the signal mask in the kernel-generated
+ // signal frame, and then calling sigreturn(). This presents a bit of
+ // a Catch-22, as all signals are masked and we can therefore not
+ // raise any signal that would allow us to generate the signal stack
+ // frame.
+ // Instead, we have to create the signal stack frame prior to entering
+ // Seccomp mode. This incidentally also helps us to restore the
+ // signal mask to the same value that it had prior to entering the
+ // sandbox.
+ // The signal wrapper for clone() is the second entry point into this
+ // code (by means of sending an IPC to its trusted thread). It goes
+ // through the same steps of creating a signal stack frame on the
+ // newly created thread's stacks prior to cloning. See clone.cc for
+ // details.
+ "mov $56+0xF000, %%eax\n" // __NR_clone + 0xF000
+ "sub $8, %%rsp\n"
+ "mov %%rsp, %%rdx\n" // push a signal stack frame (see clone.cc)
+ "mov %%rsp, 0(%%rsp)\n"
+ "int $0\n"
+ "mov 0(%%rsp), %%r9\n"
+ "add $8, 0xA0(%%r9)\n" // pop stack upon call to sigreturn()
+ "mov $2, %%rdi\n" // how = SIG_SETMASK
+ "movq $-1, 0(%%rsp)\n"
+ "mov %%rsp, %%rsi\n" // set = full mask
+ "xor %%rdx, %%rdx\n" // old_set = NULL
+ "mov $8, %%r10\n" // mask all 64 signals
+ "mov $14, %%eax\n" // NR_rt_sigprocmask
+ "syscall\n"
+ "xor %%rsp, %%rsp\n" // invalidate the stack in all trusted code
"jmp 20f\n" // create trusted thread
// TODO(markus): Coalesce the read() operations by reading into a bigger
@@ -36,42 +74,44 @@ void Sandbox::createTrustedThread(int processFdPub, int cloneFdPub,
// %rbx: sequence number for trusted calls
// Temporary variables:
- // %r9: system call number
+ // %r8: child stack
+ // %r9: system call number, child stack
// %rbp: secure memory of previous thread
// Layout of secure shared memory region (c.f. securemem.h):
// 0x00: pointer to the secure shared memory region (i.e. self)
// 0x08: sequence number; must match %rbx
- // 0x10: system call number; passed to syscall in %rax
- // 0x18: first argument; passed to syscall in %rdi
- // 0x20: second argument; passed to syscall in %rsi
- // 0x28: third argument; passed to syscall in %rdx
- // 0x30: fourth argument; passed to syscall in %r10
- // 0x38: fifth argument; passed to syscall in %r8
- // 0x40: sixth argument; passed to syscall in %r9
- // 0x48: stored return address for clone() system call
- // 0x50: stored %rbp value for clone() system call
- // 0x58: stored %rbx value for clone() system call
- // 0x60: stored %rcx value for clone() system call
- // 0x68: stored %rdx value for clone() system call
- // 0x70: stored %rsi value for clone() system call
- // 0x78: stored %rdi value for clone() system call
- // 0x80: stored %r8 value for clone() system call
- // 0x88: stored %r9 value for clone() system call
- // 0x90: stored %r10 value for clone() system call
- // 0x98: stored %r11 value for clone() system call
- // 0xA0: stored %r12 value for clone() system call
- // 0xA8: stored %r13 value for clone() system call
- // 0xB0: stored %r14 value for clone() system call
- // 0xB8: stored %r15 value for clone() system call
- // 0xC0: new shared memory for clone()
- // 0xC8: processFdPub for talking to trusted process
- // 0xCC: cloneFdPub for talking to trusted process
- // 0xD0: set to non-zero, if in debugging mode
- // 0xD4: most recent SHM id returned by shmget(IPC_PRIVATE)
- // 0xD8: cookie assigned to us by the trusted process (TLS_COOKIE)
- // 0xE0: thread id (TLS_TID)
- // 0xE8: threadFdPub (TLS_THREAD_FD)
+ // 0x10: call type; must match %eax, iff %eax == -1 || %eax == -2
+ // 0x18: system call number; passed to syscall in %rax
+ // 0x20: first argument; passed to syscall in %rdi
+ // 0x28: second argument; passed to syscall in %rsi
+ // 0x30: third argument; passed to syscall in %rdx
+ // 0x38: fourth argument; passed to syscall in %r10
+ // 0x40: fifth argument; passed to syscall in %r8
+ // 0x48: sixth argument; passed to syscall in %r9
+ // 0x50: stored return address for clone() system call
+ // 0x58: stored %rbp value for clone() system call
+ // 0x60: stored %rbx value for clone() system call
+ // 0x68: stored %rcx value for clone() system call
+ // 0x70: stored %rdx value for clone() system call
+ // 0x78: stored %rsi value for clone() system call
+ // 0x80: stored %rdi value for clone() system call
+ // 0x88: stored %r8 value for clone() system call
+ // 0x90: stored %r9 value for clone() system call
+ // 0x98: stored %r10 value for clone() system call
+ // 0xA0: stored %r11 value for clone() system call
+ // 0xA8: stored %r12 value for clone() system call
+ // 0xB0: stored %r13 value for clone() system call
+ // 0xB8: stored %r14 value for clone() system call
+ // 0xC0: stored %r15 value for clone() system call
+ // 0xC8: new shared memory for clone()
+ // 0xD0: processFdPub for talking to trusted process
+ // 0xD4: cloneFdPub for talking to trusted process
+ // 0xD8: set to non-zero, if in debugging mode
+ // 0xDC: most recent SHM id returned by shmget(IPC_PRIVATE)
+ // 0xE0: cookie assigned to us by the trusted process (TLS_COOKIE)
+ // 0xE8: thread id (TLS_TID)
+ // 0xF0: threadFdPub (TLS_THREAD_FD)
// 0x200-0x1000: securely passed verified file name(s)
// Layout of (untrusted) scratch space:
@@ -89,6 +129,7 @@ void Sandbox::createTrustedThread(int processFdPub, int cloneFdPub,
// 0x48: last system call (not used on x86-64)
// 0x4C: number of consecutive calls to a time fnc (not used on x86-64)
// 0x50: nesting level of system calls (for debugging purposes only)
+ // 0x54: signal mask
// We use the %fs register for accessing the secure read-only page, and
// the untrusted scratch space immediately following it. The segment
@@ -103,7 +144,7 @@ void Sandbox::createTrustedThread(int processFdPub, int cloneFdPub,
// read(threadFd, &scratch, 4)
"1:xor %%rax, %%rax\n" // NR_read
"mov %%r13, %%rdi\n" // fd = threadFd
- "mov %%fs:0x0, %%rsi\n"
+ "mov %%fs:0x0, %%rsi\n" // secure_mem
"add $0x1000, %%rsi\n" // buf = &scratch
"mov $4, %%edx\n" // len = 4
"2:syscall\n"
@@ -123,13 +164,15 @@ void Sandbox::createTrustedThread(int processFdPub, int cloneFdPub,
"jnz 5f\n"
"3:cmp %%rbx, %%fs:0x8\n"
"jne 25f\n" // exit process
- "mov %%fs:0x10, %%rax\n"
- "mov %%fs:0x18, %%rdi\n"
- "mov %%fs:0x20, %%rsi\n"
- "mov %%fs:0x28, %%rdx\n"
- "mov %%fs:0x30, %%r10\n"
- "mov %%fs:0x38, %%r8\n"
- "mov %%fs:0x40, %%r9\n"
+ "cmp %%fs:0x10, %%eax\n"
+ "jne 25f\n" // exit process
+ "mov %%fs:0x18, %%rax\n"
+ "mov %%fs:0x20, %%rdi\n"
+ "mov %%fs:0x28, %%rsi\n"
+ "mov %%fs:0x30, %%rdx\n"
+ "mov %%fs:0x38, %%r10\n"
+ "mov %%fs:0x40, %%r8\n"
+ "mov %%fs:0x48, %%r9\n"
"cmp %%rbx, %%fs:0x8\n"
"jne 25f\n" // exit process
"add $2, %%rbx\n"
@@ -153,13 +196,13 @@ void Sandbox::createTrustedThread(int processFdPub, int cloneFdPub,
"mov $3, %%edx\n" // prot = PROT_READ | PROT_WRITE
"mov $10, %%eax\n" // NR_mprotect
"syscall\n"
- "mov %%r8d, 0xD4(%%rdi)\n" // set most recently returned SysV shm id
+ "mov %%r8d, 0xDC(%%rdi)\n" // set most recently returned SysV shm id
"xor %%rdi, %%rdi\n"
// When debugging messages are enabled, warn about expensive system calls
#ifndef NDEBUG
- "cmpw $0, %%fs:0xD0\n" // debug mode
- "jz 26f\n"
+ "cmpw $0, %%fs:0xD8\n" // debug mode
+ "jz 27f\n"
"mov $1, %%eax\n" // NR_write
"mov $2, %%edi\n" // fd = stderr
"lea 101f(%%rip), %%rsi\n" // "This is an expensive system call"
@@ -168,7 +211,7 @@ void Sandbox::createTrustedThread(int processFdPub, int cloneFdPub,
"xor %%rdi, %%rdi\n"
#endif
- "jmp 26f\n" // exit program, no message
+ "jmp 27f\n" // exit program, no message
"4:syscall\n"
"jmp 15f\n" // return result
@@ -179,10 +222,12 @@ void Sandbox::createTrustedThread(int processFdPub, int cloneFdPub,
"jnz 9f\n"
"cmp %%rbx, %%fs:0x8\n"
"jne 25f\n" // exit process
+ "cmp %%eax, %%fs:0x10\n"
+ "jne 25f\n" // exit process
// When debugging messages are enabled, warn about expensive system calls
#ifndef NDEBUG
- "cmpw $0, %%fs:0xD0\n" // debug mode
+ "cmpw $0, %%fs:0xD8\n" // debug mode
"jz 6f\n"
"mov $1, %%eax\n" // NR_write
"mov $2, %%edi\n" // fd = stderr
@@ -192,13 +237,13 @@ void Sandbox::createTrustedThread(int processFdPub, int cloneFdPub,
"6:"
#endif
- "mov %%fs:0x10, %%rax\n"
- "mov %%fs:0x18, %%rdi\n"
- "mov %%fs:0x20, %%rsi\n"
- "mov %%fs:0x28, %%rdx\n"
- "mov %%fs:0x30, %%r10\n"
- "mov %%fs:0x38, %%r8\n"
- "mov %%fs:0x40, %%r9\n"
+ "mov %%fs:0x18, %%rax\n"
+ "mov %%fs:0x20, %%rdi\n"
+ "mov %%fs:0x28, %%rsi\n"
+ "mov %%fs:0x30, %%rdx\n"
+ "mov %%fs:0x38, %%r10\n"
+ "mov %%fs:0x40, %%r8\n"
+ "mov %%fs:0x48, %%r9\n"
"cmp %%rbx, %%fs:0x8\n"
"jne 25f\n" // exit process
@@ -255,7 +300,7 @@ void Sandbox::createTrustedThread(int processFdPub, int cloneFdPub,
// Check in syscallTable whether this system call is unrestricted
"12:mov %%rax, %%r9\n"
#ifndef NDEBUG
- "cmpw $0, %%fs:0xD0\n" // debug mode
+ "cmpw $0, %%fs:0xD8\n" // debug mode
"jnz 13f\n"
#endif
"cmp playground$maxSyscall(%%rip), %%eax\n"
@@ -287,11 +332,11 @@ void Sandbox::createTrustedThread(int processFdPub, int cloneFdPub,
"mov 0x2C(%%r8), %%r9\n"
"mov 0x24(%%r8), %%r8\n"
"cmp $231, %%rax\n" // NR_exit_group
- "jz 26f\n" // exit program, no message
+ "jz 27f\n" // exit program, no message
"syscall\n"
// Return result of system call to sandboxed thread
- "15:mov %%fs:0x0, %%rsi\n"
+ "15:mov %%fs:0x0, %%rsi\n" // secure_mem
"add $0x1034, %%rsi\n" // buf = &scratch + 52
"mov %%rax, (%%rsi)\n"
"mov $8, %%edx\n" // len = 8
@@ -306,8 +351,8 @@ void Sandbox::createTrustedThread(int processFdPub, int cloneFdPub,
// NR_exit:
// Exit trusted thread after cleaning up resources
- "18:mov %%fs:0x0, %%rsi\n"
- "mov 0xE8(%%rsi), %%rdi\n" // fd = threadFdPub
+ "18:mov %%fs:0x0, %%rsi\n" // secure_mem
+ "mov 0xF0(%%rsi), %%rdi\n" // fd = threadFdPub
"mov $3, %%eax\n" // NR_close
"syscall\n"
"mov %%rsi, %%rdi\n" // start = secure_mem
@@ -324,7 +369,7 @@ void Sandbox::createTrustedThread(int processFdPub, int cloneFdPub,
"syscall\n"
"mov %%rax, %%rdi\n"
"test %%rax, %%rax\n"
- "js 26f\n" // exit process
+ "js 27f\n" // exit process
"jne 21f\n" // reap helper, exit thread
"jmp 22f\n" // unlock mutex
@@ -334,12 +379,9 @@ void Sandbox::createTrustedThread(int processFdPub, int cloneFdPub,
// resources with the caller (i.e. the previous trusted thread),
// and by extension it shares all resources with the sandbox'd
// threads.
- // N.B. It is possible to make the thread creation code crash before
- // it releases seccomp privileges. This is generally OK, as it just
- // terminates the program. But if we ever support signal handling,
- // we have to be careful that the user cannot install a SIGSEGV
- // handler that gets executed with elevated privileges.
- "19:mov %%fs:0x0, %%rbp\n" // %rbp = old_shared_mem
+ "19:mov %%fs:0x0, %%rbp\n" // %rbp = old_shared_mem
+ "mov %%rsi, %%r15\n" // remember child stack
+ "mov $1, %%rsi\n" // stack = 1
"syscall\n" // calls NR_clone
"cmp $-4095, %%rax\n" // return codes -1..-4095 are errno values
"jae 7b\n" // unlock mutex, return result
@@ -349,6 +391,23 @@ void Sandbox::createTrustedThread(int processFdPub, int cloneFdPub,
// In nascent thread, now.
"sub $2, %%rbx\n"
+
+ // We want to maintain an invalid %rsp whenever we access untrusted
+ // memory. This ensures that even if an attacker can trick us into
+ // triggering a SIGSEGV, we will never successfully execute a signal
+ // handler.
+ // Signal handlers are inherently dangerous, as an attacker could trick
+ // us into returning to the wrong address by adjusting the signal stack
+ // right before the handler returns.
+ // N.B. While POSIX is curiously silent about this, it appears that on
+ // Linux, alternate signal stacks are a per-thread property. That is
+ // good. It means that this security mechanism works, even if the
+ // sandboxed thread manages to set up an alternate signal stack.
+ //
+ // TODO(markus): We currently do not support emulating calls to
+ // sys_clone() with a zero (i.e. copy) stack parameter. See clone.cc
+ // for a discussion on how to fix this, if this ever becomes necessary.
+ "mov %%r15, %%r9\n" // %r9 = child_stack
"xor %%r15, %%r15\n" // Request to return from clone() when done
// Get thread id of nascent thread
@@ -358,19 +417,19 @@ void Sandbox::createTrustedThread(int processFdPub, int cloneFdPub,
// Nascent thread creates socketpair() for sending requests to
// trusted thread.
- // We can create the filehandles on the stack. Filehandles are
+ // We can create the filehandles on the child's stack. Filehandles are
// always treated as untrusted.
// socketpair(AF_UNIX, SOCK_STREAM, 0, fds)
- "push %%r15\n"
+ "sub $0x10, %%r9\n"
+ "mov %%r15, 8(%%r9)\n" // preserve return address on child stack
"mov $53, %%eax\n" // NR_socketpair
"mov $1, %%edi\n" // domain = AF_UNIX
"mov $1, %%esi\n" // type = SOCK_STREAM
"xor %%rdx, %%rdx\n" // protocol = 0
- "sub $8, %%rsp\n" // sv = %rsp
- "mov %%rsp, %%r10\n"
+ "mov %%r9, %%r10\n" // sv = child_stack
"syscall\n"
"test %%rax, %%rax\n"
- "jz 27f\n"
+ "jz 28f\n"
// If things went wrong, we don't have an (easy) way of signaling
// the parent. For our purposes, it is sufficient to fail with a
@@ -403,12 +462,12 @@ void Sandbox::createTrustedThread(int processFdPub, int cloneFdPub,
"lea 100f(%%rip), %%rsi\n" // "Sandbox violation detected"
"mov $101f-100f, %%edx\n" // len = strlen(msg)
"syscall\n"
- "mov $1, %%edi\n"
- "26:mov $231, %%eax\n" // NR_exit_group
+ "26:mov $1, %%edi\n"
+ "27:mov $231, %%eax\n" // NR_exit_group
"jmp 24b\n"
// The first page is mapped read-only for use as securely shared memory
- "27:mov 0xC0(%%rbp), %%r12\n" // %r12 = secure shared memory
+ "28:mov 0xC8(%%rbp), %%r12\n" // %r12 = secure shared memory
"cmp %%rbx, 8(%%rbp)\n"
"jne 25b\n" // exit process
"mov $10, %%eax\n" // NR_mprotect
@@ -428,12 +487,12 @@ void Sandbox::createTrustedThread(int processFdPub, int cloneFdPub,
// clone(CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|
// CLONE_SYSVSEM|CLONE_UNTRACED|CLONE_SETTLS, stack, NULL, NULL,
// tls)
- "mov 4(%%rsp), %%r13d\n" // %r13 = threadFd
+ "mov 4(%%r9), %%r13d\n" // %r13 = threadFd (on child's stack)
"mov $56, %%eax\n" // NR_clone
"mov $0x8D0F00, %%edi\n" // flags = VM|FS|FILES|SIGH|THR|SYSV|UTR|TLS
"mov $1, %%rsi\n" // stack = 1
"mov %%r12, %%r8\n" // tls = new_secure_mem
- "mov 0xC8(%%rbp), %%r15d\n" // %r15 = processFdPub
+ "mov 0xD0(%%rbp), %%r15d\n" // %r15 = processFdPub
"cmp %%rbx, 8(%%rbp)\n"
"jne 25b\n" // exit process
"syscall\n"
@@ -441,13 +500,17 @@ void Sandbox::createTrustedThread(int processFdPub, int cloneFdPub,
"js 25b\n" // exit process
"jz 0b\n" // invoke trustedThreadFnc()
+ // Copy the caller's signal mask
+ "mov 0x1054(%%rbp), %%rax\n"
+ "mov %%rax, 0x1054(%%r12)\n"
+
// Done creating trusted thread. We can now get ready to return to caller
- "mov 0(%%rsp), %%r9d\n" // %r9 = threadFdPub
- "add $8, %%rsp\n"
+ "mov %%r9, %%r8\n" // %r8 = child_stack
+ "mov 0(%%r9), %%r9d\n" // %r9 = threadFdPub
// Set up thread local storage with information on how to talk to
// trusted thread and trusted process.
- "lea 0xD8(%%r12), %%rsi\n" // args = &secure_mem.TLS;
+ "lea 0xE0(%%r12), %%rsi\n" // args = &secure_mem.TLS;
"mov $158, %%eax\n" // NR_arch_prctl
"mov $0x1001, %%edi\n" // option = ARCH_SET_GS
"syscall\n"
@@ -459,73 +522,121 @@ void Sandbox::createTrustedThread(int processFdPub, int cloneFdPub,
// the very top of this function, you will find that we push 999(%rip)
// on the stack. That is the signal that we should return on the same
// stack rather than return to where clone was called.
- "pop %%r15\n"
+ "mov 8(%%r8), %%r15\n"
+ "add $0x10, %%r8\n"
"test %%r15, %%r15\n"
- "jne 28f\n"
+ "jne 29f\n"
// Returning from clone() into the newly created thread is special. We
// cannot unroll the stack, as we just set up a new stack for this
// thread. We have to explicitly restore CPU registers to the values
// that they had when the program originally called clone().
- "sub $0x80, %%rsp\n" // redzone compensation
- "mov 0x48(%%rbp), %%rax\n"
- "push %%rax\n"
+ // We patch the register values in the signal stack frame so that we
+ // can ask sigreturn() to restore all registers for us.
+ "sub $0x8, %%r8\n"
"mov 0x50(%%rbp), %%rax\n"
- "push %%rax\n"
+ "mov %%rax, 0x00(%%r8)\n" // return address
+ "xor %%rax, %%rax\n"
+ "mov %%rax, 0x98(%%r8)\n" // %rax = 0
"mov 0x58(%%rbp), %%rax\n"
- "push %%rax\n"
+ "mov %%rax, 0x80(%%r8)\n" // %rbp
"mov 0x60(%%rbp), %%rax\n"
- "push %%rax\n"
+ "mov %%rax, 0x88(%%r8)\n" // %rbx
"mov 0x68(%%rbp), %%rax\n"
- "push %%rax\n"
+ "mov %%rax, 0xA0(%%r8)\n" // %rcx
"mov 0x70(%%rbp), %%rax\n"
- "push %%rax\n"
+ "mov %%rax, 0x90(%%r8)\n" // %rdx
"mov 0x78(%%rbp), %%rax\n"
- "push %%rax\n"
+ "mov %%rax, 0x78(%%r8)\n" // %rsi
"mov 0x80(%%rbp), %%rax\n"
- "push %%rax\n"
+ "mov %%rax, 0x70(%%r8)\n" // %rdi
"mov 0x88(%%rbp), %%rax\n"
- "push %%rax\n"
+ "mov %%rax, 0x30(%%r8)\n" // %r8
"mov 0x90(%%rbp), %%rax\n"
- "push %%rax\n"
+ "mov %%rax, 0x38(%%r8)\n" // %r9
"mov 0x98(%%rbp), %%rax\n"
- "push %%rax\n"
+ "mov %%rax, 0x40(%%r8)\n" // %r10
"mov 0xA0(%%rbp), %%rax\n"
- "push %%rax\n"
+ "mov %%rax, 0x48(%%r8)\n" // %r11
"mov 0xA8(%%rbp), %%rax\n"
- "push %%rax\n"
+ "mov %%rax, 0x50(%%r8)\n" // %r12
"mov 0xB0(%%rbp), %%rax\n"
- "push %%rax\n"
+ "mov %%rax, 0x58(%%r8)\n" // %r13
"mov 0xB8(%%rbp), %%rax\n"
- "push %%rax\n"
+ "mov %%rax, 0x60(%%r8)\n" // %r14
+ "mov 0xC0(%%rbp), %%rax\n"
+ "mov %%rax, 0x68(%%r8)\n" // %r15
"cmp %%rbx, 8(%%rbp)\n"
"jne 25b\n" // exit process
// Nascent thread launches a helper that doesn't share any of our
// resources, except for pages mapped as MAP_SHARED.
- // clone(0, %rsp)
- "28:mov $56, %%eax\n" // NR_clone
+ // clone(SIGCHLD, stack=1)
+ "29:mov $56, %%eax\n" // NR_clone
"mov $17, %%rdi\n" // flags = SIGCHLD
- "mov %%rsp, %%rsi\n" // stack = %rsp
+ "mov $1, %%rsi\n" // stack = 1
"syscall\n"
"test %%rax, %%rax\n"
"js 25b\n" // exit process
- "jne 29f\n"
+ "jne 31f\n"
// Use sendmsg() to send to the trusted process the file handles for
// communicating with the new trusted thread. We also send the address
// of the secure memory area (for sanity checks) and the thread id.
- "mov 0xCC(%%rbp), %%edi\n" // transport = Sandbox::cloneFdPub()
+ "mov 0xD4(%%rbp), %%edi\n" // transport = Sandbox::cloneFdPub()
"cmp %%rbx, 8(%%rbp)\n"
"jne 25b\n" // exit process
- "mov %%r9, %%rsi\n" // fd0 = threadFdPub
- "mov %%r13, %%rdx\n" // fd1 = threadFd
- "push %%r14\n" // threadId
- "mov %%esi, 4(%%rsp)\n" // threadFdPub
- "push %%r12\n" // secure_mem
- "mov %%rsp, %%rcx\n" // buf = &data
- "mov $16, %%r8\n" // len = sizeof(void*) + 2*sizeof(int)
- "call playground$sendFd\n"
+
+ // 0x00 msg:
+ // 0x00 msg_name ($0)
+ // 0x08 msg_namelen ($0)
+ // 0x10 msg_iov (%r8 + 0x44)
+ // 0x18 msg_iovlen ($1)
+ // 0x20 msg_control (%r8 + 0x54)
+ // 0x28 msg_controllen ($0x18)
+ // 0x30 data:
+ // 0x30 msg_flags/err ($0)
+ // 0x34 secure_mem (%r12)
+ // 0x3C threadId (%r14d)
+ // 0x40 threadFdPub (%r9d)
+ // 0x44 iov:
+ // 0x44 iov_base (%r8 + 0x30)
+ // 0x4C iov_len ($0x14)
+ // 0x54 cmsg:
+ // 0x54 cmsg_len ($0x18)
+ // 0x5C cmsg_level ($1, SOL_SOCKET)
+ // 0x60 cmsg_type ($1, SCM_RIGHTS)
+ // 0x64 threadFdPub (%r9d)
+ // 0x68 threadFd (%r13d)
+ // 0x6C
+ "sub $0x6C, %%r8\n"
+ "xor %%rdx, %%rdx\n" // flags = 0
+ "mov %%rdx, 0x00(%%r8)\n" // msg_name
+ "mov %%edx, 0x08(%%r8)\n" // msg_namelen
+ "mov %%edx, 0x30(%%r8)\n" // msg_flags
+ "mov $1, %%r11d\n"
+ "mov %%r11, 0x18(%%r8)\n" // msg_iovlen
+ "mov %%r11d, 0x5C(%%r8)\n" // cmsg_level
+ "mov %%r11d, 0x60(%%r8)\n" // cmsg_type
+ "lea 0x30(%%r8), %%r11\n"
+ "mov %%r11, 0x44(%%r8)\n" // iov_base
+ "add $0x14, %%r11\n"
+ "mov %%r11, 0x10(%%r8)\n" // msg_iov
+ "add $0x10, %%r11\n"
+ "mov %%r11, 0x20(%%r8)\n" // msg_control
+ "mov $0x14, %%r11d\n"
+ "mov %%r11, 0x4C(%%r8)\n" // iov_len
+ "add $4, %%r11d\n"
+ "mov %%r11, 0x28(%%r8)\n" // msg_controllen
+ "mov %%r11, 0x54(%%r8)\n" // cmsg_len
+ "mov %%r12, 0x34(%%r8)\n" // secure_mem
+ "mov %%r14d, 0x3C(%%r8)\n" // threadId
+ "mov %%r9d, 0x40(%%r8)\n" // threadFdPub
+ "mov %%r9d, 0x64(%%r8)\n" // threadFdPub
+ "mov %%r13d, 0x68(%%r8)\n" // threadFd
+ "mov $46, %%eax\n" // NR_sendmsg
+ "mov %%r8, %%rsi\n" // msg
+ "syscall\n"
// Release syscall_mutex_. This signals the trusted process that
// it can write into the original thread's secure memory again.
@@ -534,23 +645,29 @@ void Sandbox::createTrustedThread(int processFdPub, int cloneFdPub,
"mov $4096, %%esi\n"
"mov $3, %%edx\n" // PROT_READ | PROT_WRITE
"syscall\n"
+ "cmp %%rbx, 8(%%rbp)\n"
+ "jne 25b\n" // exit process
"lock; addl $0x80000000, (%%rdi)\n"
- "jz 26b\n" // exit process (no error message)
+ "jz 30f\n" // exit process (no error message)
"mov $1, %%edx\n"
"mov %%rdx, %%rsi\n" // FUTEX_WAKE
"mov $202, %%eax\n" // NR_futex
"syscall\n"
- "jmp 26b\n" // exit process (no error message)
+ "30:xor %%rdi, %%rdi\n"
+ "jmp 27b\n" // exit process (no error message)
// Reap helper
- "29:mov %%rax, %%rdi\n"
- "30:xor %%rsi, %%rsi\n"
+ "31:mov %%rax, %%rdi\n"
+ "32:lea -4(%%r8), %%rsi\n"
"xor %%rdx, %%rdx\n"
"xor %%r10, %%r10\n"
"mov $61, %%eax\n" // NR_wait4
"syscall\n"
"cmp $-4, %%eax\n" // EINTR
- "jz 30b\n"
+ "jz 32b\n"
+ "mov -4(%%r8), %%eax\n"
+ "test %%rax, %%rax\n"
+ "jnz 26b\n" // exit process (no error message)
// Release privileges by entering seccomp mode.
"mov $157, %%eax\n" // NR_prctl
@@ -560,6 +677,10 @@ void Sandbox::createTrustedThread(int processFdPub, int cloneFdPub,
"test %%rax, %%rax\n"
"jnz 25b\n" // exit process
+ // We can finally start using the stack. Signal handlers no longer pose
+ // a threat to us.
+ "mov %%r8, %%rsp\n"
+
// Back in the newly created sandboxed thread, wait for trusted process
// to receive request. It is possible for an attacker to make us
// continue even before the trusted process is done. This is OK. It'll
@@ -569,10 +690,10 @@ void Sandbox::createTrustedThread(int processFdPub, int cloneFdPub,
"mov $1, %%edx\n" // len = 1
"mov %%rsp, %%rsi\n" // buf = %rsp
"mov %%r9, %%rdi\n" // fd = threadFdPub
- "31:xor %%rax, %%rax\n" // NR_read
+ "33:xor %%rax, %%rax\n" // NR_read
"syscall\n"
"cmp $-4, %%rax\n" // EINTR
- "jz 31b\n"
+ "jz 33b\n"
"cmp %%rdx, %%rax\n"
"jne 25b\n" // exit process
"pop %%rax\n"
@@ -580,27 +701,16 @@ void Sandbox::createTrustedThread(int processFdPub, int cloneFdPub,
// Return to caller. We are in the new thread, now.
"xor %%rax, %%rax\n"
"test %%r15, %%r15\n"
-
- // Returning to createTrustedThread()
- "jz 32f\n"
- "jmp *%%r15\n"
-
- // Returning to the place where clone() had been called
- "32:pop %%r15\n"
- "pop %%r14\n"
- "pop %%r13\n"
- "pop %%r12\n"
- "pop %%r11\n"
- "pop %%r10\n"
- "pop %%r9\n"
- "pop %%r8\n"
- "pop %%rdi\n"
- "pop %%rsi\n"
- "pop %%rdx\n"
- "pop %%rcx\n"
- "pop %%rbx\n"
- "pop %%rbp\n"
- "ret\n"
+ "jnz 34f\n" // Returning to createTrustedThread()
+
+ // Returning to the place where clone() had been called. We rely on
+ // using rt_sigreturn() for restoring our registers. The caller already
+ // created a signal stack frame, and we patched the register values
+ // with the ones that were in effect prior to calling sandbox_clone().
+ "pop %%r15\n"
+ "34:mov %%r15, 0xA8(%%rsp)\n" // compute new %rip
+ "mov $15, %%eax\n" // NR_rt_sigreturn
+ "syscall\n"
".pushsection \".rodata\"\n"
"100:.ascii \"Sandbox violation detected, program aborted\\n\"\n"
@@ -638,19 +748,60 @@ void Sandbox::createTrustedThread(int processFdPub, int cloneFdPub,
"movd %0, %%mm6\n" // %mm6 = args
"lea 999f, %%ebx\n" // continue in same thread
"movd %%ebx, %%mm3\n"
- "xor %%ebx, %%ebx\n" // initial sequence number
- "movd %%ebx, %%mm2\n"
+ "xor %%edi, %%edi\n" // initial sequence number
+ "movd %%edi, %%mm2\n"
+
+ // Signal handlers are process-wide. This means that for security
+ // reasons, we cannot allow that the trusted thread ever executes any
+ // signal handlers.
+ // We prevent the execution of signal handlers by setting a signal
+ // mask that blocks all signals. In addition, we make sure that the
+ // stack pointer is invalid.
+ // We cannot reset the signal mask until after we have enabled
+ // Seccomp mode. Our sigprocmask() wrapper would normally do this by
+ // raising a signal, modifying the signal mask in the kernel-generated
+ // signal frame, and then calling sigreturn(). This presents a bit of
+ // a Catch-22, as all signals are masked and we can therefore not
+ // raise any signal that would allow us to generate the signal stack
+ // frame.
+ // Instead, we have to create the signal stack frame prior to entering
+ // Seccomp mode. This incidentally also helps us to restore the
+ // signal mask to the same value that it had prior to entering the
+ // sandbox.
+ // The signal wrapper for clone() is the second entry point into this
+ // code (by means of sending an IPC to its trusted thread). It goes
+ // through the same steps of creating a signal stack frame on the
+ // newly created thread's stacks prior to cloning. See clone.cc for
+ // details.
+ "mov $120+0xF000, %%eax\n" // __NR_clone + 0xF000
+ "sub $8, %%esp\n"
+ "mov %%esp, %%edx\n" // push a signal stack frame (see clone.cc)
+ "mov %%esp, 0(%%esp)\n"
+ "int $0\n"
+ "mov 0(%%esp), %%ebp\n"
+ "add $8, 0x1C(%%ebp)\n" // pop stack upon call to sigreturn()
+ "mov $2, %%ebx\n" // how = SIG_SETMASK
+ "movl $-1, 0(%%esp)\n"
+ "movl $-1, 4(%%esp)\n"
+ "mov %%esp, %%ecx\n" // set = full mask
+ "xor %%edx, %%edx\n" // old_set = NULL
+ "mov $8, %%esi\n" // mask all 64 signals
+ "mov $175, %%eax\n" // NR_rt_sigprocmask
+ "int $0x80\n"
+ "mov $126, %%eax\n" // NR_sigprocmask
+ "int $0x80\n"
+ "xor %%esp, %%esp\n" // invalidate the stack in all trusted code
"jmp 20f\n" // create trusted thread
// TODO(markus): Coalesce the read() operations by reading into a bigger
// buffer.
// Parameters:
- // %mm5: secure memory region
- // the page following this one contains the scratch space
// %mm0: thread's side of threadFd
// %mm1: processFdPub
// %mm3: return address after creation of new trusted thread
+ // %mm5: secure memory region
+ // the page following this one contains the scratch space
// Local variables:
// %mm2: sequence number for trusted calls
@@ -664,28 +815,29 @@ void Sandbox::createTrustedThread(int processFdPub, int cloneFdPub,
// Layout of secure shared memory region (c.f. securemem.h):
// 0x00: pointer to the secure shared memory region (i.e. self)
// 0x04: sequence number; must match %mm2
- // 0x08: system call number; passed to syscall in %eax
- // 0x0C: first argument; passed to syscall in %ebx
- // 0x10: second argument; passed to syscall in %ecx
- // 0x14: third argument; passed to syscall in %edx
- // 0x18: fourth argument; passed to syscall in %esi
- // 0x1C: fifth argument; passed to syscall in %edi
- // 0x20: sixth argument; passed to syscall in %ebp
- // 0x24: stored return address for clone() system call
- // 0x28: stored %ebp value for clone() system call
- // 0x2C: stored %edi value for clone() system call
- // 0x30: stored %esi value for clone() system call
- // 0x34: stored %edx value for clone() system call
- // 0x38: stored %ecx value for clone() system call
- // 0x3C: stored %ebx value for clone() system call
- // 0x40: new shared memory for clone()
- // 0x44: processFdPub for talking to trusted process
- // 0x48: cloneFdPub for talking to trusted process
- // 0x4C: set to non-zero, if in debugging mode
- // 0x50: most recent SHM id returned by shmget(IPC_PRIVATE)
- // 0x54: cookie assigned to us by the trusted process (TLS_COOKIE)
- // 0x5C: thread id (TLS_TID)
- // 0x64: threadFdPub (TLS_THREAD_FD)
+ // 0x08: call type; must match %eax, iff %eax == -1 || %eax == -2
+ // 0x0C: system call number; passed to syscall in %eax
+ // 0x10: first argument; passed to syscall in %ebx
+ // 0x14: second argument; passed to syscall in %ecx
+ // 0x18: third argument; passed to syscall in %edx
+ // 0x1C: fourth argument; passed to syscall in %esi
+ // 0x20: fifth argument; passed to syscall in %edi
+ // 0x24: sixth argument; passed to syscall in %ebp
+ // 0x28: stored return address for clone() system call
+ // 0x2C: stored %ebp value for clone() system call
+ // 0x30: stored %edi value for clone() system call
+ // 0x34: stored %esi value for clone() system call
+ // 0x38: stored %edx value for clone() system call
+ // 0x3C: stored %ecx value for clone() system call
+ // 0x40: stored %ebx value for clone() system call
+ // 0x44: new shared memory for clone()
+ // 0x48: processFdPub for talking to trusted process
+ // 0x4C: cloneFdPub for talking to trusted process
+ // 0x50: set to non-zero, if in debugging mode
+ // 0x54: most recent SHM id returned by shmget(IPC_PRIVATE)
+ // 0x58: cookie assigned to us by the trusted process (TLS_COOKIE)
+ // 0x60: thread id (TLS_TID)
+ // 0x68: threadFdPub (TLS_THREAD_FD)
// 0x200-0x1000: securely passed verified file name(s)
// Layout of (untrusted) scratch space:
@@ -703,6 +855,7 @@ void Sandbox::createTrustedThread(int processFdPub, int cloneFdPub,
// 0x2C: last system call (updated in syscall.cc)
// 0x30: number of consecutive calls to a time fnc. (e.g. gettimeofday)
// 0x34: nesting level of system calls (for debugging purposes only)
+ // 0x38: signal mask
"0:xor %%esp, %%esp\n"
"mov $2, %%eax\n" // %mm2 = initial sequence number
@@ -713,7 +866,7 @@ void Sandbox::createTrustedThread(int processFdPub, int cloneFdPub,
// read(threadFd, &scratch, 4)
"1:mov $3, %%eax\n" // NR_read
"movd %%mm0, %%ebx\n" // fd = threadFd
- "movd %%mm5, %%ecx\n"
+ "movd %%mm5, %%ecx\n" // secure_mem
"add $0x1000, %%ecx\n" // buf = &scratch
"mov $4, %%edx\n" // len = 4
"2:int $0x80\n"
@@ -734,13 +887,15 @@ void Sandbox::createTrustedThread(int processFdPub, int cloneFdPub,
"3:movd %%mm2, %%ebp\n"
"cmp %%ebp, 0x4-0x1000(%%ecx)\n"
"jne 25f\n" // exit process
- "mov 0x08-0x1000(%%ecx), %%eax\n"
- "mov 0x0C-0x1000(%%ecx), %%ebx\n"
- "mov 0x14-0x1000(%%ecx), %%edx\n"
- "mov 0x18-0x1000(%%ecx), %%esi\n"
- "mov 0x1C-0x1000(%%ecx), %%edi\n"
- "mov 0x20-0x1000(%%ecx), %%ebp\n"
- "mov 0x10-0x1000(%%ecx), %%ecx\n"
+ "cmp 0x08-0x1000(%%ecx), %%eax\n"
+ "jne 25f\n" // exit process
+ "mov 0x0C-0x1000(%%ecx), %%eax\n"
+ "mov 0x10-0x1000(%%ecx), %%ebx\n"
+ "mov 0x18-0x1000(%%ecx), %%edx\n"
+ "mov 0x1C-0x1000(%%ecx), %%esi\n"
+ "mov 0x20-0x1000(%%ecx), %%edi\n"
+ "mov 0x24-0x1000(%%ecx), %%ebp\n"
+ "mov 0x14-0x1000(%%ecx), %%ecx\n"
"movd %%edi, %%mm4\n"
"movd %%ebp, %%mm7\n"
"movd %%mm2, %%ebp\n"
@@ -773,14 +928,14 @@ void Sandbox::createTrustedThread(int processFdPub, int cloneFdPub,
"mov $3, %%edx\n" // prot = PROT_READ | PROT_WRITE
"mov $125, %%eax\n" // NR_mprotect
"int $0x80\n"
- "mov %%ebp, 0x50(%%ebx)\n" // set most recently returned SysV shm id
+ "mov %%ebp, 0x54(%%ebx)\n" // set most recently returned SysV shm id
"xor %%ebx, %%ebx\n"
// When debugging messages are enabled, warn about expensive system calls
#ifndef NDEBUG
"movd %%mm5, %%ecx\n"
- "cmpw $0, 0x4C(%%ecx)\n" // debug mode
- "jz 26f\n"
+ "cmpw $0, 0x50(%%ecx)\n" // debug mode
+ "jz 27f\n"
"mov $4, %%eax\n" // NR_write
"mov $2, %%ebx\n" // fd = stderr
"lea 101f, %%ecx\n" // "This is an expensive system call"
@@ -789,7 +944,7 @@ void Sandbox::createTrustedThread(int processFdPub, int cloneFdPub,
"xor %%ebx, %%ebx\n"
#endif
- "jmp 26f\n" // exit program, no message
+ "jmp 27f\n" // exit program, no message
"4:int $0x80\n"
"jmp 15f\n" // return result
@@ -801,10 +956,12 @@ void Sandbox::createTrustedThread(int processFdPub, int cloneFdPub,
"movd %%mm2, %%ebp\n"
"cmp %%ebp, 0x4-0x1000(%%ecx)\n"
"jne 25f\n" // exit process
+ "cmp %%eax, 0x8-0x1000(%%ecx)\n"
+ "jne 25f\n" // exit process
// When debugging messages are enabled, warn about expensive system calls
#ifndef NDEBUG
- "cmpw $0, 0x4C-0x1000(%%ecx)\n"
+ "cmpw $0, 0x50-0x1000(%%ecx)\n"
"jz 6f\n" // debug mode
"mov %%ecx, %%ebp\n"
"mov $4, %%eax\n" // NR_write
@@ -816,13 +973,13 @@ void Sandbox::createTrustedThread(int processFdPub, int cloneFdPub,
"6:"
#endif
- "mov 0x08-0x1000(%%ecx), %%eax\n"
- "mov 0x0C-0x1000(%%ecx), %%ebx\n"
- "mov 0x14-0x1000(%%ecx), %%edx\n"
- "mov 0x18-0x1000(%%ecx), %%esi\n"
- "mov 0x1C-0x1000(%%ecx), %%edi\n"
- "mov 0x20-0x1000(%%ecx), %%ebp\n"
- "mov 0x10-0x1000(%%ecx), %%ecx\n"
+ "mov 0x0C-0x1000(%%ecx), %%eax\n"
+ "mov 0x10-0x1000(%%ecx), %%ebx\n"
+ "mov 0x18-0x1000(%%ecx), %%edx\n"
+ "mov 0x1C-0x1000(%%ecx), %%esi\n"
+ "mov 0x20-0x1000(%%ecx), %%edi\n"
+ "mov 0x24-0x1000(%%ecx), %%ebp\n"
+ "mov 0x14-0x1000(%%ecx), %%ecx\n"
"movd %%edi, %%mm4\n"
"movd %%ebp, %%mm7\n"
"movd %%mm2, %%ebp\n"
@@ -864,7 +1021,7 @@ void Sandbox::createTrustedThread(int processFdPub, int cloneFdPub,
"mov $7, %%eax\n" // NR_waitpid
"int $0x80\n"
"cmp $-4, %%eax\n" // EINTR
- "jz 6\n"
+ "jz 8b\n"
"mov %%ebp, %%eax\n"
"jmp 15f\n" // return result
@@ -889,7 +1046,7 @@ void Sandbox::createTrustedThread(int processFdPub, int cloneFdPub,
// Check in syscallTable whether this system call is unrestricted
"12:mov %%eax, %%ebp\n"
#ifndef NDEBUG
- "cmpw $0, 0x4C-0x1000(%%ecx)\n"
+ "cmpw $0, 0x50-0x1000(%%ecx)\n"
"jnz 13f\n" // debug mode
#endif
"cmp playground$maxSyscall, %%eax\n"
@@ -919,11 +1076,11 @@ void Sandbox::createTrustedThread(int processFdPub, int cloneFdPub,
"mov 0x14(%%ecx), %%ebp\n"
"mov 0x04(%%ecx), %%ecx\n"
"cmp $252, %%eax\n" // NR_exit_group
- "jz 26f\n" // exit program, no message
+ "jz 27f\n" // exit program, no message
"int $0x80\n"
// Return result of system call to sandboxed thread
- "15:movd %%mm5, %%ecx\n"
+ "15:movd %%mm5, %%ecx\n" // secure_mem
"add $0x101C, %%ecx\n" // buf = &scratch + 28
"mov %%eax, (%%ecx)\n"
"mov $4, %%edx\n" // len = 4
@@ -938,8 +1095,8 @@ void Sandbox::createTrustedThread(int processFdPub, int cloneFdPub,
// NR_exit:
// Exit trusted thread after cleaning up resources
- "18:mov %%edi, %%ecx\n"
- "mov 0x64(%%ecx), %%ebx\n" // fd = threadFdPub
+ "18:mov %%edi, %%ecx\n" // secure_mem
+ "mov 0x68(%%ecx), %%ebx\n" // fd = threadFdPub
"mov $6, %%eax\n" // NR_close
"int $0x80\n"
"mov %%ecx, %%ebx\n" // start = secure_mem
@@ -966,14 +1123,10 @@ void Sandbox::createTrustedThread(int processFdPub, int cloneFdPub,
// resources with the caller (i.e. the previous trusted thread),
// and by extension it shares all resources with the sandbox'd
// threads.
- // N.B. It is possible to make the thread creation code crash before
- // it releases seccomp privileges. This is generally OK, as it just
- // terminates the program. But if we ever support signal handling,
- // we have to be careful that the user cannot install a SIGSEGV
- // handler that gets executed with elevated privileges.
- "19:movd %%edi, %%mm6\n" // %mm6 = old_shared_mem
- "movd %%mm4, %%edi\n"
- "movd %%mm7, %%ebp\n"
+ "19:movd %%edi, %%mm6\n" // %mm6 = old_shared_mem
+ "movd %%mm4, %%edi\n" // child_tidptr
+ "mov %%ecx, %%ebp\n" // remember child stack
+ "mov $1, %%ecx\n" // stack = 1
"int $0x80\n" // calls NR_clone
"cmp $-4095, %%eax\n" // return codes -1..-4095 are errno values
"jae 7b\n" // unlock mutex, return result
@@ -986,6 +1139,22 @@ void Sandbox::createTrustedThread(int processFdPub, int cloneFdPub,
// In nascent thread, now.
"sub $2, %%edi\n"
"movd %%edi, %%mm2\n"
+
+ // We want to maintain an invalid %esp whenever we access untrusted
+ // memory. This ensures that even if an attacker can trick us into
+ // triggering a SIGSEGV, we will never successfully execute a signal
+ // handler.
+ // Signal handlers are inherently dangerous, as an attacker could trick
+ // us into returning to the wrong address by adjusting the signal stack
+ // right before the handler returns.
+ // N.B. While POSIX is curiously silent about this, it appears that on
+ // Linux, alternate signal stacks are a per-thread property. That is
+ // good. It means that this security mechanism works, even if the
+ // sandboxed thread manages to set up an alternate signal stack.
+ //
+ // TODO(markus): We currently do not support emulating calls to
+ // sys_clone() with a zero (i.e. copy) stack parameter. See clone.cc
+ // for a discussion on how to fix this, if this ever becomes necessary.
"movd %%eax, %%mm3\n" // Request to return from clone() when done
// Get thread id of nascent thread
@@ -995,23 +1164,20 @@ void Sandbox::createTrustedThread(int processFdPub, int cloneFdPub,
// Nascent thread creates socketpair() for sending requests to
// trusted thread.
- // We can create the filehandles on the stack. Filehandles are
+ // We can create the filehandles on the child's stack. Filehandles are
// always treated as untrusted.
// socketpair(AF_UNIX, SOCK_STREAM, 0, fds)
"mov $102, %%eax\n" // NR_socketcall
"mov $8, %%ebx\n" // socketpair
- "sub $8, %%esp\n" // sv = %rsp
- "push %%esp\n"
- "xor %%ecx, %%ecx\n" // protocol = 0
- "push %%ecx\n"
- "mov $1, %%ecx\n" // type = SOCK_STREAM
- "push %%ecx\n"
- "push %%ecx\n" // domain = AF_UNIX
- "mov %%esp, %%ecx\n"
+ "sub $8, %%ebp\n" // sv = child_stack
+ "mov %%ebp, -0x04(%%ebp)\n"
+ "movl $0, -0x08(%%ebp)\n" // protocol = 0
+ "movl $1, -0x0C(%%ebp)\n" // type = SOCK_STREAM
+ "movl $1, -0x10(%%ebp)\n" // domain = AF_UNIX
+ "lea -0x10(%%ebp), %%ecx\n"
"int $0x80\n"
- "add $0x10, %%esp\n"
"test %%eax, %%eax\n"
- "jz 27f\n"
+ "jz 28f\n"
// If things went wrong, we don't have an (easy) way of signaling
// the parent. For our purposes, it is sufficient to fail with a
@@ -1043,19 +1209,18 @@ void Sandbox::createTrustedThread(int processFdPub, int cloneFdPub,
"lea 100f, %%ecx\n" // "Sandbox violation detected"
"mov $101f-100f, %%edx\n" // len = strlen(msg)
"int $0x80\n"
- "mov $1, %%ebx\n"
- "26:mov $252, %%eax\n" // NR_exit_group
+ "26:mov $1, %%ebx\n"
+ "27:mov $252, %%eax\n" // NR_exit_group
"jmp 24b\n"
// The first page is mapped read-only for use as securely shared memory
- "27:movd %%mm6, %%ebp\n"
- "mov 0x40(%%ebp), %%esi\n"
- "movd %%esi, %%mm5\n" // %mm5 = secure shared memory
- "movd %%mm2, %%edi\n"
- "cmp %%edi, 4(%%ebp)\n"
+ "28:movd %%mm6, %%edi\n" // %edi = old_shared_mem
+ "mov 0x44(%%edi), %%ebx\n" // addr = secure_mem
+ "movd %%ebx, %%mm5\n" // %mm5 = secure_mem
+ "movd %%mm2, %%esi\n"
+ "cmp %%esi, 4(%%edi)\n"
"jne 25b\n" // exit process
"mov $125, %%eax\n" // NR_mprotect
- "mov %%esi, %%ebx\n"
"mov $4096, %%ecx\n" // len = 4096
"mov $1, %%edx\n" // prot = PROT_READ
"int $0x80\n"
@@ -1070,13 +1235,13 @@ void Sandbox::createTrustedThread(int processFdPub, int cloneFdPub,
// Call clone() to create new trusted thread().
// clone(CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|
// CLONE_SYSVSEM|CLONE_UNTRACED, stack, NULL, NULL, NULL)
- "mov 4(%%esp), %%eax\n"
+ "mov 4(%%ebp), %%eax\n" // threadFd (on child's stack)
"movd %%eax, %%mm0\n" // %mm0 = threadFd
"mov $120, %%eax\n" // NR_clone
"mov $0x850F00, %%ebx\n" // flags = VM|FS|FILES|SIGH|THR|SYSV|UTR
"mov $1, %%ecx\n" // stack = 1
- "movd 0x44(%%ebp), %%mm1\n" // %mm1 = processFdPub
- "cmp %%edi, 4(%%ebp)\n"
+ "movd 0x48(%%edi), %%mm1\n" // %mm1 = processFdPub
+ "cmp %%esi, 4(%%edi)\n"
"jne 25b\n" // exit process
"int $0x80\n"
"test %%eax, %%eax\n"
@@ -1085,86 +1250,146 @@ void Sandbox::createTrustedThread(int processFdPub, int cloneFdPub,
// Set up thread local storage
"mov $0x51, %%eax\n" // seg_32bit, limit_in_pages, useable
- "push %%eax\n"
+ "mov %%eax, -0x04(%%ebp)\n"
"mov $0xFFFFF, %%eax\n" // limit
- "push %%eax\n"
- "add $0x54, %%esi\n"
- "push %%esi\n" // base_addr = &secure_mem.TLS
+ "mov %%eax, -0x08(%%ebp)\n"
+ "movd %%mm5, %%eax\n"
+ "add $0x58, %%eax\n"
+ "mov %%eax, -0x0C(%%ebp)\n" // base_addr = &secure_mem.TLS
"mov %%fs, %%eax\n"
"shr $3, %%eax\n"
- "push %%eax\n" // entry_number
+ "mov %%eax, -0x10(%%ebp)\n" // entry_number
"mov $243, %%eax\n" // NR_set_thread_area
- "mov %%esp, %%ebx\n"
+ "lea -0x10(%%ebp), %%ebx\n"
"int $0x80\n"
"test %%eax, %%eax\n"
"jnz 25b\n" // exit process
- "add $16, %%esp\n"
+
+ // Copy the caller's signal mask
+ "movd %%mm5, %%edx\n"
+ "mov 0x1038(%%edi), %%eax\n"
+ "mov %%eax, 0x1038(%%edx)\n"
+ "mov 0x103C(%%edi), %%eax\n"
+ "mov %%eax, 0x103C(%%edx)\n"
// Done creating trusted thread. We can now get ready to return to caller
- "mov 0(%%esp), %%esi\n" // %esi = threadFdPub
- "add $8, %%esp\n"
+ "mov 0(%%ebp), %%esi\n" // %esi = threadFdPub
+ "add $8, %%ebp\n"
// Check whether this is the initial thread, or a newly created one.
// At startup we run the same code as when we create a new thread. At
- // the very top of this function, you will find that we store 999(%rip)
+ // the very top of this function, you will find that we store 999f
// in %%mm3. That is the signal that we should return on the same
// stack rather than return to where clone was called.
"movd %%mm3, %%eax\n"
+ "movd %%mm2, %%edx\n"
"test %%eax, %%eax\n"
- "jne 28f\n"
+ "jne 29f\n"
// Returning from clone() into the newly created thread is special. We
// cannot unroll the stack, as we just set up a new stack for this
// thread. We have to explicitly restore CPU registers to the values
// that they had when the program originally called clone().
- "mov 0x24(%%ebp), %%eax\n"
- "push %%eax\n"
- "mov 0x28(%%ebp), %%eax\n"
- "push %%eax\n"
- "mov 0x2C(%%ebp), %%eax\n"
- "push %%eax\n"
- "mov 0x30(%%ebp), %%eax\n"
- "push %%eax\n"
- "mov 0x34(%%ebp), %%eax\n"
- "push %%eax\n"
- "mov 0x38(%%ebp), %%eax\n"
- "push %%eax\n"
- "mov 0x3C(%%ebp), %%eax\n"
- "push %%eax\n"
- "cmp %%edi, 4(%%ebp)\n"
+ // We patch the register values in the signal stack frame so that we
+ // can ask sigreturn() to restore all registers for us.
+ "sub $0x4, %%ebp\n"
+ "mov 0x28(%%edi), %%eax\n"
+ "mov %%eax, 0x00(%%ebp)\n" // return address
+ "xor %%eax, %%eax\n"
+ "mov %%eax, 0x30(%%ebp)\n" // %eax = 0
+ "mov 0x2C(%%edi), %%eax\n"
+ "mov %%eax, 0x1C(%%ebp)\n" // %ebp
+ "mov 0x30(%%edi), %%eax\n"
+ "mov %%eax, 0x14(%%ebp)\n" // %edi
+ "mov 0x34(%%edi), %%eax\n"
+ "mov %%eax, 0x18(%%ebp)\n" // %esi
+ "mov 0x38(%%edi), %%eax\n"
+ "mov %%eax, 0x28(%%ebp)\n" // %edx
+ "mov 0x3C(%%edi), %%eax\n"
+ "mov %%eax, 0x2C(%%ebp)\n" // %ecx
+ "mov 0x40(%%edi), %%eax\n"
+ "mov %%eax, 0x24(%%ebp)\n" // %ebx
+ "cmp %%edx, 4(%%edi)\n"
"jne 25b\n" // exit process
// Nascent thread launches a helper that doesn't share any of our
// resources, except for pages mapped as MAP_SHARED.
- // clone(0, %esp)
- "28:mov $120, %%eax\n" // NR_clone
+ // clone(SIGCHLD, stack=1)
+ "29:mov $120, %%eax\n" // NR_clone
"mov $17, %%ebx\n" // flags = SIGCHLD
- "mov %%esp, %%ecx\n" // stack = %esp
+ "mov $1, %%ecx\n" // stack = 1
"int $0x80\n"
"test %%eax, %%eax\n"
"js 25b\n" // exit process
- "jne 29f\n"
+ "jne 31f\n"
// Use sendmsg() to send to the trusted process the file handles for
// communicating with the new trusted thread. We also send the address
// of the secure memory area (for sanity checks) and the thread id.
- "push %%esi\n" // threadFdPub
- "movd %%mm4, %%eax\n" // threadId
- "push %%eax\n"
- "movd %%mm5, %%eax\n" // secure_mem
- "push %%eax\n"
- "mov %%esp, %%ebx\n" // buf = &data
- "mov $12, %%eax\n" // len = sizeof(void*) + 2*sizeof(int)
- "push %%eax\n"
- "push %%ebx\n"
- "movd %%mm0, %%eax\n" // fd1 = threadFd
- "push %%eax\n"
- "push %%esi\n" // fd0 = threadFdPub
- "mov 0x48(%%ebp), %%eax\n" // transport = Sandbox::cloneFdPub()
- "cmp %%edi, 4(%%ebp)\n"
+ "cmp %%edx, 4(%%edi)\n"
"jne 25b\n" // exit process
- "push %%eax\n"
- "call playground$sendFd\n"
+
+ // 0x00 socketcall:
+ // 0x00 socket (0x4C(%edi))
+ // 0x04 msg (%ecx + 0x0C)
+ // 0x08 flags ($0)
+ // 0x0C msg:
+ // 0x0C msg_name ($0)
+ // 0x10 msg_namelen ($0)
+ // 0x14 msg_iov (%ecx + 0x34)
+ // 0x18 msg_iovlen ($1)
+ // 0x1C msg_control (%ecx + 0x3C)
+ // 0x20 msg_controllen ($0x14)
+ // 0x24 data:
+ // 0x24 msg_flags/err ($0)
+ // 0x28 secure_mem (%mm5)
+ // 0x2C threadId (%mm4)
+ // 0x30 threadFdPub (%esi)
+ // 0x34 iov:
+ // 0x34 iov_base (%ecx + 0x24)
+ // 0x38 iov_len ($0x10)
+ // 0x3C cmsg:
+ // 0x3C cmsg_len ($0x14)
+ // 0x40 cmsg_level ($1, SOL_SOCKET)
+ // 0x44 cmsg_type ($1, SCM_RIGHTS)
+ // 0x48 threadFdPub (%esi)
+ // 0x4C threadFd (%mm0)
+ // 0x50
+ "lea -0x50(%%ebp), %%ecx\n"
+ "xor %%eax, %%eax\n"
+ "mov %%eax, 0x08(%%ecx)\n" // flags
+ "mov %%eax, 0x0C(%%ecx)\n" // msg_name
+ "mov %%eax, 0x10(%%ecx)\n" // msg_namelen
+ "mov %%eax, 0x24(%%ecx)\n" // msg_flags
+ "inc %%eax\n"
+ "mov %%eax, 0x18(%%ecx)\n" // msg_iovlen
+ "mov %%eax, 0x40(%%ecx)\n" // cmsg_level
+ "mov %%eax, 0x44(%%ecx)\n" // cmsg_type
+ "movl $0x10, 0x38(%%ecx)\n" // iov_len
+ "mov $0x14, %%eax\n"
+ "mov %%eax, 0x20(%%ecx)\n" // msg_controllen
+ "mov %%eax, 0x3C(%%ecx)\n" // cmsg_len
+ "mov 0x4C(%%edi), %%eax\n" // cloneFdPub
+ "mov %%eax, 0x00(%%ecx)\n" // socket
+ "lea 0x0C(%%ecx), %%eax\n"
+ "mov %%eax, 0x04(%%ecx)\n" // msg
+ "add $0x18, %%eax\n"
+ "mov %%eax, 0x34(%%ecx)\n" // iov_base
+ "add $0x10, %%eax\n"
+ "mov %%eax, 0x14(%%ecx)\n" // msg_iov
+ "add $8, %%eax\n"
+ "mov %%eax, 0x1C(%%ecx)\n" // msg_control
+ "mov %%esi, 0x30(%%ecx)\n" // threadFdPub
+ "mov %%esi, 0x48(%%ecx)\n" // threadFdPub
+ "movd %%mm5, %%eax\n"
+ "mov %%eax, 0x28(%%ecx)\n" // secure_mem
+ "movd %%mm4, %%eax\n"
+ "mov %%eax, 0x2C(%%ecx)\n" // threadId
+ "movd %%mm0, %%eax\n"
+ "mov %%eax, 0x4C(%%ecx)\n" // threadFd
+ "mov $16, %%ebx\n" // sendmsg()
+ "mov $102, %%eax\n" // NR_socketcall
+ "int $0x80\n"
// Release syscall_mutex_. This signals the trusted process that
// it can write into the original thread's secure memory again.
@@ -1173,31 +1398,42 @@ void Sandbox::createTrustedThread(int processFdPub, int cloneFdPub,
"mov $4096, %%ecx\n"
"mov $3, %%edx\n" // PROT_READ | PROT_WRITE
"int $0x80\n"
+ "movd %%mm2, %%edx\n"
+ "cmp %%edx, 0x4(%%edi)\n"
+ "jnz 25b\n" // exit process
"lock; addl $0x80000000, (%%ebx)\n"
- "jz 26b\n" // exit process (no error message)
+ "jz 30f\n" // exit process (no error message)
"mov $1, %%edx\n"
"mov %%edx, %%ecx\n" // FUTEX_WAKE
"mov $240, %%eax\n" // NR_futex
"int $0x80\n"
- "jmp 26b\n" // exit process (no error message)
+ "30:xor %%ebx, %%ebx\n"
+ "jmp 27b\n" // exit process (no error message)
// Reap helper
- "29:mov %%eax, %%ebx\n"
- "30:xor %%ecx, %%ecx\n"
+ "31:mov %%eax, %%ebx\n"
+ "32:lea -4(%%ebp), %%ecx\n"
"xor %%edx, %%edx\n"
"mov $7, %%eax\n" // NR_waitpid
"int $0x80\n"
"cmp $-4, %%eax\n" // EINTR
- "jz 30b\n"
+ "jz 32b\n"
+ "mov -4(%%ebp), %%eax\n"
+ "test %%eax, %%eax\n"
+ "jnz 26b\n" // exit process (no error message)
// Release privileges by entering seccomp mode.
- "mov $172, %%eax\n" // NR_prctl
+ "33:mov $172, %%eax\n" // NR_prctl
"mov $22, %%ebx\n" // PR_SET_SECCOMP
"mov $1, %%ecx\n"
"int $0x80\n"
"test %%eax, %%eax\n"
"jnz 25b\n" // exit process
+ // We can finally start using the stack. Signal handlers no longer pose
+ // a threat to us.
+ "mov %%ebp, %%esp\n"
+
// Back in the newly created sandboxed thread, wait for trusted process
// to receive request. It is possible for an attacker to make us
// continue even before the trusted process is done. This is OK. It'll
@@ -1205,12 +1441,12 @@ void Sandbox::createTrustedThread(int processFdPub, int cloneFdPub,
// data is considered untrusted anyway.
"push %%eax\n"
"mov $1, %%edx\n" // len = 1
- "mov %%esp, %%ecx\n" // buf = %rsp
+ "mov %%esp, %%ecx\n" // buf = %esp
"mov %%esi, %%ebx\n" // fd = threadFdPub
- "31:mov $3, %%eax\n" // NR_read
+ "34:mov $3, %%eax\n" // NR_read
"int $0x80\n"
"cmp $-4, %%eax\n" // EINTR
- "jz 31b\n"
+ "jz 34b\n"
"cmp %%edx, %%eax\n"
"jne 25b\n" // exit process
"pop %%eax\n"
@@ -1223,19 +1459,17 @@ void Sandbox::createTrustedThread(int processFdPub, int cloneFdPub,
// operations.
"emms\n"
- // Returning to createTrustedThread()
"test %%ebx, %%ebx\n"
- "jz 32f\n"
- "jmp *%%ebx\n"
-
- // Returning to the place where clone() had been called
- "32:pop %%ebx\n"
- "pop %%ecx\n"
- "pop %%edx\n"
- "pop %%esi\n"
- "pop %%edi\n"
- "pop %%ebp\n"
- "ret\n"
+ "jnz 35f\n" // Returning to createTrustedThread()
+
+ // Returning to the place where clone() had been called. We rely on
+ // using sigreturn() for restoring our registers. The caller already
+ // created a signal stack frame, and we patched the register values
+ // with the ones that were in effect prior to calling sandbox_clone().
+ "pop %%ebx\n"
+ "35:mov %%ebx, 0x38(%%esp)\n" // compute new %eip
+ "mov $119, %%eax\n" // NR_sigreturn
+ "int $0x80\n"
".pushsection \".rodata\"\n"
"100:.ascii \"Sandbox violation detected, program aborted\\n\"\n"