summaryrefslogtreecommitdiffstats
path: root/sandbox
diff options
context:
space:
mode:
authormarkus@chromium.org <markus@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2010-04-28 01:54:09 +0000
committermarkus@chromium.org <markus@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2010-04-28 01:54:09 +0000
commit745cf67ad12fc84d7c96ecf5c4e571717bbde176 (patch)
treed7267cd546f4985568810da3f828c9b9e07dee54 /sandbox
parent92b16aa35c651c1077a4738a348275ef10327b3c (diff)
downloadchromium_src-745cf67ad12fc84d7c96ecf5c4e571717bbde176.zip
chromium_src-745cf67ad12fc84d7c96ecf5c4e571717bbde176.tar.gz
chromium_src-745cf67ad12fc84d7c96ecf5c4e571717bbde176.tar.bz2
Added support for sigreturn() and rt_sigreturn(). On x86-32, this is
complicated by the fact that in Seccomp mode, we can only ever call sigreturn(). But in order to eventually support sigaction(), we want to be able to also call rt_sigreturn(). We solve this problem by rewriting the signal stack frame from an RT signal frame to a legacy frame. Fortunately, this part of the signal frame is stable between kernel versions. The unstable part (i.e. extended registers such as FP, MMX, SSE, ...) is always identical in both types of signal frames. None of these complications exist on x86-64 and it is relatively straight-forward to enable support for the system call. The only difficulty lies in the fact that its calling conventions are somewhat different from "normal" system calls. So, we have to handle rt_sigreturn() from within the syscallWrapper() and the segv() handler and cannot write it in C code. TEST=ad hoc testing until we have support for sigaction(). Then we can add a unittest BUG=37728 Review URL: http://codereview.chromium.org/1739011 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@45774 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'sandbox')
-rw-r--r--sandbox/linux/seccomp/ioctl.cc7
-rw-r--r--sandbox/linux/seccomp/library.cc26
-rw-r--r--sandbox/linux/seccomp/sandbox.cc219
-rw-r--r--sandbox/linux/seccomp/sandbox_impl.h2
-rw-r--r--sandbox/linux/seccomp/syscall.cc112
5 files changed, 239 insertions, 127 deletions
diff --git a/sandbox/linux/seccomp/ioctl.cc b/sandbox/linux/seccomp/ioctl.cc
index 07f1aa3..4d2b3c5c5 100644
--- a/sandbox/linux/seccomp/ioctl.cc
+++ b/sandbox/linux/seccomp/ioctl.cc
@@ -48,8 +48,11 @@ bool Sandbox::process_ioctl(int parentMapsFd, int sandboxFd, int threadFdPub,
ioctl_req.d, ioctl_req.req, ioctl_req.arg);
return true;
default:
- std::cerr << "Unsupported ioctl: 0x" << std::hex << ioctl_req.req <<
- std::endl;
+ if (Debug::isEnabled()) {
+ char buf[80];
+ sprintf(buf, "Unsupported ioctl: 0x%04X\n", ioctl_req.req);
+ Debug::message(buf);
+ }
SecureMem::abandonSystemCall(threadFd, rc);
return false;
}
diff --git a/sandbox/linux/seccomp/library.cc b/sandbox/linux/seccomp/library.cc
index e882ba4..68ff974 100644
--- a/sandbox/linux/seccomp/library.cc
+++ b/sandbox/linux/seccomp/library.cc
@@ -835,31 +835,29 @@ void Library::patchVDSO(char** extraSpace, int* extraLength){
//
// 58 POP %eax
// B8 77 00 00 00 MOV $0x77, %eax
- // E9 .. .. .. .. JMP syscallWrapper
+ // E8 .. .. .. .. CALL syscallWrapper
char* dest = getScratchSpace(maps_, __kernel_sigreturn, 11, extraSpace,
extraLength);
- memcpy(dest, "\x58\xB8\x77\x00\x00\x00\xE9", 7);
- *reinterpret_cast<char *>(dest + 7) =
- reinterpret_cast<char *>(&syscallWrapper) -
- reinterpret_cast<char *>(dest + 11);
+ memcpy(dest, "\x58\xB8\x77\x00\x00\x00\xE8", 7);
+ *reinterpret_cast<long *>(dest + 7) =
+ reinterpret_cast<char *>(&syscallWrapper) - dest - 11;;
*__kernel_sigreturn = '\xE9';
- *reinterpret_cast<char *>(__kernel_sigreturn + 1) =
- dest - reinterpret_cast<char *>(__kernel_sigreturn + 5);
+ *reinterpret_cast<long *>(__kernel_sigreturn + 1) =
+ dest - reinterpret_cast<char *>(__kernel_sigreturn) - 5;
}
if (__kernel_rt_sigreturn) {
// Replace the rt_sigreturn() system call with a jump to code that does:
//
// B8 AD 00 00 00 MOV $0xAD, %eax
- // E9 .. .. .. .. JMP syscallWrapper
+ // E8 .. .. .. .. CALL syscallWrapper
char* dest = getScratchSpace(maps_, __kernel_rt_sigreturn, 10, extraSpace,
extraLength);
- memcpy(dest, "\xB8\xAD\x00\x00\x00\xE9", 6);
- *reinterpret_cast<char *>(dest + 6) =
- reinterpret_cast<char *>(&syscallWrapper) -
- reinterpret_cast<char *>(dest + 10);
+ memcpy(dest, "\xB8\xAD\x00\x00\x00\xE8", 6);
+ *reinterpret_cast<long *>(dest + 6) =
+ reinterpret_cast<char *>(&syscallWrapper) - dest - 10;
*__kernel_rt_sigreturn = '\xE9';
- *reinterpret_cast<char *>(__kernel_rt_sigreturn + 1) =
- dest - reinterpret_cast<char *>(__kernel_rt_sigreturn + 5);
+ *reinterpret_cast<long *>(__kernel_rt_sigreturn + 1) =
+ dest - reinterpret_cast<char *>(__kernel_rt_sigreturn) - 5;
}
#endif
}
diff --git a/sandbox/linux/seccomp/sandbox.cc b/sandbox/linux/seccomp/sandbox.cc
index b7a249e..93ce12e 100644
--- a/sandbox/linux/seccomp/sandbox.cc
+++ b/sandbox/linux/seccomp/sandbox.cc
@@ -121,7 +121,8 @@ void Sandbox::setupSignalHandlers() {
// Set up SEGV handler for dealing with RDTSC instructions, system calls
// that have been rewritten to use INT0, and for sigpending() emulation.
- sa.sa_handler_ = segv();
+ sa.sa_sigaction_ = segv();
+ sa.sa_flags = SA_SIGINFO;
sys.sigaction(SIGSEGV, &sa, NULL);
// Unblock SIGSEGV and SIGCHLD
@@ -131,8 +132,8 @@ void Sandbox::setupSignalHandlers() {
sys.sigprocmask(SIG_UNBLOCK, &mask, 0);
}
-void (*Sandbox::segv())(int signo) {
- void (*fnc)(int signo);
+void (*Sandbox::segv())(int signo, SysCalls::siginfo *context, void *unused) {
+ void (*fnc)(int signo, SysCalls::siginfo *context, void *unused);
asm volatile(
"call 999f\n"
#if defined(__x86_64__)
@@ -198,7 +199,7 @@ void (*Sandbox::segv())(int signo) {
// rewrite the system call instruction. Retrieve the CPU register
// at the time of the segmentation fault and invoke syscallWrapper().
"8:cmpw $0x00CD, (%%r15)\n" // INT $0x0
- "jnz 14f\n"
+ "jnz 16f\n"
#ifndef NDEBUG
"lea 200f(%%rip), %%rdi\n"
"call playground$debugMessage\n"
@@ -239,10 +240,18 @@ void (*Sandbox::segv())(int signo) {
"mov %%r10, 0(%%rdx)\n" // old_set
"jmp 7b\n"
+ // Handle rt_sigreturn()
+ "12:cmp $15, %%rax\n" // NR_rt_sigreturn
+ "jnz 14f\n"
+ "mov 0xA8(%%rsp), %%rsp\n" // %rsp at time of segmentation fault
+ "13:syscall\n" // rt_sigreturn() is unrestricted
+ "mov $66, %%edi\n" // rt_sigreturn() should never return
+ "mov $231, %%eax\n" // NR_exit_group
+ "jmp 13b\n"
// Copy signal frame onto new stack. See clone.cc for details
- "12:cmp $56+0xF000, %%rax\n" // NR_clone + 0xF000
- "jnz 13f\n"
+ "14:cmp $56+0xF000, %%rax\n" // NR_clone + 0xF000
+ "jnz 15f\n"
"mov 0xA8(%%rsp), %%rcx\n" // %rsp at time of segmentation fault
"sub %%rsp, %%rcx\n" // %rcx = size of stack frame
"sub $8, %%rcx\n" // skip return address
@@ -256,7 +265,7 @@ void (*Sandbox::segv())(int signo) {
"jmp 7b\n"
// Forward system call to syscallWrapper()
- "13:lea 7b(%%rip), %%rcx\n"
+ "15:lea 7b(%%rip), %%rcx\n"
"push %%rcx\n"
"push 0xB8(%%rsp)\n" // %rip at time of segmentation fault
"lea playground$syscallWrapper(%%rip), %%rcx\n"
@@ -265,7 +274,7 @@ void (*Sandbox::segv())(int signo) {
// This was a genuine segmentation fault. Trigger the kernel's default
// signal disposition. The only way we can do this from seccomp mode
// is by blocking the signal and retriggering it.
- "14:mov $2, %%edi\n" // stderr
+ "16:mov $2, %%edi\n" // stderr
"lea 300f(%%rip), %%rsi\n" // "Segmentation fault\n"
"mov $301f-300f, %%edx\n"
"mov $1, %%eax\n" // NR_write
@@ -277,13 +286,13 @@ void (*Sandbox::segv())(int signo) {
// happened. If it is RDTSC, forward the request to the trusted
// thread.
"mov $-3, %%ebx\n" // request for RDTSC
- "mov 0x40(%%esp), %%ebp\n" // %eip at time of segmentation fault
+ "mov 0xDC(%%esp), %%ebp\n" // %eip at time of segmentation fault
"cmpw $0x310F, (%%ebp)\n" // RDTSC
"jz 0f\n"
- "cmpw $0x010F, (%%ebp)\n"
- "jnz 8f\n"
+ "cmpw $0x010F, (%%ebp)\n" // RDTSCP
+ "jnz 9f\n"
"cmpb $0xF9, 2(%%ebp)\n"
- "jnz 8f\n"
+ "jnz 9f\n"
"mov $-4, %%ebx\n" // request for RDTSCP
"0:"
#ifndef NDEBUG
@@ -292,7 +301,7 @@ void (*Sandbox::segv())(int signo) {
"call playground$debugMessage\n"
"sub $4, %%esp\n"
#else
- "sub $8, %%esp\n"
+ "sub $8, %%esp\n" // allocate buffer for receiving timestamp
#endif
"push %%ebx\n"
"mov %%fs:16, %%ebx\n" // fd = threadFdPub
@@ -301,126 +310,178 @@ void (*Sandbox::segv())(int signo) {
"1:mov %%edx, %%eax\n" // NR_write
"int $0x80\n"
"cmp %%eax, %%edx\n"
- "jz 5f\n"
+ "jz 7f\n"
"cmp $-4, %%eax\n" // EINTR
"jz 1b\n"
- "2:add $12, %%esp\n"
- "movl $0, 0x34(%%esp)\n" // %eax at time of segmentation fault
- "movl $0, 0x2C(%%esp)\n" // %edx at time of segmentation fault
+ "2:add $12, %%esp\n" // remove temporary buffer from stack
+ "xor %%eax, %%eax\n"
+ "movl $0, 0xC8(%%esp)\n" // %edx at time of segmentation fault
"cmpw $0x310F, (%%ebp)\n" // RDTSC
"jz 3f\n"
- "movl $0, 0x30(%%esp)\n" // %ecx at time of segmentation fault
- "3:addl $2, 0x40(%%esp)\n" // %eip at time of segmentation fault
- "mov 0x40(%%esp), %%ebp\n" // %eip at time of segmentation fault
- "cmpw $0x010F, (%%ebp)\n" // RDTSC
- "jnz 4f\n"
- "addl $1, 0x40(%%esp)\n" // %eip at time of segmentation fault
- "4:ret\n"
- "5:mov $12, %%edx\n" // len = 3*sizeof(int)
- "6:mov $3, %%eax\n" // NR_read
+ "movl $0, 0xCC(%%esp)\n" // %ecx at time of segmentation fault
+ "3:mov %%eax, 0xD0(%%esp)\n" // %eax at time of segmentation fault
+ "4:mov 0xDC(%%esp), %%ebp\n" // %eip at time of segmentation fault
+ "addl $2, 0xDC(%%esp)\n" // %eip at time of segmentation fault
+ "cmpw $0x010F, (%%ebp)\n" // RDTSCP
+ "jnz 5f\n"
+ "addl $1, 0xDC(%%esp)\n" // %eip at time of segmentation fault
+ "5:sub $0x1C8, %%esp\n" // a legacy signal stack is much larger
+ "mov 0x1CC(%%esp), %%eax\n" // push signal number
+ "push %%eax\n"
+ "lea 0x270(%%esp), %%esi\n" // copy siginfo register values
+ "lea 0x4(%%esp), %%edi\n" // into new location
+ "mov $22, %%ecx\n"
+ "cld\n"
+ "rep movsl\n"
+ "mov 0x2C8(%%esp), %%ebx\n" // copy first half of signal mask
+ "mov %%ebx, 0x54(%%esp)\n"
+ "lea 6f, %%esi\n" // copy "magic" restorer function
+ "push %%esi\n" // push restorer function
+ "lea 0x2D4(%%esp), %%edi\n" // patch up retcode magic numbers
+ "movb $2, %%cl\n"
+ "rep movsl\n"
+ "ret\n" // return to restorer function
+
+ // The restorer function is sometimes used by gdb as a magic marker to
+ // recognize signal stack frames. Don't change any of the next three
+ // instructions.
+ "6:pop %%eax\n" // remove dummy argument (signo)
+ "mov $119, %%eax\n" // NR_sigreturn
+ "int $0x80\n"
+ "7:mov $12, %%edx\n" // len = 3*sizeof(int)
+ "8:mov $3, %%eax\n" // NR_read
"int $0x80\n"
"cmp $-4, %%eax\n" // EINTR
- "jz 6b\n"
+ "jz 8b\n"
"cmp %%eax, %%edx\n"
"jnz 2b\n"
"pop %%eax\n"
"pop %%edx\n"
"pop %%ecx\n"
- "mov %%edx, 0x2C(%%esp)\n" // %edx at time of segmentation fault
+ "mov %%edx, 0xC8(%%esp)\n" // %edx at time of segmentation fault
"cmpw $0x310F, (%%ebp)\n" // RDTSC
- "jz 7f\n"
- "mov %%ecx, 0x30(%%esp)\n" // %ecx at time of segmentation fault
- "7:mov %%eax, 0x34(%%esp)\n" // %eax at time of segmentation fault
+ "jz 3b\n"
+ "mov %%ecx, 0xCC(%%esp)\n" // %ecx at time of segmentation fault
"jmp 3b\n"
// If the instruction is INT 0, then this was probably the result
// of playground::Library being unable to find a way to safely
// rewrite the system call instruction. Retrieve the CPU register
// at the time of the segmentation fault and invoke syscallWrapper().
- "8:cmpw $0x00CD, (%%ebp)\n" // INT $0x0
- "jnz 16f\n"
+ "9:cmpw $0x00CD, (%%ebp)\n" // INT $0x0
+ "jnz 20f\n"
#ifndef NDEBUG
"lea 200f, %%eax\n"
"push %%eax\n"
"call playground$debugMessage\n"
"add $0x4, %%esp\n"
#endif
- "mov 0x34(%%esp), %%eax\n" // %eax at time of segmentation fault
- "mov 0x28(%%esp), %%ebx\n" // %ebx at time of segmentation fault
- "mov 0x30(%%esp), %%ecx\n" // %ecx at time of segmentation fault
- "mov 0x2C(%%esp), %%edx\n" // %edx at time of segmentation fault
- "mov 0x1C(%%esp), %%esi\n" // %esi at time of segmentation fault
- "mov 0x18(%%esp), %%edi\n" // %edi at time of segmentation fault
- "mov 0x20(%%esp), %%ebp\n" // %ebp at time of segmentation fault
+ "mov 0xD0(%%esp), %%eax\n" // %eax at time of segmentation fault
+ "mov 0xC4(%%esp), %%ebx\n" // %ebx at time of segmentation fault
+ "mov 0xCC(%%esp), %%ecx\n" // %ecx at time of segmentation fault
+ "mov 0xC8(%%esp), %%edx\n" // %edx at time of segmentation fault
+ "mov 0xB8(%%esp), %%esi\n" // %esi at time of segmentation fault
+ "mov 0xB4(%%esp), %%edi\n" // %edi at time of segmentation fault
+ "mov 0xB2(%%esp), %%ebp\n" // %ebp at time of segmentation fault
// Handle sigprocmask() and rt_sigprocmask()
"cmp $175, %%eax\n" // NR_rt_sigprocmask
- "jnz 9f\n"
+ "jnz 10f\n"
"mov $-22, %%eax\n" // -EINVAL
"cmp $8, %%esi\n" // %esi = sigsetsize (8 bytes = 64 signals)
- "jl 7b\n"
- "jmp 10f\n"
- "9:cmp $126, %%eax\n" // NR_sigprocmask
- "jnz 14f\n"
+ "jl 3b\n"
+ "jmp 11f\n"
+ "10:cmp $126, %%eax\n" // NR_sigprocmask
+ "jnz 15f\n"
"mov $-22, %%eax\n"
- "10:mov 0x58(%%esp), %%edi\n" // signal mask at time of segmentation fault
- "mov 0x5C(%%esp), %%ebp\n"
+ "11:mov 0xFC(%%esp), %%edi\n" // signal mask at time of segmentation fault
+ "mov 0x100(%%esp), %%ebp\n"
"test %%ecx, %%ecx\n" // only set mask, if set is non-NULL
- "jz 13f\n"
+ "jz 14f\n"
"mov 0(%%ecx), %%esi\n"
"mov 4(%%ecx), %%ecx\n"
"cmp $0, %%ebx\n" // %ebx = how (SIG_BLOCK)
- "jnz 11f\n"
- "or %%esi, 0x58(%%esp)\n" // signal mask at time of segmentation fault
- "or %%ecx, 0x5C(%%esp)\n"
- "jmp 13f\n"
- "11:cmp $1, %%ebx\n" // %ebx = how (SIG_UNBLOCK)
"jnz 12f\n"
+ "or %%esi, 0xFC(%%esp)\n" // signal mask at time of segmentation fault
+ "or %%ecx, 0x100(%%esp)\n"
+ "jmp 14f\n"
+ "12:cmp $1, %%ebx\n" // %ebx = how (SIG_UNBLOCK)
+ "jnz 13f\n"
"xor $-1, %%esi\n"
"xor $-1, %%ecx\n"
- "and %%esi, 0x58(%%esp)\n" // signal mask at time of segmentation fault
- "and %%ecx, 0x5C(%%esp)\n"
- "jmp 13f\n"
- "12:cmp $2, %%ebx\n" // %ebx = how (SIG_SETMASK)
- "jnz 7b\n"
- "mov %%esi, 0x58(%%esp)\n" // signal mask at time of segmentation fault
- "mov %%ecx, 0x5C(%%esp)\n"
- "13:xor %%eax, %%eax\n"
+ "and %%esi, 0xFC(%%esp)\n" // signal mask at time of segmentation fault
+ "and %%ecx, 0x100(%%esp)\n"
+ "jmp 14f\n"
+ "13:cmp $2, %%ebx\n" // %ebx = how (SIG_SETMASK)
+ "jnz 3b\n"
+ "mov %%esi, 0xFC(%%esp)\n" // signal mask at time of segmentation fault
+ "mov %%ecx, 0x100(%%esp)\n"
+ "14:xor %%eax, %%eax\n"
"test %%edx, %%edx\n" // only return old mask, if set is non-NULL
- "jz 7b\n"
+ "jz 3b\n"
"mov %%edi, 0(%%edx)\n" // old_set
"mov %%ebp, 4(%%edx)\n"
- "jmp 7b\n"
+ "jmp 3b\n"
- // Copy signal frame onto new stack. See clone.cc for details
- "14:cmp $120+0xF000, %%eax\n" // NR_clone + 0xF000
- "jnz 15f\n"
- "mov 0x24(%%esp), %%ecx\n" // %esp at time of segmentation fault
- "sub %%esp, %%ecx\n" // %ecx = size of stack frame
- "sub $8, %%ecx\n" // skip return address and dummy
- "mov %%ecx, %%eax\n" // return size of signal stack frame
+ // Handle sigreturn() and rt_sigreturn()
+ // See syscall.cc for a discussion on how we can emulate rt_sigreturn()
+ // by calling sigreturn() with a suitably adjusted stack.
+ "15:cmp $119, %%eax\n" // NR_sigreturn
+ "jnz 17f\n"
+ "mov 0xC0(%%esp), %%esp\n" // %esp at time of segmentation fault
+ "16:int $0x80\n" // sigreturn() is unrestricted
+ "17:cmp $173, %%eax\n" // NR_rt_sigreturn
+ "jnz 18f\n"
+ "mov 0xC0(%%esp), %%esp\n" // %esp at time of segmentation fault
+ "sub $4, %%esp\n" // add fake return address
+ "jmp 4b\n"
+
+ // Copy signal frame onto new stack. In the process, we have to convert
+ // it from an RT signal frame to a legacy signal frame.
+ // See clone.cc for details
+ "18:cmp $120+0xF000, %%eax\n" // NR_clone + 0xF000
+ "jnz 19f\n"
+ "mov 0xC0(%%esp), %%ecx\n" // %esp at time of segmentation fault
+ "sub %%esp, %%ecx\n" // %ecx = size of RT stack frame
+ "mov %%ecx, %%eax\n"
+ "add $0x1C8, %%eax\n" // adjust for size of legacy stack frame
+ "sub $0x100, %%ecx\n"
"mov 0(%%edx), %%edi\n" // stack for newly clone()'d thread
"sub %%ecx, %%edi\n" // copy onto new stack
- "mov %%edi, 0(%%edx)\n" // allocate space on new stack
- "lea 8(%%esp), %%esi\n" // copy from current stack
+ "lea 0x100(%%esp), %%esi\n"
"cld\n"
- "rep movsb\n"
- "jmp 7b\n"
+ "rep movsb\n" // copy parts of RT stack(sigmask, FP state)
+ "mov 0xF0(%%esp), %%ebx\n" // adjust pointer to fpstate
+ "sub %%esi, %%ebx\n"
+ "add %%edi, %%ebx\n"
+ "sub %%eax, %%edi\n"
+ "mov %%edi, 0(%%edx)\n" // allocate space on new stack
+ "lea 0xA4(%%esp), %%esi\n" // copy sigcontext from current stack
+ "mov $0x16, %%ecx\n"
+ "rep movsl\n"
+ "mov %%ebx, -0xC(%%edi)\n" // set pointer to fpstate
+ "mov 0xFC(%%esp), %%ebx\n" // copy first half of signal mask
+ "mov %%ebx, -0x8(%%edi)\n"
+ "mov %%eax, -0x2C(%%edi)\n" // return size of stack frame in %%eax
+ "addl $2, -0x20(%%edi)\n" // adjust %eip
+ "mov 0(%%edx), %%esp\n"
+ "mov $119, %%eax\n" // NR_sigreturn
+ "int $0x80\n"
// Forward system call to syscallWrapper()
- "15:call playground$syscallWrapper\n"
- "jmp 7b\n"
+ "19:call playground$syscallWrapper\n"
+ "jmp 3b\n"
// This was a genuine segmentation fault. Trigger the kernel's default
// signal disposition. The only way we can do this from seccomp mode
// is by blocking the signal and retriggering it.
- "16:mov $2, %%ebx\n" // stderr
+ "20:mov $2, %%ebx\n" // stderr
"lea 300f, %%ecx\n" // "Segmentation fault\n"
"mov $301f-300f, %%edx\n"
"mov $4, %%eax\n" // NR_write
"int $0x80\n"
- "orb $4, 0x59(%%esp)\n" // signal mask at time of segmentation fault
- "ret\n"
+ "orb $4, 0xFD(%%esp)\n" // signal mask at time of segmentation fault
+ "jmp 4b\n"
#else
#error Unsupported target platform
#endif
diff --git a/sandbox/linux/seccomp/sandbox_impl.h b/sandbox/linux/seccomp/sandbox_impl.h
index 38a1803..9c49ffc 100644
--- a/sandbox/linux/seccomp/sandbox_impl.h
+++ b/sandbox/linux/seccomp/sandbox_impl.h
@@ -598,7 +598,7 @@ class Sandbox {
// The SEGV handler knows how to handle RDTSC instructions
static void setupSignalHandlers();
- static void (*segv())(int signo);
+ static void (*segv())(int signo, SysCalls::siginfo *context, void *unused);
// If no specific handler has been registered for a system call, call this
// function which asks the trusted thread to perform the call. This is used
diff --git a/sandbox/linux/seccomp/syscall.cc b/sandbox/linux/seccomp/syscall.cc
index 76e96e4..681fec9 100644
--- a/sandbox/linux/seccomp/syscall.cc
+++ b/sandbox/linux/seccomp/syscall.cc
@@ -46,8 +46,17 @@ asm(
".globl playground$syscallWrapper\n"
".type playground$syscallWrapper, @function\n"
#if defined(__x86_64__)
+ // Check for rt_sigreturn(). It needs to be handled specially.
+ "cmp $15, %rax\n" // NR_rt_sigreturn
+ "jnz 1f\n"
+ "add $0x90, %rsp\n" // pop return addresses and red zone
+ "0:syscall\n" // rt_sigreturn() is unrestricted
+ "mov $66, %edi\n" // rt_sigreturn() should never return
+ "mov $231, %eax\n" // NR_exit_group
+ "jmp 0b\n"
+
// Save all registers
- "push %rbp\n"
+ "1:push %rbp\n"
"mov %rsp, %rbp\n"
"push %rbx\n"
"push %rcx\n"
@@ -70,7 +79,7 @@ asm(
// Check range of system call
"cmp playground$maxSyscall(%rip), %eax\n"
- "ja 1f\n"
+ "ja 3f\n"
// Retrieve function call from system call table (c.f. syscall_table.c).
// We have three different types of entries; zero for denied system calls,
@@ -86,9 +95,9 @@ asm(
// Jump to function if non-null and not UNRESTRICTED_SYSCALL, otherwise
// jump to fallback handler.
"cmp $1, %r10\n"
- "jbe 1f\n"
+ "jbe 3f\n"
"call *%r10\n"
- "0:"
+ "2:"
// Restore CPU registers, except for %rax which was set by the system call.
"pop %r15\n"
@@ -113,7 +122,7 @@ asm(
// Return to caller
"ret\n"
- "1:"
+ "3:"
// If we end up calling a specific handler, we don't need to know the
// system call number. However, in the generic case, we do. Shift
// registers so that the system call number becomes visible as the
@@ -129,10 +138,55 @@ asm(
// Call default handler.
"call playground$defaultSystemCallHandler\n"
"pop %r9\n"
- "jmp 0b\n"
+ "jmp 2b\n"
#elif defined(__i386__)
+ "cmp $119, %eax\n" // NR_sigreturn
+ "jnz 1f\n"
+ "add $0x4, %esp\n" // pop return address
+ "0:int $0x80\n" // sigreturn() is unrestricted
+ "mov $66, %ebx\n" // sigreturn() should never return
+ "mov %ebx, %eax\n" // NR_exit
+ "jmp 0b\n"
+ "1:cmp $173, %eax\n" // NR_rt_sigreturn
+ "jnz 3f\n"
+
+ // Convert rt_sigframe into sigframe, allowing us to call sigreturn().
+ // This is possible since the first part of signal stack frames have
+ // stayed very stable since the earliest kernel versions. While never
+ // officially documented, lots of user space applications rely on this
+ // part of the ABI, and kernel developers have been careful to maintain
+ // backwards compatibility.
+ // In general, the rt_sigframe includes a lot of extra information that
+ // the signal handler can look at. Most notably, this means a complete
+ // siginfo record.
+ // Fortunately though, the kernel doesn't look at any of this extra data
+ // when returning from a signal handler. So, we can safely convert an
+ // rt_sigframe to a legacy sigframe, discarding the extra data in the
+ // process. Interestingly, the legacy signal frame is actually larger than
+ // the rt signal frame, as it includes a lot more padding.
+ "sub $0x1C8, %esp\n" // a legacy signal stack is much larger
+ "mov 0x1CC(%esp), %eax\n" // push signal number
+ "push %eax\n"
+ "lea 0x270(%esp), %esi\n" // copy siginfo register values
+ "lea 0x4(%esp), %edi\n" // into new location
+ "mov $0x16, %ecx\n"
+ "cld\n"
+ "rep movsl\n"
+ "mov 0x2C8(%esp), %ebx\n" // copy first half of signal mask
+ "mov %ebx, 0x54(%esp)\n"
+ "lea 2f, %esi\n"
+ "push %esi\n" // push restorer function
+ "lea 0x2D4(%esp), %edi\n" // patch up retcode magic numbers
+ "movb $2, %cl\n"
+ "rep movsl\n"
+ "ret\n" // return to restorer function
+ "2:pop %eax\n" // remove dummy argument (signo)
+ "mov $119, %eax\n" // NR_sigaction
+ "int $0x80\n"
+
+
// Preserve all registers
- "push %ebx\n"
+ "3:push %ebx\n"
"push %ecx\n"
"push %edx\n"
"push %esi\n"
@@ -150,7 +204,7 @@ asm(
// Check range of system call
"cmp playground$maxSyscall, %eax\n"
- "ja 5f\n"
+ "ja 9f\n"
// We often have long sequences of calls to gettimeofday(). This is
// needlessly expensive. Coalesce them into a single call.
@@ -164,9 +218,9 @@ asm(
// or maybe, if we have recently seen requests to compute
// the time. There might be a repeated pattern of those.
"cmp $78, %eax\n" // __NR_gettimeofday
- "jnz 2f\n"
+ "jnz 6f\n"
"cmp %eax, %fs:0x102C-0x58\n" // last system call
- "jnz 0f\n"
+ "jnz 4f\n"
// This system call and the last system call prior to this one both are
// calls to gettimeofday(). Try to avoid making the new call and just
@@ -174,7 +228,7 @@ asm(
// Just in case the caller is spinning on the result from gettimeofday(),
// every so often, call the actual system call.
"decl %fs:0x1030-0x58\n" // countdown calls to gettimofday()
- "jz 0f\n"
+ "jz 4f\n"
// Atomically read the 64bit word representing last-known timestamp and
// return it to the caller. On x86-32 this is a little more complicated and
@@ -186,11 +240,11 @@ asm(
"mov %edx, 4(%ebx)\n"
"xor %eax, %eax\n"
"add $28, %esp\n"
- "jmp 4f\n"
+ "jmp 8f\n"
// This is a call to gettimeofday(), but we don't have a valid cached
// result, yet.
- "0:mov %eax, %fs:0x102C-0x58\n" // remember syscall number
+ "4:mov %eax, %fs:0x102C-0x58\n" // remember syscall number
"movl $500, %fs:0x1030-0x58\n" // make system call, each 500 invocations
"call playground$defaultSystemCallHandler\n"
@@ -201,17 +255,17 @@ asm(
"mov 0(%ebx), %ebx\n"
"mov 100f, %eax\n"
"mov 101f, %edx\n"
- "1:lock; cmpxchg8b 100f\n"
- "jnz 1b\n"
+ "5:lock; cmpxchg8b 100f\n"
+ "jnz 5b\n"
"xor %eax, %eax\n"
- "jmp 6f\n"
+ "jmp 10f\n"
// Remember the number of the last system call made. We deliberately do
// not remember calls to gettid(), as we have often seen long sequences
// of calls to just gettimeofday() and gettid(). In that situation, we
// would still like to coalesce the gettimeofday() calls.
- "2:cmp $224, %eax\n" // __NR_gettid
- "jz 3f\n"
+ "6:cmp $224, %eax\n" // __NR_gettid
+ "jz 7f\n"
"mov %eax, %fs:0x102C-0x58\n" // remember syscall number
// Retrieve function call from system call table (c.f. syscall_table.c).
@@ -219,7 +273,7 @@ asm(
// that should be handled by the defaultSystemCallHandler(); minus one
// for unrestricted system calls that need to be forwarded to the trusted
// thread; and function pointers to specific handler functions.
- "3:shl $3, %eax\n"
+ "7:shl $3, %eax\n"
"lea playground$syscallTable, %ebx\n"
"add %ebx, %eax\n"
"mov 0(%eax), %eax\n"
@@ -227,13 +281,13 @@ asm(
// Jump to function if non-null and not UNRESTRICTED_SYSCALL, otherwise
// jump to fallback handler.
"cmp $1, %eax\n"
- "jbe 5f\n"
+ "jbe 9f\n"
"add $4, %esp\n"
"call *%eax\n"
"add $24, %esp\n"
// Restore CPU registers, except for %eax which was set by the system call.
- "4:pop %ebp\n"
+ "8:pop %ebp\n"
"pop %edi\n"
"pop %esi\n"
"pop %edx\n"
@@ -244,9 +298,9 @@ asm(
"ret\n"
// Call default handler.
- "5:call playground$defaultSystemCallHandler\n"
- "6:add $28, %esp\n"
- "jmp 4b\n"
+ "9:call playground$defaultSystemCallHandler\n"
+ "10:add $28, %esp\n"
+ "jmp 8b\n"
".pushsection \".bss\"\n"
".balign 8\n"
@@ -267,9 +321,9 @@ void* Sandbox::defaultSystemCallHandler(int syscallNum, void* arg0, void* arg1,
void* arg5) {
// TODO(markus): The following comment is currently not true, we do intercept these system calls. Try to fix that.
- // We try to avoid intercepting read(), write(), and sigreturn(), as
- // these system calls are not restricted in Seccomp mode. But depending on
- // the exact instruction sequence in libc, we might not be able to reliably
+ // We try to avoid intercepting read(), and write(), as these system calls
+ // are not restricted in Seccomp mode. But depending on the exact
+ // instruction sequence in libc, we might not be able to reliably
// filter out these system calls at the time when we instrument the code.
SysCalls sys;
long rc;
@@ -283,10 +337,6 @@ void* Sandbox::defaultSystemCallHandler(int syscallNum, void* arg0, void* arg1,
Debug::syscall(&tm, syscallNum, "Allowing unrestricted system call");
rc = sys.write((long)arg0, arg1, (size_t)arg2);
break;
- case __NR_rt_sigreturn:
- Debug::syscall(&tm, syscallNum, "Allowing unrestricted system call");
- rc = sys.rt_sigreturn((unsigned long)arg0);
- break;
default:
if (Debug::isEnabled()) {
// In debug mode, prevent stderr from being closed