summaryrefslogtreecommitdiffstats
path: root/sandbox/linux/seccomp/library.cc
diff options
context:
space:
mode:
Diffstat (limited to 'sandbox/linux/seccomp/library.cc')
-rw-r--r--sandbox/linux/seccomp/library.cc1208
1 files changed, 0 insertions, 1208 deletions
diff --git a/sandbox/linux/seccomp/library.cc b/sandbox/linux/seccomp/library.cc
deleted file mode 100644
index 8dd9b93..0000000
--- a/sandbox/linux/seccomp/library.cc
+++ /dev/null
@@ -1,1208 +0,0 @@
-// Copyright (c) 2010 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#define XOPEN_SOURCE 500
-#include <algorithm>
-#include <elf.h>
-#include <errno.h>
-#include <errno.h>
-#include <fcntl.h>
-#include <linux/unistd.h>
-#include <set>
-#include <signal.h>
-#include <stdarg.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <sys/ptrace.h>
-#include <sys/resource.h>
-#include <sys/stat.h>
-#include <sys/types.h>
-
-#include "allocator.h"
-#include "debug.h"
-#include "library.h"
-#include "sandbox_impl.h"
-#include "syscall.h"
-#include "syscall_table.h"
-#include "x86_decode.h"
-
-// Map the word-size-neutral Elf_* type names and ELF_* macros used by this
-// file onto the 64-bit or 32-bit definitions from <elf.h>, depending on the
-// architecture we are compiled for. Any other architecture is rejected at
-// compile time.
-#if defined(__x86_64__)
-typedef Elf64_Phdr Elf_Phdr;
-typedef Elf64_Rela Elf_Rel;  // note: the 64-bit branch aliases the Rela record
-
-typedef Elf64_Half Elf_Half;
-typedef Elf64_Word Elf_Word;
-typedef Elf64_Sword Elf_Sword;
-typedef Elf64_Xword Elf_Xword;
-typedef Elf64_Sxword Elf_Sxword;
-typedef Elf64_Off Elf_Off;
-typedef Elf64_Section Elf_Section;
-typedef Elf64_Versym Elf_Versym;
-
-// Accessors for the packed symbol-info and relocation-info fields.
-#define ELF_ST_BIND ELF64_ST_BIND
-#define ELF_ST_TYPE ELF64_ST_TYPE
-#define ELF_ST_INFO ELF64_ST_INFO
-#define ELF_R_SYM ELF64_R_SYM
-#define ELF_R_TYPE ELF64_R_TYPE
-#define ELF_R_INFO ELF64_R_INFO
-
-// Section name and relocation type that differ between the two targets.
-#define ELF_REL_PLT ".rela.plt"
-#define ELF_JUMP_SLOT R_X86_64_JUMP_SLOT
-#elif defined(__i386__)
-typedef Elf32_Phdr Elf_Phdr;
-typedef Elf32_Rel Elf_Rel;
-
-typedef Elf32_Half Elf_Half;
-typedef Elf32_Word Elf_Word;
-typedef Elf32_Sword Elf_Sword;
-typedef Elf32_Xword Elf_Xword;
-typedef Elf32_Sxword Elf_Sxword;
-typedef Elf32_Off Elf_Off;
-typedef Elf32_Section Elf_Section;
-typedef Elf32_Versym Elf_Versym;
-
-// Accessors for the packed symbol-info and relocation-info fields.
-#define ELF_ST_BIND ELF32_ST_BIND
-#define ELF_ST_TYPE ELF32_ST_TYPE
-#define ELF_ST_INFO ELF32_ST_INFO
-#define ELF_R_SYM ELF32_R_SYM
-#define ELF_R_TYPE ELF32_R_TYPE
-#define ELF_R_INFO ELF32_R_INFO
-
-// Section name and relocation type that differ between the two targets.
-#define ELF_REL_PLT ".rel.plt"
-#define ELF_JUMP_SLOT R_386_JMP_SLOT
-#else
-#error Unsupported target platform
-#endif
-
-namespace playground {
-
-// Definitions of Library's static members. patchVDSO() treats each one as the
-// address of a kernel-provided entry point whose first bytes it overwrites
-// with a JMP, and skips any that are still NULL.
-char* Library::__kernel_vsyscall;
-char* Library::__kernel_sigreturn;
-char* Library::__kernel_rt_sigreturn;
-
-// Undoes the temporary extension of the library's first page: copies back any
-// changes made to the in-memory first page and moves the (shrunk) file
-// mapping back to the address where the library was originally found.
-Library::~Library() {
-  // Guard on image_ as well as image_size_: without a valid mapping there is
-  // nothing to move back, and the memcmp() below would dereference NULL.
-  if (!image_ || !image_size_) {
-    return;
-  }
-
-  // Address at which the first page of the library originally lived.
-  void* const home = memory_ranges_.rbegin()->second.start;
-
-  // We no longer need access to a full mapping of the underlying library
-  // file. Move the temporarily extended mapping back to where we originally
-  // found it. Make sure to preserve any changes that we might have made since.
-  Sandbox::SysCalls sys;
-  sys.mprotect(image_, 4096, PROT_READ | PROT_WRITE | PROT_EXEC);
-  if (memcmp(image_, home, 4096)) {
-    // Only copy data, if we made any changes in this data. Otherwise there
-    // is no need to create another modified COW mapping.
-    memcpy(image_, home, 4096);
-  }
-  sys.mprotect(image_, 4096, PROT_READ | PROT_EXEC);
-  sys.mremap(image_, image_size_, 4096, MREMAP_MAYMOVE | MREMAP_FIXED, home);
-}
-
-// Reads up to "len" bytes from "src" and copies them to "dst". Short copies
-// are possible, if we are at the end of a mapping. Returns "dst" if at least
-// part of the data could be copied (or "len" was not positive), and NULL if
-// the operation failed completely.
-char* Library::getBytes(char* dst, const char* src, ssize_t len) {
-  // Some kernels don't allow accessing the VDSO from write()
-  if (isVDSO_ &&
-      src >= memory_ranges_.begin()->second.start &&
-      src <= memory_ranges_.begin()->second.stop) {
-    ssize_t max =
-      reinterpret_cast<char *>(memory_ranges_.begin()->second.stop) - src;
-    if (len > max) {
-      len = max;
-    }
-    memcpy(dst, src, len);
-    return dst;
-  }
-
-  static int helper_socket[2];
-  Sandbox::SysCalls sys;
-  if (!helper_socket[0] && !helper_socket[1]) {
-    // Copy data through a socketpair, as this allows us to access it
-    // without incurring a segmentation fault. If the pair cannot be
-    // created, fail instead of writing to whatever file descriptor 0 is.
-    if (sys.socketpair(AF_UNIX, SOCK_STREAM, 0, helper_socket) != 0) {
-      return NULL;
-    }
-  }
-  char* ptr = dst;
-  int inc = 4096;
-  while (len > 0) {
-    // Normally copy up to the next page boundary. After the first EFAULT we
-    // drop down to single bytes to find the exact end of the mapping.
-    ssize_t l = inc == 1 ? inc : 4096 - (reinterpret_cast<long>(src) & 0xFFF);
-    if (l > len) {
-      l = len;
-    }
-    l = NOINTR_SYS(sys.write(helper_socket[0], src, l));
-    if (l == -1) {
-      if (sys.my_errno == EFAULT) {
-        if (inc == 1) {
-          if (ptr == dst) {
-            return NULL;
-          }
-          break;
-        }
-        inc = 1;
-        continue;
-      } else {
-        return NULL;
-      }
-    }
-    // Retry on EINTR just like the write() above. Bailing out here would
-    // leave the bytes we just wrote queued in the socket, where they would
-    // be returned by the next call.
-    l = NOINTR_SYS(sys.read(helper_socket[1], ptr, l));
-    if (l <= 0) {
-      return NULL;
-    }
-    ptr += l;
-    src += l;
-    len -= l;
-  }
-  return dst;
-}
-
-// Copies "len" bytes located at file offset "offset" of the library from
-// memory into "buf". "buf" is always zero-filled first, so it is fully
-// defined even on failure. Returns "buf" on success and NULL if the library
-// is invalid, the offset does not fall into a known range, the request does
-// not fit into that range, or the bytes could not be read.
-char *Library::get(Elf_Addr offset, char *buf, size_t len) {
-  memset(buf, 0, len);
-  if (!valid_) {
-    return NULL;
-  }
-  RangeMap::const_iterator iter = memory_ranges_.lower_bound(offset);
-  if (iter == memory_ranges_.end()) {
-    return NULL;
-  }
-  offset -= iter->first;
-  const long size = reinterpret_cast<char *>(iter->second.stop) -
-                    reinterpret_cast<char *>(iter->second.start);
-  // Check "len" against the range size before subtracting. "size - len" is
-  // evaluated as an unsigned value, so a request larger than the range would
-  // otherwise wrap around and pass the bounds check.
-  if (size < 0 || len > static_cast<size_t>(size) ||
-      offset > static_cast<size_t>(size) - len) {
-    return NULL;
-  }
-  char *src = reinterpret_cast<char *>(iter->second.start) + offset;
-  if (!getBytes(buf, src, len)) {
-    return NULL;
-  }
-  return buf;
-}
-
-// Returns the NUL-terminated string located at file offset "offset" of the
-// library, read from memory. At most 4095 characters are returned. Returns
-// an empty string if the library is invalid, the offset does not fall into a
-// known range, or nothing could be read.
-Library::string Library::get(Elf_Addr offset) {
-  if (!valid_) {
-    return "";
-  }
-  RangeMap::const_iterator iter = memory_ranges_.lower_bound(offset);
-  if (iter == memory_ranges_.end()) {
-    return "";
-  }
-  offset -= iter->first;
-  const char *start = reinterpret_cast<char *>(iter->second.start) + offset;
-  // The readable window ends at the end of the range. Adding "offset" to the
-  // end as well would always yield the full range size and let the read run
-  // past the range.
-  const char *limit = reinterpret_cast<char *>(iter->second.stop);
-  if (start >= limit) {
-    return "";
-  }
-  char buf[4096] = { 0 };
-  ssize_t avail = limit - start;
-  if (avail >= static_cast<ssize_t>(sizeof(buf))) {
-    // Leave room for the terminating NUL that the scan below relies on.
-    avail = sizeof(buf) - 1;
-  }
-  // A failed read leaves "buf" zero-filled, which yields an empty string.
-  getBytes(buf, start, avail);
-  size_t length = 0;
-  while (buf[length]) {
-    ++length;
-  }
-  return string(buf, length);
-}
-
-// Copies "len" bytes at file offset "offset" of the *original* library file
-// into "buf". On first use this tries to extend the mapping of the library's
-// first page so that it covers the whole file; if that is not possible (or
-// for the VDSO) it falls back to reading from memory with get().
-// "buf" may be NULL, in which case the call only makes sure that the mapping
-// is large enough to cover [offset, offset + len) and returns NULL.
-// Returns "buf" on success and NULL on failure.
-char *Library::getOriginal(Elf_Addr offset, char *buf, size_t len) {
-  if (!valid_) {
-    memset(buf, 0, len);
-    return NULL;
-  }
-  Sandbox::SysCalls sys;
-  if (!image_ && !isVDSO_ && !memory_ranges_.empty() &&
-      memory_ranges_.rbegin()->first == 0) {
-    // Extend the mapping of the very first page of the underlying library
-    // file. This way, we can read the original file contents of the entire
-    // library.
-    // We have to be careful, because doing so temporarily removes the first
-    // 4096 bytes of the library from memory. And we don't want to accidentally
-    // unmap code that we are executing. So, only use functions that can be
-    // inlined.
-    void* start = memory_ranges_.rbegin()->second.start;
-    image_size_ = memory_ranges_.begin()->first +
-      (reinterpret_cast<char *>(memory_ranges_.begin()->second.stop) -
-       reinterpret_cast<char *>(memory_ranges_.begin()->second.start));
-    if (image_size_ < 8192) {
-      // It is possible to create a library that is only a single page in
-      // size. In that case, we have to make sure that we artificially map
-      // one extra page past the end of it, as our code relies on mremap()
-      // actually moving the mapping.
-      image_size_ = 8192;
-    }
-    image_ = reinterpret_cast<char *>(sys.mremap(start, 4096, image_size_,
-                                                 MREMAP_MAYMOVE));
-    if (image_size_ == 8192 && image_ == start) {
-      // We really mean it, when we say we want the memory to be moved.
-      image_ = reinterpret_cast<char *>(sys.mremap(start, 4096, image_size_,
-                                                   MREMAP_MAYMOVE));
-      sys.munmap(reinterpret_cast<char *>(start) + 4096, 4096);
-    }
-    if (image_ == MAP_FAILED) {
-      // Reset the size together with the pointer, so that nobody mistakes a
-      // non-zero image_size_ for an existing mapping.
-      image_ = NULL;
-      image_size_ = 0;
-    } else {
-      sys.MMAP(start, 4096, PROT_READ | PROT_WRITE | PROT_EXEC,
-               MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
-      // Restore the contents of the first page with a plain loop (see the
-      // note about inlining above). The loop has to include element 0; a
-      // "--i" loop condition would stop before copying the first word of
-      // the page.
-      long* const page = reinterpret_cast<long *>(start);
-      const long* const original = reinterpret_cast<long *>(image_);
-      for (int i = 4096 / sizeof(long); i-- > 0; ) {
-        page[i] = original[i];
-      }
-    }
-  }
-
-  if (image_) {
-    if (offset + len > image_size_) {
-      // It is quite likely that we initially did not map the entire file as
-      // we did not know how large it is. So, if necessary, try to extend the
-      // mapping.
-      size_t new_size = (offset + len + 4095) & ~4095;
-      char* tmp =
-        reinterpret_cast<char *>(sys.mremap(image_, image_size_, new_size,
-                                            MREMAP_MAYMOVE));
-      if (tmp != MAP_FAILED) {
-        image_ = tmp;
-        image_size_ = new_size;
-      }
-    }
-    if (buf && offset + len <= image_size_) {
-      return reinterpret_cast<char *>(memcpy(buf, image_ + offset, len));
-    }
-    return NULL;
-  }
-  return buf ? get(offset, buf, len) : NULL;
-}
-
-// Returns the NUL-terminated string at file offset "offset" of the original
-// library file. Falls back to the in-memory copy (see get()) if no mapping
-// of the file is available. Returns an empty string if the library is
-// invalid or the offset lies beyond the mapped image.
-Library::string Library::getOriginal(Elf_Addr offset) {
-  if (!valid_) {
-    return "";
-  }
-  // Make sure we actually have a mapping that we can access. If the string
-  // is located at the end of the image, we might not yet have extended the
-  // mapping sufficiently.
-  if (!image_ || image_size_ <= offset) {
-    getOriginal(offset, NULL, 1);
-  }
-
-  if (!image_) {
-    return get(offset);
-  }
-  if (offset >= image_size_) {
-    return "";
-  }
-
-  // Scan using offsets rather than pointers. Extending the mapping below can
-  // move image_ to a different address, which would leave any pointer into
-  // the old mapping dangling.
-  Elf_Addr end = offset;
-  while (end < image_size_ && image_[end]) {
-    ++end;
-    if (end >= image_size_) {
-      // Reached the end of what is mapped so far; try to map one more byte.
-      getOriginal(end, NULL, 1);
-    }
-  }
-  return string(image_ + offset, end - offset);
-}
-
-// Returns the stored ELF header, or NULL if this library is not valid.
-const Elf_Ehdr* Library::getEhdr() {
-  return valid_ ? &ehdr_ : NULL;
-}
-
-// Looks up the section header recorded under the name "section". Returns
-// NULL if the library is not valid or no such section is known.
-const Elf_Shdr* Library::getSection(const string& section) {
-  if (valid_) {
-    const SectionTable::const_iterator entry = section_table_.find(section);
-    if (entry != section_table_.end()) {
-      return &entry->second.second;
-    }
-  }
-  return NULL;
-}
-
-// Looks up the index recorded for the section named "section". Returns -1
-// if the library is not valid or no such section is known.
-int Library::getSectionIndex(const string& section) {
-  if (valid_) {
-    const SectionTable::const_iterator entry = section_table_.find(section);
-    if (entry != section_table_.end()) {
-      return entry->second.first;
-    }
-  }
-  return -1;
-}
-
-// Re-applies the recorded protection bits to every known memory range of the
-// library; PROT_WRITE is added on top of them when "state" is true.
-void Library::makeWritable(bool state) const {
-  const int extra = state ? PROT_WRITE : 0;
-  RangeMap::const_iterator it = memory_ranges_.begin();
-  while (it != memory_ranges_.end()) {
-    char* const first = reinterpret_cast<char *>(it->second.start);
-    char* const last = reinterpret_cast<char *>(it->second.stop);
-    const long bytes = last - first;
-    Sandbox::SysCalls sys;
-    sys.mprotect(it->second.start, bytes, it->second.prot | extra);
-    ++it;
-  }
-}
-
-// Check if the instruction has no unexpected side-effects. If so, it can
-// be safely relocated from the function that we are patching into the
-// out-of-line scratch space that we are setting up. This is often necessary
-// to make room for the JMP into the scratch space.
-// "insn" is the opcode as used throughout this file: single-byte opcodes are
-// below 0x100, two-byte opcodes are encoded as 0x0Fxx (e.g. 0x0F05 SYSCALL).
-bool Library::isSafeInsn(unsigned short insn) {
-  // The arithmetic group only exists among the single-byte opcodes below
-  // 0x40. Test the whole value instead of just the low byte; otherwise
-  // two-byte opcodes such as 0x0F05 (SYSCALL) would be classified as safe
-  // merely because their second byte looks like an arithmetic opcode.
-  return (insn < 0x40 && (insn & 0x7) < 0x6
-          /* ADD, OR, ADC, SBB, AND, SUB, XOR, CMP */) ||
-  #if defined(__x86_64__)
-         insn == 0x63 /* MOVSXD */ ||
-  #endif
-         (insn >= 0x80 && insn <= 0x8E /* ADD, OR, ADC,
-         SBB, AND, SUB, XOR, CMP, TEST, XCHG, MOV, LEA */) ||
-         (insn == 0x90) || /* NOP */
-         (insn >= 0xA0 && insn <= 0xA9) /* MOV, TEST */ ||
-         (insn >= 0xB0 && insn <= 0xBF /* MOV */) ||
-         (insn >= 0xC0 && insn <= 0xC1) || /* Bit Shift */
-         (insn >= 0xD0 && insn <= 0xD3) || /* Bit Shift */
-         (insn >= 0xC6 && insn <= 0xC7 /* MOV */) ||
-         (insn == 0xF7) /* TEST, NOT, NEG, MUL, IMUL, DIV, IDIV */;
-}
-
-// Hands out "needed" bytes of scratch space close to the address "near".
-// "*extraSpace" / "*extraLength" describe the current scratch page and the
-// number of bytes still unused in it; space is handed out from the end of
-// the page downwards. A fresh page is requested from "maps" when the current
-// one is too small or too far away from "near". Dies if no scratch page is
-// available.
-char* Library::getScratchSpace(const Maps* maps, char* near, int needed,
-                               char** extraSpace, int* extraLength) {
-  char*& page = *extraSpace;
-  int& unused = *extraLength;
-
-  const bool fits = needed <= unused;
-  if (!fits || labs(page - reinterpret_cast<char *>(near)) > (1536 << 20)) {
-    if (page) {
-      // Start a new scratch page and mark any previous page as write-protected
-      Sandbox::SysCalls sys;
-      sys.mprotect(page, 4096, PROT_READ|PROT_EXEC);
-    }
-    // Our new scratch space is initially executable and writable.
-    unused = 4096;
-    page = maps->allocNearAddr(near, unused, PROT_READ|PROT_WRITE|PROT_EXEC);
-  }
-  if (page) {
-    unused -= needed;
-    return page + unused;
-  }
-  Sandbox::die("Insufficient space to intercept system call");
-}
-
-// Rewrites every system call instruction in the code range [start, end) so
-// that it jumps to out-of-line scratch code which forwards to
-// syscallWrapper.
-//
-// The function makes two passes over the code:
-//  1. Decode all instructions and collect the targets of relative
-//     Jcc/JMP/CALL instructions, so that we never relocate an instruction
-//     that some branch jumps into.
-//  2. Decode again, keeping a ring buffer of the last few instructions. For
-//     each SYSCALL (x86-64), each INT $0x80 or %gs-relative call that
-//     resolves to __kernel_vsyscall (x86-32), and -- when patching the VDSO
-//     with a non-zero vsys_offset_ -- each indirect CALL (x86-64), work out
-//     which neighboring instructions can be moved and replace them with a
-//     jump to freshly generated code in the scratch space.
-//
-// If there is not enough room for the jump, a system call is replaced with
-// the two bytes CD 00 (padded with NOPs) and left to the signal handler; any
-// other case that cannot be handled calls Sandbox::die().
-//
-// "maps", "extraSpace" and "extraLength" are passed through to
-// getScratchSpace(), which manages the scratch page.
-void Library::patchSystemCallsInFunction(const Maps* maps, char *start,
- char *end, char** extraSpace,
- int* extraLength) {
- // Pass 1: collect the targets of all relative branches and calls.
- std::set<char *, std::less<char *>, SystemAllocator<char *> > branch_targets;
- for (char *ptr = start; ptr < end; ) {
- unsigned short insn = next_inst((const char **)&ptr, __WORDSIZE == 64);
- char *target;
- if ((insn >= 0x70 && insn <= 0x7F) /* Jcc */ || insn == 0xEB /* JMP */) {
- target = ptr + (reinterpret_cast<signed char *>(ptr))[-1];
- } else if (insn == 0xE8 /* CALL */ || insn == 0xE9 /* JMP */ ||
- (insn >= 0x0F80 && insn <= 0x0F8F) /* Jcc */) {
- target = ptr + (reinterpret_cast<int *>(ptr))[-1];
- } else {
- continue;
- }
- branch_targets.insert(target);
- }
- // Pass 2: find the system calls and patch them. "code" is a ring buffer
- // describing the most recently decoded instructions; "codeIdx" is the slot
- // of the instruction that is currently being looked at.
- struct Code {
- char* addr;
- int len;
- unsigned short insn;
- bool is_ip_relative;
- } code[5] = { { 0 } };
- int codeIdx = 0;
- char* ptr = start;
- while (ptr < end) {
- // Keep a ring-buffer of the last few instruction in order to find the
- // correct place to patch the code.
- char *mod_rm;
- code[codeIdx].addr = ptr;
- code[codeIdx].insn = next_inst((const char **)&ptr, __WORDSIZE == 64,
- 0, 0, &mod_rm, 0, 0);
- code[codeIdx].len = ptr - code[codeIdx].addr;
- code[codeIdx].is_ip_relative =
- #if defined(__x86_64__)
- mod_rm && (*mod_rm & 0xC7) == 0x5;
- #else
- false;
- #endif
-
- // Whenever we find a system call, we patch it with a jump to out-of-line
- // code that redirects to our system call wrapper.
- bool is_syscall = true;
- #if defined(__x86_64__)
- bool is_indirect_call = false;
- if (code[codeIdx].insn == 0x0F05 /* SYSCALL */ ||
- // In addition, on x86-64, we need to redirect all CALLs between the
- // VDSO and the VSyscalls page. We want these to jump to our own
- // modified copy of the VSyscalls. As we know that the VSyscalls are
- // always more than 2GB away from the VDSO, the compiler has to
- // generate some form of indirect jumps. We can find all indirect
- // CALLs and redirect them to a separate scratch area, where we can
- // inspect the destination address. If it indeed points to the
- // VSyscall area, we then adjust the destination address accordingly.
- (is_indirect_call =
- (isVDSO_ && vsys_offset_ && code[codeIdx].insn == 0xFF &&
- !code[codeIdx].is_ip_relative &&
- mod_rm && (*mod_rm & 0x38) == 0x10 /* CALL (indirect) */))) {
- is_syscall = !is_indirect_call;
- #elif defined(__i386__)
- bool is_gs_call = false;
- if (code[codeIdx].len == 7 &&
- code[codeIdx].insn == 0xFF &&
- code[codeIdx].addr[2] == '\x15' /* CALL (indirect) */ &&
- code[codeIdx].addr[0] == '\x65' /* %gs prefix */) {
- // Load the call target from the %gs-relative slot named by the
- // instruction's 32bit displacement, and compare it against the known
- // kernel entry point.
- char* target;
- asm volatile("mov %%gs:(%1), %0\n"
- : "=a"(target)
- : "c"(*reinterpret_cast<int *>(code[codeIdx].addr+3)));
- if (target == __kernel_vsyscall) {
- is_gs_call = true;
- // TODO(markus): also handle the other vsyscalls
- }
- }
- if (is_gs_call ||
- (code[codeIdx].insn == 0xCD &&
- code[codeIdx].addr[1] == '\x80' /* INT $0x80 */)) {
- #else
- #error Unsupported target platform
- #endif
- // Found a system call. Search backwards to figure out how to redirect
- // the code. We will need to overwrite a couple of instructions and,
- // of course, move these instructions somewhere else.
- int startIdx = codeIdx;
- int endIdx = codeIdx;
- int length = code[codeIdx].len;
- for (int idx = codeIdx;
- (idx = (idx + (sizeof(code) / sizeof(struct Code)) - 1) %
- (sizeof(code) / sizeof(struct Code))) != codeIdx; ) {
- std::set<char *>::const_iterator iter =
- std::upper_bound(branch_targets.begin(), branch_targets.end(),
- code[idx].addr);
- if (iter != branch_targets.end() && *iter < ptr) {
- // Found a branch pointing to somewhere past our instruction. This
- // instruction cannot be moved safely. Leave it in place.
- break;
- }
- if (code[idx].addr && !code[idx].is_ip_relative &&
- isSafeInsn(code[idx].insn)) {
- // These are all benign instructions with no side-effects and no
- // dependency on the program counter. We should be able to safely
- // relocate them.
- startIdx = idx;
- length = ptr - code[startIdx].addr;
- } else {
- break;
- }
- }
- // Search forward past the system call, too. Sometimes, we can only
- // find relocatable instructions following the system call.
- #if defined(__i386__)
- findEndIdx:
- #endif
- char *next = ptr;
- for (int i = codeIdx;
- next < end &&
- (i = (i + 1) % (sizeof(code) / sizeof(struct Code))) != startIdx;
- ) {
- std::set<char *>::const_iterator iter =
- std::lower_bound(branch_targets.begin(), branch_targets.end(),
- next);
- if (iter != branch_targets.end() && *iter == next) {
- // Found branch target pointing to our instruction
- break;
- }
- char *tmp_rm;
- code[i].addr = next;
- code[i].insn = next_inst((const char **)&next, __WORDSIZE == 64,
- 0, 0, &tmp_rm, 0, 0);
- code[i].len = next - code[i].addr;
- code[i].is_ip_relative = tmp_rm && (*tmp_rm & 0xC7) == 0x5;
- if (!code[i].is_ip_relative && isSafeInsn(code[i].insn)) {
- endIdx = i;
- length = next - code[startIdx].addr;
- } else {
- break;
- }
- }
- // We now know, how many instructions neighboring the system call we
- // can safely overwrite. On x86-32 we need six bytes, and on x86-64
- // We need five bytes to insert a JMPQ and a 32bit address. We then
- // jump to a code fragment that safely forwards to our system call
- // wrapper.
- // On x86-64, this is complicated by the fact that the API allows up
- // to 128 bytes of red-zones below the current stack pointer. So, we
- // cannot write to the stack until we have adjusted the stack
- // pointer.
- // On both x86-32 and x86-64 we take care to leave the stack unchanged
- // while we are executing the preamble and postamble. This allows us
- // to treat instructions that reference %esp/%rsp as safe for
- // relocation.
- // In particular, this means that on x86-32 we cannot use CALL, but
- // have to use a PUSH/RET combination to change the instruction pointer.
- // On x86-64, we can instead use a 32bit JMPQ.
- //
- // .. .. .. .. ; any leading instructions copied from original code
- // 48 81 EC 80 00 00 00 SUB $0x80, %rsp
- // 50 PUSH %rax
- // 48 8D 05 .. .. .. .. LEA ...(%rip), %rax
- // 50 PUSH %rax
- // 48 B8 .. .. .. .. MOV $syscallWrapper, %rax
- // .. .. .. ..
- // 50 PUSH %rax
- // 48 8D 05 06 00 00 00 LEA 6(%rip), %rax
- // 48 87 44 24 10 XCHG %rax, 16(%rsp)
- // C3 RETQ
- // 48 81 C4 80 00 00 00 ADD $0x80, %rsp
- // .. .. .. .. ; any trailing instructions copied from original code
- // E9 .. .. .. .. JMPQ ...
- //
- // Total: 52 bytes + any bytes that were copied
- //
- // On x86-32, the stack is available and we can do:
- //
- // TODO(markus): Try to maintain frame pointers on x86-32
- //
- // .. .. .. .. ; any leading instructions copied from original code
- // 68 .. .. .. .. PUSH return_addr
- // 68 .. .. .. .. PUSH $syscallWrapper
- // C3 RET
- // .. .. .. .. ; any trailing instructions copied from original code
- // 68 .. .. .. .. PUSH return_addr
- // C3 RET
- //
- // Total: 17 bytes + any bytes that were copied
- //
- // For indirect jumps from the VDSO to the VSyscall page, we instead
- // replace the following code (this is only necessary on x86-64). This
- // time, we don't have to worry about red zones:
- //
- // .. .. .. .. ; any leading instructions copied from original code
- // E8 00 00 00 00 CALL .
- // 48 83 04 24 .. ADDQ $.., (%rsp)
- // FF .. .. .. .. .. PUSH .. ; from original CALL instruction
- // 48 81 3C 24 00 00 00 FF CMPQ $0xFFFFFFFFFF000000, 0(%rsp)
- // 72 10 JB . + 16
- // 81 2C 24 .. .. .. .. SUBL ..., 0(%rsp)
- // C7 44 24 04 00 00 00 00 MOVL $0, 4(%rsp)
- // C3 RETQ
- // 48 87 04 24 XCHG %rax,(%rsp)
- // 48 89 44 24 08 MOV %rax,0x8(%rsp)
- // 58 POP %rax
- // C3 RETQ
- // .. .. .. .. ; any trailing instructions copied from original code
- // E9 .. .. .. .. JMPQ ...
- //
- // Total: 52 bytes + any bytes that were copied
-
- if (length < (__WORDSIZE == 32 ? 6 : 5)) {
- // There are a very small number of instruction sequences that we
- // cannot easily intercept, and that have been observed in real world
- // examples. Handle them here:
- #if defined(__i386__)
- int diff;
- if (!memcmp(code[codeIdx].addr, "\xCD\x80\xEB", 3) &&
- (diff = *reinterpret_cast<signed char *>(
- code[codeIdx].addr + 3)) < 0 && diff >= -6) {
- // We have seen...
- // for (;;) {
- // _exit(0);
- // }
- // ..get compiled to:
- // B8 01 00 00 00 MOV $__NR_exit, %eax
- // 66 90 XCHG %ax, %ax
- // 31 DB 0:XOR %ebx, %ebx
- // CD 80 INT $0x80
- // EB FA JMP 0b
- // The JMP is really superfluous as the system call never returns.
- // And there are in fact no returning system calls that need to be
- // unconditionally repeated in an infinite loop.
- // If we replace the JMP with NOPs, the system call can successfully
- // be intercepted.
- *reinterpret_cast<unsigned short *>(code[codeIdx].addr + 2) = 0x9090;
- goto findEndIdx;
- }
- #elif defined(__x86_64__)
- std::set<char *>::const_iterator iter;
- #endif
- // If we cannot figure out any other way to intercept this system call,
- // we replace it with a call to INT0. This causes a SEGV which we then
- // handle in the signal handler. That's a lot slower than rewriting the
- // instruction with a jump, but it should only happen very rarely.
- if (is_syscall) {
- // Copies two bytes: 0xCD followed by the string literal's terminating
- // NUL, i.e. the byte sequence CD 00.
- memcpy(code[codeIdx].addr, "\xCD", 2);
- if (code[codeIdx].len > 2) {
- memset(code[codeIdx].addr + 2, 0x90, code[codeIdx].len - 2);
- }
- goto replaced;
- }
- #if defined(__x86_64__)
- // On x86-64, we occasionally see code like this in the VDSO:
- // 48 8B 05 CF FE FF FF MOV -0x131(%rip),%rax
- // FF 50 20 CALLQ *0x20(%rax)
- // By default, we would not replace the MOV instruction, as it is
- // IP relative. But if the following instruction is also IP relative,
- // we are left with only three bytes which is not enough to insert a
- // jump.
- // We recognize this particular situation, and as long as the CALLQ
- // is not a branch target, we decide to still relocate the entire
- // sequence. We just have to make sure that we then patch up the
- // IP relative addressing.
- else if (is_indirect_call && startIdx == codeIdx &&
- code[startIdx = (startIdx + (sizeof(code) /
- sizeof(struct Code)) - 1) %
- (sizeof(code) / sizeof(struct Code))].addr &&
- ptr - code[startIdx].addr >= 5 &&
- code[startIdx].is_ip_relative &&
- isSafeInsn(code[startIdx].insn) &&
- ((iter = std::upper_bound(branch_targets.begin(),
- branch_targets.end(),
- code[startIdx].addr)) ==
- branch_targets.end() || *iter >= ptr)) {
- // We changed startIdx to include the IP relative instruction.
- // When copying this preamble, we make sure to patch up the
- // offset.
- }
- #endif
- else {
- Sandbox::die("Cannot intercept system call");
- }
- }
- // Work out the smallest set of neighboring instructions that has to be
- // moved to make room for the jump: "first" is the earliest instruction
- // that gets moved, "second" the last one.
- int needed = (__WORDSIZE == 32 ? 6 : 5) - code[codeIdx].len;
- int first = codeIdx;
- while (needed > 0 && first != startIdx) {
- first = (first + (sizeof(code) / sizeof(struct Code)) - 1) %
- (sizeof(code) / sizeof(struct Code));
- needed -= code[first].len;
- }
- int second = codeIdx;
- while (needed > 0) {
- second = (second + 1) % (sizeof(code) / sizeof(struct Code));
- needed -= code[second].len;
- }
- // Number of bytes that are moved from before / after the system call.
- int preamble = code[codeIdx].addr - code[first].addr;
- int postamble = code[second].addr + code[second].len -
- code[codeIdx].addr - code[codeIdx].len;
-
- // The following is all the code that construct the various bits of
- // assembly code.
- #if defined(__x86_64__)
- if (is_indirect_call) {
- needed = 52 + preamble + code[codeIdx].len + postamble;
- } else {
- needed = 52 + preamble + postamble;
- }
- #elif defined(__i386__)
- needed = 17 + preamble + postamble;
- #else
- #error Unsupported target platform
- #endif
-
- // Allocate scratch space and copy the preamble of code that was moved
- // from the function that we are patching.
- char* dest = getScratchSpace(maps, code[first].addr, needed,
- extraSpace, extraLength);
- memcpy(dest, code[first].addr, preamble);
-
- // For jumps from the VDSO to the VSyscalls we sometimes allow exactly
- // one IP relative instruction in the preamble.
- if (code[first].is_ip_relative) {
- *reinterpret_cast<int *>(dest + (code[codeIdx].addr -
- code[first].addr) - 4)
- -= dest - code[first].addr;
- }
-
- // For indirect calls, we need to copy the actual CALL instruction and
- // turn it into a PUSH instruction.
- #if defined(__x86_64__)
- if (is_indirect_call) {
- memcpy(dest + preamble, "\xE8\x00\x00\x00\x00\x48\x83\x04\x24", 9);
- dest[preamble + 9] = code[codeIdx].len + 42;
- memcpy(dest + preamble + 10, code[codeIdx].addr, code[codeIdx].len);
-
- // Convert CALL -> PUSH
- dest[preamble + 10 + (mod_rm - code[codeIdx].addr)] |= 0x20;
- preamble += 10 + code[codeIdx].len;
- }
- #endif
-
- // Copy the static body of the assembly code.
- memcpy(dest + preamble,
- #if defined(__x86_64__)
- is_indirect_call ?
- "\x48\x81\x3C\x24\x00\x00\x00\xFF\x72\x10\x81\x2C\x24\x00\x00\x00"
- "\x00\xC7\x44\x24\x04\x00\x00\x00\x00\xC3\x48\x87\x04\x24\x48\x89"
- "\x44\x24\x08\x58\xC3" :
- "\x48\x81\xEC\x80\x00\x00\x00\x50\x48\x8D\x05\x00\x00\x00\x00\x50"
- "\x48\xB8\x00\x00\x00\x00\x00\x00\x00\x00\x50\x48\x8D\x05\x06\x00"
- "\x00\x00\x48\x87\x44\x24\x10\xC3\x48\x81\xC4\x80\x00\x00",
- is_indirect_call ? 37 : 47
- #elif defined(__i386__)
- "\x68\x00\x00\x00\x00\x68\x00\x00\x00\x00\xC3", 11
- #else
- #error Unsupported target platform
- #endif
- );
-
- // Copy the postamble that was moved from the function that we are
- // patching.
- memcpy(dest + preamble +
- #if defined(__x86_64__)
- (is_indirect_call ? 37 : 47),
- #elif defined(__i386__)
- 11,
- #else
- #error Unsupported target platform
- #endif
- code[codeIdx].addr + code[codeIdx].len,
- postamble);
-
- // Patch up the various computed values
- #if defined(__x86_64__)
- int post = preamble + (is_indirect_call ? 37 : 47) + postamble;
- dest[post] = '\xE9';
- *reinterpret_cast<int *>(dest + post + 1) =
- (code[second].addr + code[second].len) - (dest + post + 5);
- if (is_indirect_call) {
- *reinterpret_cast<int *>(dest + preamble + 13) = vsys_offset_;
- } else {
- *reinterpret_cast<int *>(dest + preamble + 11) =
- (code[second].addr + code[second].len) - (dest + preamble + 15);
- *reinterpret_cast<void **>(dest + preamble + 18) =
- reinterpret_cast<void *>(&syscallWrapper);
- }
- #elif defined(__i386__)
- *(dest + preamble + 11 + postamble) = '\x68'; // PUSH
- *reinterpret_cast<char **>(dest + preamble + 12 + postamble) =
- code[second].addr + code[second].len;
- *(dest + preamble + 16 + postamble) = '\xC3'; // RET
- *reinterpret_cast<char **>(dest + preamble + 1) =
- dest + preamble + 11;
- *reinterpret_cast<void (**)()>(dest + preamble + 6) = syscallWrapper;
- #else
- #error Unsupported target platform
- #endif
-
- // Pad unused space in the original function with NOPs
- memset(code[first].addr, 0x90 /* NOP */,
- code[second].addr + code[second].len - code[first].addr);
-
- // Replace the system call with an unconditional jump to our new code.
- #if defined(__x86_64__)
- *code[first].addr = '\xE9'; // JMPQ
- *reinterpret_cast<int *>(code[first].addr + 1) =
- dest - (code[first].addr + 5);
- #elif defined(__i386__)
- code[first].addr[0] = '\x68'; // PUSH
- *reinterpret_cast<char **>(code[first].addr + 1) = dest;
- code[first].addr[5] = '\xC3'; // RET
- #else
- #error Unsupported target platform
- #endif
- }
- replaced:
- // Advance to the next slot of the ring buffer.
- codeIdx = (codeIdx + 1) % (sizeof(code) / sizeof(struct Code));
- }
-}
-
-// x86-32 only: redirects the well-known kernel entry points of the VDSO to
-// syscallWrapper. Does nothing if __kernel_vsyscall is unknown or if the
-// page containing it cannot be made writable. "extraSpace" and
-// "extraLength" are handed to getScratchSpace() for the out-of-line stubs.
-// On all other platforms this function is empty.
-void Library::patchVDSO(char** extraSpace, int* extraLength){
-  #if defined(__i386__)
-  Sandbox::SysCalls sys;
-  if (!__kernel_vsyscall) {
-    return;
-  }
-  void* const entry_page = reinterpret_cast<void *>(
-      reinterpret_cast<long>(__kernel_vsyscall) & ~0xFFF);
-  if (sys.mprotect(entry_page, 4096, PROT_READ|PROT_WRITE|PROT_EXEC)) {
-    return;
-  }
-
-  // x86-32 has a small number of well-defined functions in the VDSO library.
-  // These functions do not easily lend themselves to be rewritten by the
-  // automatic code. Instead, we explicitly find new definitions for them.
-  //
-  // We don't bother with optimizing the syscall instruction instead always
-  // use INT $0x80, no matter whether the hardware supports more modern
-  // calling conventions.
-  //
-  // TODO(markus): Investigate whether it is worthwhile to optimize this
-  // code path and use the platform-specific entry code.
-  char* const wrapper = reinterpret_cast<char *>(&syscallWrapper);
-
-  // Replace the kernel entry point with:
-  //
-  //   E9 .. .. .. ..    JMP syscallWrapper
-  //
-  // (__kernel_vsyscall is known to be non-NULL at this point.)
-  *__kernel_vsyscall = '\xE9';
-  *reinterpret_cast<long *>(__kernel_vsyscall + 1) =
-      wrapper - reinterpret_cast<char *>(__kernel_vsyscall + 5);
-
-  if (__kernel_sigreturn) {
-    // Replace the sigreturn() system call with a jump to code that does:
-    //
-    //   58                POP  %eax
-    //   B8 77 00 00 00    MOV  $0x77, %eax
-    //   E8 .. .. .. ..    CALL syscallWrapper
-    char* const stub = getScratchSpace(maps_, __kernel_sigreturn, 11,
-                                       extraSpace, extraLength);
-    memcpy(stub, "\x58\xB8\x77\x00\x00\x00\xE8", 7);
-    *reinterpret_cast<long *>(stub + 7) = wrapper - stub - 11;
-    *__kernel_sigreturn = '\xE9';
-    *reinterpret_cast<long *>(__kernel_sigreturn + 1) =
-        stub - reinterpret_cast<char *>(__kernel_sigreturn) - 5;
-  }
-
-  if (__kernel_rt_sigreturn) {
-    // Replace the rt_sigreturn() system call with a jump to code that does:
-    //
-    //   B8 AD 00 00 00    MOV  $0xAD, %eax
-    //   E8 .. .. .. ..    CALL syscallWrapper
-    char* const stub = getScratchSpace(maps_, __kernel_rt_sigreturn, 10,
-                                       extraSpace, extraLength);
-    memcpy(stub, "\xB8\xAD\x00\x00\x00\xE8", 6);
-    *reinterpret_cast<long *>(stub + 6) = wrapper - stub - 10;
-    *__kernel_rt_sigreturn = '\xE9';
-    *reinterpret_cast<long *>(__kernel_rt_sigreturn + 1) =
-        stub - reinterpret_cast<char *>(__kernel_rt_sigreturn) - 5;
-  }
-  #endif
-}
-
-// x86-64 only: creates a patched, private copy of the kernel's VSyscall page
-// and returns the distance (original address minus copy address, truncated
-// to int) that callers have to subtract from a VSyscall address in order to
-// reach the copy. Returns 0 if there is no VSyscall page or on other
-// platforms.
-int Library::patchVSystemCalls() {
-  #if defined(__x86_64__)
-  // VSyscalls live in a shared 4kB page at the top of the address space. This
-  // page cannot be unmapped nor remapped. We have to create a copy within
-  // 2GB of the page, and rewrite all IP-relative accesses to shared variables.
-  // As the top of the address space is not accessible by mmap(), this means
-  // that we need to wrap around addresses to the bottom 2GB of the address
-  // space.
-  // Only x86-64 has VSyscalls.
-  if (maps_->vsyscall()) {
-    char* copy = maps_->allocNearAddr(maps_->vsyscall(), 0x1000,
-                                      PROT_READ|PROT_WRITE|PROT_EXEC);
-    if (!copy) {
-      // Without a copy we cannot patch anything, and the memcpy() below
-      // would write through a NULL pointer. (getScratchSpace() likewise
-      // treats a NULL result of allocNearAddr() as "no space".)
-      Sandbox::die("Cannot patch [vsystemcall]");
-    }
-    char* extraSpace = copy;
-    int extraLength = 0x1000;
-    memcpy(copy, maps_->vsyscall(), 0x1000);
-    long adjust = (long)maps_->vsyscall() - (long)copy;
-    for (int vsys = 0; vsys < 0x1000; vsys += 0x400) {
-      char* start = copy + vsys;
-      char* end = start + 0x400;
-
-      // There can only be up to four VSyscalls, each starting at an offset
-      // of n*0x400 (the stride of the loop above). VSyscalls are invoked by
-      // functions in the VDSO and provide fast implementations of a time
-      // source. We don't exactly know where the code and where the data is
-      // in the VSyscalls page. So, we disassemble the code for each function
-      // and find all branch targets within the function in order to find the
-      // last address of function.
-      for (char *last = start, *vars = end, *ptr = start; ptr < end; ) {
-      new_function:
-        char* mod_rm;
-        unsigned short insn = next_inst((const char **)&ptr, true, 0, 0,
-                                        &mod_rm, 0, 0);
-        if (mod_rm && (*mod_rm & 0xC7) == 0x5) {
-          // Instruction has IP relative addressing mode. Adjust to reference
-          // the variables in the original VSyscall segment.
-          long offset = *reinterpret_cast<int *>(mod_rm + 1);
-          char* var = ptr + offset;
-          if (var >= ptr && var < vars) {
-            // Variables are stored somewhere past all the functions. Remember
-            // the first variable in the VSyscall slot, so that we stop
-            // scanning for instructions once we reach that address.
-            vars = var;
-          }
-          offset += adjust;
-          // The adjusted displacement still has to fit into 32 bits.
-          if ((offset >> 32) && (offset >> 32) != -1) {
-            Sandbox::die("Cannot patch [vsystemcall]");
-          }
-          *reinterpret_cast<int *>(mod_rm + 1) = offset;
-        }
-
-        // Check for jump targets to higher addresses (but within our own
-        // VSyscall slot). They extend the possible end-address of this
-        // function.
-        char *target = 0;
-        if ((insn >= 0x70 && insn <= 0x7F) /* Jcc */ ||
-            insn == 0xEB /* JMP */) {
-          target = ptr + (reinterpret_cast<signed char *>(ptr))[-1];
-        } else if (insn == 0xE8 /* CALL */ || insn == 0xE9 /* JMP */ ||
-                   (insn >= 0x0F80 && insn <= 0x0F8F) /* Jcc */) {
-          target = ptr + (reinterpret_cast<int *>(ptr))[-1];
-        }
-
-        // The function end is found, once the loop reaches the last valid
-        // address in the VSyscall slot, or once it finds a RET instruction
-        // that is not followed by any jump targets. Unconditional jumps that
-        // point backwards are treated the same as a RET instruction.
-        if (insn == 0xC3 /* RET */ ||
-            (target < ptr &&
-             (insn == 0xEB /* JMP */ || insn == 0xE9 /* JMP */))) {
-          if (last >= ptr) {
-            continue;
-          } else {
-            // The function can optionally be followed by more functions in
-            // the same VSyscall slot. Allow for alignment to a 16 byte
-            // boundary. If we then find more non-zero bytes, and if this is
-            // not the known start of the variables, assume a new function
-            // started.
-            for (; ptr < vars; ++ptr) {
-              if ((long)ptr & 0xF) {
-                if (*ptr && *ptr != '\x90' /* NOP */) {
-                  goto new_function;
-                }
-                *ptr = '\x90'; // NOP
-              } else {
-                if (*ptr && *ptr != '\x90' /* NOP */) {
-                  goto new_function;
-                }
-                break;
-              }
-            }
-
-            // Translate all SYSCALLs to jumps into our system call handler.
-            patchSystemCallsInFunction(NULL, start, ptr,
-                                       &extraSpace, &extraLength);
-            break;
-          }
-        }
-
-        // Adjust assumed end address for this function, if a valid jump
-        // target has been found that originates from the current instruction.
-        // NOTE(review): only targets within the first 0x100 bytes of the
-        // slot are accepted here, although a slot is 0x400 bytes long --
-        // confirm that this limit is intended.
-        if (target > last && target < start + 0x100) {
-          last = target;
-        }
-      }
-    }
-
-    // We are done. Write-protect our code and make it executable.
-    Sandbox::SysCalls sys;
-    sys.mprotect(copy, 0x1000, PROT_READ|PROT_EXEC);
-    return maps_->vsyscall() - copy;
-  }
-  #endif
-  return 0;
-}
-
-void Library::patchSystemCalls() {  // Quick-scans ".text" for system call candidates and passes each affected function to patchSystemCallsInFunction().
- if (!valid_) {  // valid_ is cleared by parseElf()/parseSymbols() on failure; nothing to patch then
- return;
- }
- int extraLength = 0;  // scratch-space bookkeeping, passed by pointer to the patching helpers
- char* extraSpace = NULL;  // stays NULL unless a helper sets it; checked before the final mprotect()
- if (isVDSO_) {
- // patchVDSO() calls patchSystemCallsInFunction() which needs vsys_offset_
- // iff processing the VDSO library. So, make sure we call
- // patchVSystemCalls() first.
- vsys_offset_ = patchVSystemCalls();
- #if defined(__i386__)
- patchVDSO(&extraSpace, &extraLength);
- return;  // on i386 the VDSO is handled entirely by patchVDSO(); the ".text" scan below is skipped
- #endif
- }
- SectionTable::const_iterator iter;
- if ((iter = section_table_.find(".text")) == section_table_.end()) {
- return;  // no section named exactly ".text": nothing to scan
- }
- const Elf_Shdr& shdr = iter->second.second;  // section_table_ maps name -> (section index, section header)
- char* start = reinterpret_cast<char *>(shdr.sh_addr + asr_offset_);  // sh_addr shifted by asr_offset_ gives the in-memory start
- char* stop = start + shdr.sh_size;  // one past the last byte of the section
- char* func = start;  // start of the function currently being scanned
- int nopcount = 0;  // number of consecutive NOP bytes seen immediately before ptr
- bool has_syscall = false;  // set when the quick scan sees a candidate in the current function
- for (char *ptr = start; ptr < stop; ptr++) {  // NOTE(review): the ptr[1]/ptr[2] look-ahead below can read just past stop when ptr is at the end of the section
- #if defined(__x86_64__)
- if ((*ptr == '\x0F' && ptr[1] == '\x05' /* SYSCALL */) ||
- (isVDSO_ && *ptr == '\xFF')) {  // in the VDSO any 0xFF byte also counts as a candidate; presumably an indirect CALL/JMP opcode -- confirm
- #elif defined(__i386__)
- if ((*ptr == '\xCD' && ptr[1] == '\x80' /* INT $0x80 */) ||
- (*ptr == '\x65' && ptr[1] == '\xFF' &&
- ptr[2] == '\x15' /* CALL %gs:.. */)) {
- #else
- #error Unsupported target platform
- #endif
- ptr++;  // skip one additional byte after the matched byte
- has_syscall = true;
- nopcount = 0;
- } else if (*ptr == '\x90' /* NOP */) {
- nopcount++;
- } else if (!(reinterpret_cast<long>(ptr) & 0xF)) {  // ptr is 16-byte aligned and *ptr is neither a candidate nor a NOP
- if (nopcount > 2) {
- // This is very likely the beginning of a new function. Functions
- // are aligned on 16 byte boundaries and the preceding function is
- // padded out with NOPs.
- //
- // For performance reasons, we quickly scan the entire text segment
- // for potential SYSCALLs, and then patch the code in increments of
- // individual functions.
- if (has_syscall) {
- has_syscall = false;
- // Our quick scan of the function found a potential system call.
- // Do a more thorough scan, now.
- patchSystemCallsInFunction(maps_, func, ptr, &extraSpace,
- &extraLength);
- }
- func = ptr;  // the next function starts at this aligned address
- }
- nopcount = 0;
- } else {
- nopcount = 0;  // any other byte ends the current run of NOPs
- }
- }
- if (has_syscall) {
- // Patch any remaining system calls that were in the last function before
- // the loop terminated.
- patchSystemCallsInFunction(maps_, func, stop, &extraSpace, &extraLength);
- }
-
- // Mark our scratch space as write-protected and executable.
- if (extraSpace) {
- Sandbox::SysCalls sys;
- sys.mprotect(extraSpace, 4096, PROT_READ|PROT_EXEC);  // NOTE(review): assumes the scratch region is 4096 bytes -- confirm against the code that allocates it
- }
-}
-
-bool Library::parseElf() {  // Reads the ELF and section headers, fills section_table_ and computes asr_offset_; returns false and clears valid_ on failure.
- valid_ = true;  // assume success; reset at the error label below
-
- // Verify ELF header
- Elf_Shdr str_shdr;  // header of the section-name string table (section index e_shstrndx)
- if (!getOriginal(0, &ehdr_) ||
- ehdr_.e_ehsize < sizeof(Elf_Ehdr) ||
- ehdr_.e_phentsize < sizeof(Elf_Phdr) ||
- ehdr_.e_shentsize < sizeof(Elf_Shdr) ||
- !getOriginal(ehdr_.e_shoff + ehdr_.e_shstrndx * ehdr_.e_shentsize,
- &str_shdr)) {  // NOTE(review): e_shstrndx is not range-checked against e_shnum here
- // Not all memory mappings are necessarily ELF files. Skip memory
- // mappings that we cannot identify.
- error:  // also the target of the "goto error" statements further down
- valid_ = false;
- return false;
- }
-
- // Parse section table and find all sections in this ELF file
- for (int i = 0; i < ehdr_.e_shnum; i++) {
- Elf_Shdr shdr;
- if (!getOriginal(ehdr_.e_shoff + i*ehdr_.e_shentsize, &shdr)) {
- continue;  // section headers that cannot be read are silently skipped
- }
- section_table_.insert(
- std::make_pair(getOriginal(str_shdr.sh_offset + shdr.sh_name),  // key: section name read from the string table
- std::make_pair(i, shdr)));  // value: (section index, section header)
- }
-
- // Compute the offset of entries in the .text segment
- const Elf_Shdr* text = getSection(".text");
- if (text == NULL) {
- // On x86-32, the VDSO is unusual in as much as it does not have a single
- // ".text" section. Instead, it has one section per function. Each
- // section name starts with ".text". We just need to pick an arbitrary
- // one in order to find the asr_offset_ -- which would typically be zero
- // for the VDSO.
- for (SectionTable::const_iterator iter = section_table_.begin();
- iter != section_table_.end(); ++iter) {
- if (!strncmp(iter->first.c_str(), ".text", 5)) {  // prefix match on the first five characters
- text = &iter->second.second;
- break;
- }
- }
- }
-
- // Now that we know where the .text segment is located, we can compute the
- // asr_offset_.
- if (text) {
- RangeMap::const_iterator iter =
- memory_ranges_.lower_bound(text->sh_offset);  // NOTE(review): which entry this selects depends on RangeMap's key ordering, which is not visible here
- if (iter != memory_ranges_.end()) {
- asr_offset_ = reinterpret_cast<char *>(iter->second.start) -
- (text->sh_addr - (text->sh_offset - iter->first));  // mapped start minus the address the ELF file assigns to the range's first byte
- } else {
- goto error;  // no memory range found for the text section
- }
- } else {
- goto error;  // no section whose name starts with ".text"
- }
-
- return !isVDSO_ || parseSymbols();  // symbols are only parsed for the VDSO; other libraries succeed here
-}
-
-bool Library::parseSymbols() {  // Loads PLT relocations and .dynsym symbols into plt_entries_/symbols_ and records the __kernel_* entry points; returns false and clears valid_ on malformed data.
- if (!valid_) {
- return false;  // refuse to parse a library already marked invalid
- }
-
- Elf_Shdr str_shdr;
- getOriginal(ehdr_.e_shoff + ehdr_.e_shstrndx * ehdr_.e_shentsize, &str_shdr);  // NOTE(review): result is unchecked and str_shdr is not used again in this function
-
- // Find PLT and symbol tables
- const Elf_Shdr* plt = getSection(ELF_REL_PLT);
- const Elf_Shdr* symtab = getSection(".dynsym");
- Elf_Shdr strtab = { 0 };  // string table holding the symbol names, located via symtab->sh_link
- if (symtab) {
- if (symtab->sh_link >= ehdr_.e_shnum ||
- !getOriginal(ehdr_.e_shoff + symtab->sh_link * ehdr_.e_shentsize,
- &strtab)) {
- Debug::message("Cannot find valid symbol table\n");
- valid_ = false;
- return false;
- }
- }
-
- if (plt && symtab) {
- // Parse PLT table and add its entries
- for (int i = plt->sh_size/sizeof(Elf_Rel); --i >= 0; ) {  // walks the relocation entries from last to first
- Elf_Rel rel;
- if (!getOriginal(plt->sh_offset + i * sizeof(Elf_Rel), &rel) ||
- ELF_R_SYM(rel.r_info)*sizeof(Elf_Sym) >= symtab->sh_size) {  // the referenced symbol index must lie inside .dynsym
- Debug::message("Encountered invalid plt entry\n");
- valid_ = false;
- return false;
- }
-
- if (ELF_R_TYPE(rel.r_info) != ELF_JUMP_SLOT) {
- continue;  // only jump-slot relocations are recorded
- }
- Elf_Sym sym;
- if (!getOriginal(symtab->sh_offset +
- ELF_R_SYM(rel.r_info)*sizeof(Elf_Sym), &sym) ||
- sym.st_shndx >= ehdr_.e_shnum) {  // here any section index >= e_shnum is rejected, including reserved ones
- Debug::message("Encountered invalid symbol for plt entry\n");
- valid_ = false;
- return false;
- }
- string name = getOriginal(strtab.sh_offset + sym.st_name);
- if (name.empty()) {
- continue;  // entries without a name are skipped
- }
- plt_entries_.insert(std::make_pair(name, rel.r_offset));  // symbol name -> r_offset of its relocation
- }
- }
-
- if (symtab) {
- // Parse symbol table and add its entries
- for (Elf_Addr addr = 0; addr < symtab->sh_size; addr += sizeof(Elf_Sym)) {  // addr is a byte offset into .dynsym
- Elf_Sym sym;
- if (!getOriginal(symtab->sh_offset + addr, &sym) ||
- (sym.st_shndx >= ehdr_.e_shnum &&
- sym.st_shndx < SHN_LORESERVE)) {  // out-of-range section indices are tolerated only at or above SHN_LORESERVE
- Debug::message("Encountered invalid symbol\n");
- valid_ = false;
- return false;
- }
- string name = getOriginal(strtab.sh_offset + sym.st_name);
- if (name.empty()) {
- continue;  // entries without a name are skipped
- }
- symbols_.insert(std::make_pair(name, sym));  // symbol name -> full symbol record
- }
- }
-
- SymbolTable::const_iterator iter = symbols_.find("__kernel_vsyscall");  // each lookup below stores asr_offset_ + st_value when the symbol exists with a non-zero value
- if (iter != symbols_.end() && iter->second.st_value) {
- __kernel_vsyscall = asr_offset_ + iter->second.st_value;
- }
- iter = symbols_.find("__kernel_sigreturn");
- if (iter != symbols_.end() && iter->second.st_value) {
- __kernel_sigreturn = asr_offset_ + iter->second.st_value;
- }
- iter = symbols_.find("__kernel_rt_sigreturn");
- if (iter != symbols_.end() && iter->second.st_value) {
- __kernel_rt_sigreturn = asr_offset_ + iter->second.st_value;
- }
-
- return true;  // a missing PLT or .dynsym section is not treated as an error
-}
-
-} // namespace