// blob: 99f631b18c434587a5f24f561168ebe97b41d702 [file] [log] [blame]
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "content/public/common/sandbox_init.h"
#if defined(OS_LINUX) && defined(__x86_64__)
#include <asm/unistd.h>
#include <errno.h>
#include <linux/audit.h>
#include <linux/filter.h>
#include <signal.h>
#include <string.h>
#include <sys/prctl.h>
#include <ucontext.h>
#include <unistd.h>
#include <vector>
#include "base/command_line.h"
#include "base/file_util.h"
#include "base/logging.h"
#include "base/time.h"
#include "content/public/common/content_switches.h"
#ifndef PR_SET_NO_NEW_PRIVS
#define PR_SET_NO_NEW_PRIVS 38
#endif
#ifndef SYS_SECCOMP
#define SYS_SECCOMP 1
#endif
#ifndef __NR_eventfd2
#define __NR_eventfd2 290
#endif
// Constants from very new header files that we can't yet include.
#ifndef SECCOMP_MODE_FILTER
#define SECCOMP_MODE_FILTER 2
#define SECCOMP_RET_KILL 0x00000000U
#define SECCOMP_RET_TRAP 0x00030000U
#define SECCOMP_RET_ERRNO 0x00050000U
#define SECCOMP_RET_ALLOW 0x7fff0000U
#endif
namespace {
// Debug-only sanity check that the process appears to be single-threaded at
// the point the sandbox is initialized. It counts the entries under
// /proc/self/task (presumably one per thread) that were created after the Unix
// epoch and expects exactly one.
static void CheckSingleThreaded() {
// Possibly racy, but it's ok because this is more of a debug check to catch
// new threaded situations arising during development.
// It also has to be a DCHECK() because production builds will be running
// the suid sandbox, which will prevent /proc access in some contexts.
// Using the Unix epoch as the cut-off is intended to make every entry count.
DCHECK_EQ(file_util::CountFilesCreatedAfter(FilePath("/proc/self/task"),
base::Time::UnixEpoch()),
1);
}
// SIGSYS handler that turns a seccomp-reported syscall into a deliberate,
// easily recognisable crash.
//
// |signal|       - delivered signal number; anything but SIGSYS is ignored.
// |info|         - siginfo for the signal; only si_code == SYS_SECCOMP is
//                  handled.
// |void_context| - ucontext_t* of the interrupted code; ignored when NULL.
//
// For a matching signal this function never returns: it faults on a
// synthesised address or, failing that, calls _exit(1).
static void SIGSYS_Handler(int signal, siginfo_t* info, void* void_context) {
  // The tests short-circuit in this order so that |info| is only read once we
  // know this is a SIGSYS.
  if (signal != SIGSYS || info->si_code != SYS_SECCOMP || !void_context)
    return;

  ucontext_t* const ucontext = reinterpret_cast<ucontext_t*>(void_context);

  // Start from the syscall number found in RAX. Out-of-range values collapse
  // to 0 so the low bits of the fault address stay within the expected range.
  uintptr_t fault_address = ucontext->uc_mcontext.gregs[REG_RAX];
  if (fault_address >= 1024)
    fault_address = 0;

  // Fold in 8 bits of each of the first two arguments (RDI, RSI) so a crash
  // dump also reveals which socket type, which fcntl, etc. was attempted,
  // without being likely to hit a mapped address.
  // Do not encode more bits here without thinking about increasing the
  // likelihood of collision with mapped pages.
  fault_address |= ((ucontext->uc_mcontext.gregs[REG_RDI] & 0xffUL) << 12);
  fault_address |= ((ucontext->uc_mcontext.gregs[REG_RSI] & 0xffUL) << 20);

  // Write through the encoded value as if it were a pointer; the faulting
  // address then shows up very clearly in crash dumps.
  volatile char* crash_target = reinterpret_cast<volatile char*>(fault_address);
  *crash_target = '\0';

  // Paranoia: if that address happened to be mapped, retry with only the low
  // 12 bits (the syscall number), which lands in the null page.
  fault_address &= 0xfffUL;
  crash_target = reinterpret_cast<volatile char*>(fault_address);
  *crash_target = '\0';

  // Last resort if neither write faulted.
  _exit(1);
}
// Registers SIGSYS_Handler as the handler for SIGSYS. A failure to register
// is logged as FATAL together with the errno description.
static void InstallSIGSYSHandler() {
  struct sigaction action;
  memset(&action, 0, sizeof(action));
  // SA_SIGINFO selects the three-argument sa_sigaction form of the handler.
  action.sa_sigaction = SIGSYS_Handler;
  action.sa_flags = SA_SIGINFO;

  const int rv = sigaction(SIGSYS, &action, NULL);
  PLOG_IF(FATAL, rv != 0) << "Failed to install SIGSYS handler.";
}
// Appends one BPF_LD+BPF_W+BPF_ABS instruction to |program|, with |offset| as
// its operand. The jump fields are unused for a load and are left at zero.
static void EmitLoad(int offset, std::vector<struct sock_filter>* program) {
  const struct sock_filter load_insn = {
    BPF_LD + BPF_W + BPF_ABS,           // code
    0,                                  // jt
    0,                                  // jf
    static_cast<unsigned int>(offset),  // k
  };
  program->push_back(load_insn);
}
// Appends a load of syscall argument number |arg| (counted from 1) to
// |program|.
static void EmitLoadArg(int arg, std::vector<struct sock_filter>* program) {
  // Each argument is 8 bytes wide, independent of architecture, and the
  // arguments start at an offset of 16 bytes, independent of architecture.
  const int kFirstArgOffset = 16;
  const int kArgSize = 8;
  const int zero_based_arg = arg - 1;
  EmitLoad((zero_based_arg * kArgSize) + kFirstArgOffset, program);
}
// Appends a BPF_JMP+BPF_JEQ+BPF_K instruction comparing against |value|: on a
// match it skips exactly one following instruction (jt = 1), otherwise it
// falls through (jf = 0).
static void EmitJEQJT1(int value, std::vector<struct sock_filter>* program) {
  const struct sock_filter jump_insn = {
    BPF_JMP + BPF_JEQ + BPF_K,         // code
    1,                                 // jt: skip one instruction when equal
    0,                                 // jf: fall through when not equal
    static_cast<unsigned int>(value),  // k
  };
  program->push_back(jump_insn);
}
// Appends a BPF_JMP+BPF_JEQ+BPF_K instruction comparing against |value|: on a
// match it falls through (jt = 0), otherwise it skips |jf| following
// instructions. |jf| is stored in an 8-bit field.
static void EmitJEQJF(int value,
                      int jf,
                      std::vector<struct sock_filter>* program) {
  const struct sock_filter jump_insn = {
    BPF_JMP + BPF_JEQ + BPF_K,         // code
    0,                                 // jt: fall through when equal
    static_cast<unsigned char>(jf),    // jf: skip distance when not equal
    static_cast<unsigned int>(value),  // k
  };
  program->push_back(jump_insn);
}
// Appends a BPF_RET+BPF_K instruction to |program| that returns the constant
// |value|. The jump fields are unused for a return and are left at zero.
static void EmitRet(int value, std::vector<struct sock_filter>* program) {
  const struct sock_filter ret_insn = {
    BPF_RET + BPF_K,                   // code
    0,                                 // jt
    0,                                 // jf
    static_cast<unsigned int>(value),  // k
  };
  program->push_back(ret_insn);
}
// Emits the instructions that start every filter: an architecture check that
// returns SECCOMP_RET_KILL unless the arch equals AUDIT_ARCH_X86_64, followed
// by a load of the syscall number so later rules can compare against it.
static void EmitPreamble(std::vector<struct sock_filter>* program) {
  // Magic offsets of the fields the filter reads.
  const int kSyscallNrOffset = 0;
  const int kArchOffset = 4;

  // First, check correct syscall arch. A match jumps over the KILL return.
  EmitLoad(kArchOffset, program);
  EmitJEQJT1(AUDIT_ARCH_X86_64, program);
  EmitRet(SECCOMP_RET_KILL, program);

  // Leave the syscall number loaded for the rules that follow.
  EmitLoad(kSyscallNrOffset, program);
}
// Emits a two-instruction rule: if the currently loaded value equals |nr|,
// return SECCOMP_RET_ALLOW; otherwise continue with whatever follows the rule.
static void EmitAllowSyscall(int nr, std::vector<struct sock_filter>* program) {
  // On a mismatch, hop over the single return instruction below.
  const int kSkipReturn = 1;
  EmitJEQJF(nr, kSkipReturn, program);
  EmitRet(SECCOMP_RET_ALLOW, program);
}
// Emits a five-instruction rule that returns SECCOMP_RET_ALLOW only when the
// syscall number equals |nr| AND the value loaded for argument |arg_nr|
// (1-based) equals |arg_val|.
//
// Layout of the emitted instructions:
//   0: compare with |nr|       (mismatch -> skip 1..4, past the whole rule)
//   1: load argument |arg_nr|
//   2: compare with |arg_val|  (mismatch -> skip 3, continue at 4)
//   3: return SECCOMP_RET_ALLOW
//   4: reload the syscall number
static void EmitAllowSyscallArgN(int nr,
                                 int arg_nr,
                                 int arg_val,
                                 std::vector<struct sock_filter>* program) {
  // Jump forward 4 on no-match so that we also skip the unnecessary reload of
  // syscall_nr. (It is unnecessary because we have not trashed it yet.)
  const int kSkipWholeRule = 4;
  // A wrong argument value only needs to hop over the allow-return.
  const int kSkipReturn = 1;
  const int kSyscallNrOffset = 0;

  EmitJEQJF(nr, kSkipWholeRule, program);
  EmitLoadArg(arg_nr, program);
  EmitJEQJF(arg_val, kSkipReturn, program);
  EmitRet(SECCOMP_RET_ALLOW, program);
  // The argument load replaced syscall_nr, so load it again for the rules
  // that follow.
  EmitLoad(kSyscallNrOffset, program);
}
// Emits a two-instruction rule: if the currently loaded value equals |nr|,
// return SECCOMP_RET_ERRNO combined with |err|; otherwise continue with
// whatever follows the rule.
//
// |nr|  - syscall number to match.
// |err| - errno value to report (e.g. ENOENT). Only its low 16 bits are used.
static void EmitFailSyscall(int nr, int err,
                            std::vector<struct sock_filter>* program) {
  // A seccomp return value keeps its data (here the errno) in the low 16 bits
  // and the action in the bits above. Mask |err| so that a negative or
  // oversized value can never overwrite the action bits and silently turn
  // this "fail" rule into a different action.
  const int kRetDataMask = 0xffff;
  // On a mismatch, hop over the single return instruction below.
  const int kSkipReturn = 1;
  EmitJEQJF(nr, kSkipReturn, program);
  EmitRet(SECCOMP_RET_ERRNO | (err & kRetDataMask), program);
}
// Appends an unconditional SECCOMP_RET_TRAP return to |program|. Any syscall
// that reaches this instruction without matching an earlier rule gets this
// result.
static void EmitTrap(std::vector<struct sock_filter>* program) {
  const int trap_action = SECCOMP_RET_TRAP;
  EmitRet(trap_action, program);
}
// Emits a rule allowing __NR_kill when its second argument equals |signal|.
// NOTE(review): only the signal argument is matched; the first argument (the
// target) is not constrained here despite the "Self" in the name - confirm
// this is intended.
static void EmitAllowKillSelf(int signal,
                              std::vector<struct sock_filter>* program) {
  const int kSignalArgNr = 2;  // 1-based index of the argument to compare.
  EmitAllowSyscallArgN(__NR_kill, kSignalArgNr, signal, program);
}
// Emits allow rules for the time-query syscalls, in this order:
// clock_gettime, then gettimeofday.
static void EmitAllowGettime(std::vector<struct sock_filter>* program) {
  static const int kTimeSyscalls[] = {
    __NR_clock_gettime,
    __NR_gettimeofday,
  };
  const size_t count = sizeof(kTimeSyscalls) / sizeof(kTimeSyscalls[0]);
  for (size_t i = 0; i < count; ++i)
    EmitAllowSyscall(kTimeSyscalls[i], program);
}
// Appends the GPU process policy to |program|: a list of allowed syscalls,
// kill() restricted to SIGTERM, and a few filename-based syscalls that fail
// with ENOENT. Rules are emitted in table order, so entries listed first are
// matched first.
static void ApplyGPUPolicy(std::vector<struct sock_filter>* program) {
  // "Hot" syscalls go first.
  static const int kHotSyscalls[] = {
    __NR_read,
    __NR_ioctl,
    __NR_poll,
    __NR_epoll_wait,
    __NR_recvfrom,
    __NR_write,
    __NR_writev,
    __NR_gettid,
    __NR_sched_yield,  // Nvidia binary driver.
  };

  // Less hot syscalls.
  static const int kLessHotSyscalls[] = {
    __NR_futex,
    __NR_madvise,
    __NR_sendmsg,
    __NR_recvmsg,
    __NR_eventfd2,
    __NR_pipe,
    __NR_mmap,
    __NR_mprotect,
    __NR_clone,
    __NR_set_robust_list,
    __NR_getuid,
    __NR_geteuid,
    __NR_getgid,
    __NR_getegid,
    __NR_epoll_create,
    __NR_fcntl,
    __NR_socketpair,
    __NR_epoll_ctl,
    __NR_prctl,
    __NR_fstat,
    __NR_close,
    __NR_restart_syscall,
    __NR_rt_sigreturn,
    __NR_brk,
    __NR_rt_sigprocmask,
    __NR_munmap,
    __NR_dup,
    __NR_mlock,
    __NR_munlock,
    __NR_exit,
    __NR_exit_group,
    __NR_getpid,   // Nvidia binary driver.
    __NR_getppid,  // ATI binary driver.
    __NR_lseek,    // Nvidia binary driver.
  };

  // Generally, filename-based syscalls will fail with ENOENT to behave
  // similarly to a possible future setuid sandbox.
  static const int kEnoentSyscalls[] = {
    __NR_open,
    __NR_access,
    __NR_mkdir,     // Nvidia binary driver.
    __NR_readlink,  // ATI binary driver.
  };

  const size_t hot_count = sizeof(kHotSyscalls) / sizeof(kHotSyscalls[0]);
  for (size_t i = 0; i < hot_count; ++i)
    EmitAllowSyscall(kHotSyscalls[i], program);

  // The time syscalls sit between the hot and the less hot groups.
  EmitAllowGettime(program);

  const size_t less_hot_count =
      sizeof(kLessHotSyscalls) / sizeof(kLessHotSyscalls[0]);
  for (size_t i = 0; i < less_hot_count; ++i)
    EmitAllowSyscall(kLessHotSyscalls[i], program);

  EmitAllowKillSelf(SIGTERM, program);  // GPU watchdog.

  const size_t enoent_count =
      sizeof(kEnoentSyscalls) / sizeof(kEnoentSyscalls[0]);
  for (size_t i = 0; i < enoent_count; ++i)
    EmitFailSyscall(kEnoentSyscalls[i], ENOENT, program);
}
// Appends the Flash (PPAPI plugin) process policy to |program|: a list of
// allowed syscalls plus a few syscalls that fail with ENOENT. Rules are
// emitted in table order, so entries listed first are matched first.
static void ApplyFlashPolicy(std::vector<struct sock_filter>* program) {
  // "Hot" syscalls go first.
  static const int kHotSyscalls[] = {
    __NR_futex,
    __NR_write,
    __NR_epoll_wait,
    __NR_read,
    __NR_times,
  };

  // Less hot syscalls (emitted after the time syscalls).
  static const int kLessHotSyscalls[] = {
    __NR_clone,
    __NR_set_robust_list,
    __NR_getuid,
    __NR_geteuid,
    __NR_getgid,
    __NR_getegid,
    __NR_epoll_create,
    __NR_fcntl,
    __NR_socketpair,
    __NR_pipe,
    __NR_epoll_ctl,
    __NR_gettid,
    __NR_prctl,
    __NR_fstat,
    __NR_sendmsg,
    __NR_mmap,
    __NR_munmap,
    __NR_mprotect,
    __NR_madvise,
    __NR_rt_sigaction,
    __NR_rt_sigprocmask,
    __NR_wait4,
    __NR_exit_group,
    __NR_exit,
    __NR_rt_sigreturn,
    __NR_restart_syscall,
    __NR_close,
    __NR_recvmsg,
    __NR_lseek,
    __NR_brk,
    __NR_sched_yield,
    // These are under investigation, and hopefully not here for the long term.
    __NR_shmctl,
    __NR_shmat,
    __NR_shmdt,
  };

  // Syscalls that are answered with ENOENT instead of being allowed.
  static const int kEnoentSyscalls[] = {
    __NR_open,
    __NR_execve,
    __NR_access,
  };

  const size_t hot_count = sizeof(kHotSyscalls) / sizeof(kHotSyscalls[0]);
  for (size_t i = 0; i < hot_count; ++i)
    EmitAllowSyscall(kHotSyscalls[i], program);

  EmitAllowGettime(program);

  const size_t less_hot_count =
      sizeof(kLessHotSyscalls) / sizeof(kLessHotSyscalls[0]);
  for (size_t i = 0; i < less_hot_count; ++i)
    EmitAllowSyscall(kLessHotSyscalls[i], program);

  const size_t enoent_count =
      sizeof(kEnoentSyscalls) / sizeof(kEnoentSyscalls[0]);
  for (size_t i = 0; i < enoent_count; ++i)
    EmitFailSyscall(kEnoentSyscalls[i], ENOENT, program);
}
// Probes for seccomp filter support without installing anything: the call
// passes a null filter argument, and a failure with errno == EFAULT is taken
// to mean that filter mode is recognised. Returns true only in that case.
static bool CanUseSeccompFilters() {
  const int rv = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, 0, 0, 0);
  return rv != 0 && errno == EFAULT;
}
// Installs |program| as the seccomp filter for the calling process.
// PR_SET_NO_NEW_PRIVS is set first; a failure of either prctl() call is
// logged as FATAL together with the errno description.
// |program| must not be empty: the address of its first element is handed to
// the kernel.
static void InstallFilter(const std::vector<struct sock_filter>& program) {
  const int no_new_privs_rv = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
  PLOG_IF(FATAL, no_new_privs_rv != 0) << "prctl(PR_SET_NO_NEW_PRIVS) failed";

  // sock_fprog wants a non-const pointer, hence the const_cast.
  struct sock_fprog fprog;
  fprog.filter = const_cast<struct sock_filter*>(&program.front());
  fprog.len = program.size();

  const int install_rv = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &fprog, 0, 0);
  PLOG_IF(FATAL, install_rv != 0) << "Failed to install filter.";
}
} // anonymous namespace
namespace content {
void InitializeSandbox() {
const CommandLine& command_line = *CommandLine::ForCurrentProcess();
if (command_line.HasSwitch(switches::kNoSandbox) ||
command_line.HasSwitch(switches::kDisableSeccompFilterSandbox))
return;
std::string process_type =
command_line.GetSwitchValueASCII(switches::kProcessType);
if (process_type == switches::kGpuProcess &&
command_line.HasSwitch(switches::kDisableGpuSandbox))
return;
if (!CanUseSeccompFilters())
return;
CheckSingleThreaded();
std::vector<struct sock_filter> program;
EmitPreamble(&program);
if (process_type == switches::kGpuProcess) {
ApplyGPUPolicy(&program);
} else if (process_type == switches::kPpapiPluginProcess) {
ApplyFlashPolicy(&program);
} else {
NOTREACHED();
}
EmitTrap(&program);
InstallSIGSYSHandler();
InstallFilter(program);
}
} // namespace content
#else
namespace content {
// No-op: this build configuration (not Linux x86_64) has no sandbox to set up
// in this file.
void InitializeSandbox() {}
} // namespace content
#endif