seccomp 函数详解
1. 函数介绍
2. 函数原型
#include <linux/seccomp.h>
#include <linux/filter.h>
#include <sys/prctl.h>
#include <unistd.h>
int prctl(int option, unsigned long arg2, unsigned long arg3,
unsigned long arg4, unsigned long arg5);
int seccomp(unsigned int operation, unsigned int flags, void *args);
3. 功能
- 限制进程可执行的系统调用集合
- 定义系统调用的执行策略(允许、错误、终止)
- 使用BPF程序实现复杂的过滤逻辑
- 构建安全的沙箱环境
4. 参数
prctl方式:
- int option: 控制选项(如PR_SET_SECCOMP)
- unsigned long arg2: seccomp模式(SECCOMP_MODE_STRICT/SECCOMP_MODE_FILTER)
- 其他参数: 根据选项而定
seccomp系统调用(glibc未提供封装函数,需通过syscall(SYS_seccomp, ...)调用):
- unsigned int operation: 操作类型(SECCOMP_SET_MODE_STRICT/SECCOMP_SET_MODE_FILTER)
- unsigned int flags: 标志位(通常为0)
- void *args: 操作参数(BPF程序指针等)
5. 返回值
- 成功: 返回0
- 失败: 返回-1,并设置errno
6. 相似函数,或关联函数
- prctl: 进程控制接口
- personality: 设置进程执行特性
- chroot: 改变根目录
- capset: 设置进程权限
7. 示例代码
示例1:基础seccomp使用
#define _GNU_SOURCE
#include <errno.h>
#include <stddef.h>      /* offsetof(), used when building the BPF filters */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include <sys/prctl.h>
#include <sys/socket.h>  /* AF_INET, SOCK_STREAM */
#include <sys/syscall.h>
#include <sys/wait.h>    /* waitpid(), WIFEXITED() and friends */

#include <linux/filter.h>
#include <linux/seccomp.h>
/**
 * Demonstrates seccomp strict mode (SECCOMP_MODE_STRICT).
 *
 * Prints the seccomp mode the process starts in, switches the calling
 * thread into strict mode and shows that write() keeps working afterwards.
 *
 * In strict mode the kernel only permits read(), write(), _exit() and
 * sigreturn(). Every other system call gets the thread killed with SIGKILL;
 * that includes prctl() and the exit_group() call a normal return from
 * main() ends in. The success path therefore flushes stdout and leaves
 * through the raw exit system call instead of returning to the caller.
 *
 * @return -1 if strict mode could not be enabled. Does not return on success.
 */
int demo_seccomp_basic() {
    /* sizeof-based lengths: the text is multi-byte UTF-8, so hand-counted
     * byte counts are easy to get wrong. */
    static const char before_msg[] = " 普通write调用成功\n";
    static const char after_msg[] = " write调用仍然允许\n";

    printf("=== 基础seccomp使用示例 ===\n");

    // Report which seccomp mode the process starts in.
    int current_mode = prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
    printf("当前seccomp模式: ");
    switch (current_mode) {
    case SECCOMP_MODE_DISABLED:
        printf("SECCOMP_MODE_DISABLED (禁用)\n");
        break;
    case SECCOMP_MODE_STRICT:
        printf("SECCOMP_MODE_STRICT (严格模式)\n");
        break;
    case SECCOMP_MODE_FILTER:
        printf("SECCOMP_MODE_FILTER (过滤模式)\n");
        break;
    default:
        printf("未知模式 (%d)\n", current_mode);
        break;
    }

    // An ordinary system call before any restriction is in place.
    printf("测试普通系统调用...\n");
    fflush(stdout); // keep buffered printf output ahead of the raw write()
    if (write(STDOUT_FILENO, before_msg, sizeof(before_msg) - 1) == -1) {
        perror("write");
    }

    // Enter strict mode.
    printf("启用seccomp严格模式...\n");
    if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, 0, 0, 0) == -1) {
        printf("启用seccomp失败: %s\n", strerror(errno));
        printf("注意:严格模式只允许read/write/_exit/sigreturn系统调用\n");
        return -1;
    }
    printf("seccomp严格模式启用成功\n");
    // Do not query the mode with prctl(PR_GET_SECCOMP) here: prctl() itself is
    // forbidden in strict mode and would kill the process. The mode is known.
    printf("当前seccomp模式: %d\n", SECCOMP_MODE_STRICT);

    // write() is one of the few calls that is still permitted.
    printf("测试允许的系统调用...\n");
    fflush(stdout);
    if (write(STDOUT_FILENO, after_msg, sizeof(after_msg) - 1) == -1) {
        perror("write");
    }

    // Any other call, e.g. getpid(), would terminate the process with SIGKILL,
    // so the demo only describes it instead of executing it.
    printf("测试不允许的系统调用(程序将终止)...\n");
    printf(" 尝试调用getpid()...\n");
    printf(" 注意:getpid()等系统调用在严格模式下会被禁止\n");
    printf(" 实际执行会导致程序被SIGKILL终止\n");

    // exit()/return-from-main use exit_group(), which strict mode forbids.
    // Flush manually (plain write() calls) and leave via the exit syscall.
    fflush(stdout);
    syscall(SYS_exit, 0);
    return 0; // not reached
}
/** Entry point of example 1: runs the strict-mode demo and reports its result as the exit status. */
int main() {
    const int demo_status = demo_seccomp_basic();

    return demo_status;
}
示例2:自定义BPF过滤器
#define _GNU_SOURCE
#include <linux/seccomp.h>
#include <linux/filter.h>
#include <linux/audit.h>
#include <sys/prctl.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <sys/syscall.h>
/* Value of seccomp_data.arch for the ABI this program is compiled for. */
#ifndef DEMO_AUDIT_ARCH
#if defined(__x86_64__)
#define DEMO_AUDIT_ARCH AUDIT_ARCH_X86_64
#elif defined(__i386__)
#define DEMO_AUDIT_ARCH AUDIT_ARCH_I386
#elif defined(__aarch64__)
#define DEMO_AUDIT_ARCH AUDIT_ARCH_AARCH64
#else
#error "define DEMO_AUDIT_ARCH for this architecture"
#endif
#endif

/**
 * Installs a seccomp BPF filter that allows only read, write, exit and
 * exit_group and makes every other system call fail with EPERM, then
 * exercises both an allowed and a denied call.
 *
 * Notes:
 *  - offsetof() needs <stddef.h>; AUDIT_ARCH_* comes from <linux/audit.h>.
 *  - System call numbers are taken from the SYS_* constants so the filter
 *    is right on every architecture instead of only where the hard-coded
 *    numbers happen to match.
 *  - The read() test blocks until stdin delivers data or end-of-file.
 *
 * @return 0 on success, -1 if the filter could not be installed.
 */
int demo_custom_bpf_filter() {
    static const char write_ok[] = " write调用成功\n";

    printf("=== 自定义BPF过滤器示例 ===\n");

    struct sock_filter filter[] = {
        // System call numbers are per-ABI: refuse anything that is not the
        // ABI the numbers below were compiled for.
        BPF_STMT(BPF_LD | BPF_W | BPF_ABS, offsetof(struct seccomp_data, arch)),
        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, DEMO_AUDIT_ARCH, 1, 0),
        BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_KILL),

        // Load the system call number into the accumulator.
        BPF_STMT(BPF_LD | BPF_W | BPF_ABS, offsetof(struct seccomp_data, nr)),

        // Each pair reads: "if nr == X fall through to ALLOW, else skip it".
        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, SYS_read, 0, 1),
        BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, SYS_write, 0, 1),
        BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, SYS_exit, 0, 1),
        BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, SYS_exit_group, 0, 1),
        BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),

        // Everything else fails with EPERM.
        BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ERRNO | (EPERM & SECCOMP_RET_DATA)),
    };
    struct sock_fprog prog = {
        .len = (unsigned short)(sizeof(filter) / sizeof(filter[0])),
        .filter = filter,
    };

    printf("创建BPF过滤器,允许系统调用:\n");
    printf(" read(%d), write(%d), exit(%d), exit_group(%d)\n",
           (int)SYS_read, (int)SYS_write, (int)SYS_exit, (int)SYS_exit_group);
    printf("其他系统调用将返回EPERM错误\n");

    // Without CAP_SYS_ADMIN the kernel only accepts a filter from a task that
    // has given up the ability to gain privileges.
    if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) == -1) {
        printf("设置no_new_privs失败: %s\n", strerror(errno));
        return -1;
    }

    if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0) == -1) {
        printf("应用BPF过滤器失败: %s\n", strerror(errno));
        printf("可能的原因:\n");
        printf(" 1. 内核不支持seccomp BPF\n");
        printf(" 2. 缺少CAP_SYS_ADMIN权限\n");
        printf(" 3. 已经设置了seccomp策略\n");
        return -1;
    }
    printf("BPF过滤器应用成功\n");

    // Allowed calls.
    printf("\n测试允许的系统调用:\n");
    fflush(stdout); // keep buffered printf output ahead of the raw write()
    if (write(STDOUT_FILENO, write_ok, sizeof(write_ok) - 1) == -1) {
        printf(" write调用失败: %s\n", strerror(errno));
    }
    char buffer[10];
    ssize_t bytes_read = read(STDIN_FILENO, buffer, sizeof(buffer));
    if (bytes_read >= 0) {
        printf(" read调用成功\n");
    }

    // Denied calls. syscall() is used so that failures are reported as
    // -1/errno regardless of how the libc wrapper is implemented.
    printf("\n测试不允许的系统调用:\n");
    long result = syscall(SYS_getpid);
    if (result == -1) {
        printf(" getpid调用被阻止: %s\n", strerror(errno));
    } else {
        printf(" getpid调用意外成功: %ld\n", result);
    }
#ifdef SYS_open /* not every architecture has a plain open system call */
    result = syscall(SYS_open, "/etc/passwd", 0);
    if (result == -1) {
        printf(" open调用被阻止: %s\n", strerror(errno));
    } else {
        printf(" open调用意外成功: %ld\n", result);
    }
#endif

    printf("\n安全的系统调用仍然可以正常工作\n");
    return 0;
}
/** Entry point of example 2: runs the custom BPF filter demo and reports its result as the exit status. */
int main() {
    const int demo_status = demo_custom_bpf_filter();

    return demo_status;
}
示例3:只读沙箱环境
#define _GNU_SOURCE
#include <linux/seccomp.h>
#include <linux/filter.h>
#include <linux/audit.h>
#include <sys/prctl.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <sys/syscall.h>
#include <fcntl.h>
#include <sys/stat.h>
/* Value of seccomp_data.arch for the ABI this program is compiled for. */
#ifndef DEMO_AUDIT_ARCH
#if defined(__x86_64__)
#define DEMO_AUDIT_ARCH AUDIT_ARCH_X86_64
#elif defined(__i386__)
#define DEMO_AUDIT_ARCH AUDIT_ARCH_I386
#elif defined(__aarch64__)
#define DEMO_AUDIT_ARCH AUDIT_ARCH_AARCH64
#else
#error "define DEMO_AUDIT_ARCH for this architecture"
#endif
#endif

/**
 * Builds a "read-only" sandbox with a seccomp BPF filter and exercises it.
 *
 * Policy enforced by the filter:
 *  - read, close, exit and exit_group are always allowed;
 *  - write is allowed only on file descriptors 1 (stdout) and 2 (stderr);
 *  - open/openat are allowed only for read-only access without O_CREAT or
 *    O_TRUNC (openat is covered because the C library's open() is normally
 *    implemented on top of it);
 *  - everything else fails with EPERM.
 *
 * Notes: offsetof() needs <stddef.h>; AF_INET/SOCK_STREAM need
 * <sys/socket.h>. Arguments are inspected through their low 32 bits, which
 * is where seccomp_data stores them first on little-endian machines.
 *
 * @return 0 on success, -1 if the sandbox could not be created.
 */
int demo_readonly_sandbox() {
    static const char stdout_msg[] = " 写入stdout成功\n";
    static const char stderr_msg[] = " 写入stderr成功\n";

    printf("=== 只读沙箱环境示例 ===\n");

    // The jump offsets below count instructions, so keep the comments that
    // name each jump target in sync when editing.
    struct sock_filter filter[] = {
        // Reject any ABI other than the one the SYS_* numbers belong to.
        BPF_STMT(BPF_LD | BPF_W | BPF_ABS, offsetof(struct seccomp_data, arch)),
        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, DEMO_AUDIT_ARCH, 1, 0),
        BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_KILL),

        // Load the system call number.
        BPF_STMT(BPF_LD | BPF_W | BPF_ABS, offsetof(struct seccomp_data, nr)),

        // read: always allowed.
        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, SYS_read, 0, 1),
        BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),

        // write: only to stdout/stderr. Not write -> skip the 5-instruction
        // write section entirely.
        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, SYS_write, 0, 5),
        BPF_STMT(BPF_LD | BPF_W | BPF_ABS, offsetof(struct seccomp_data, args[0])),
        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, STDOUT_FILENO, 1, 0), // stdout -> ALLOW
        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, STDERR_FILENO, 0, 1), // stderr -> ALLOW, else deny
        BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
        BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ERRNO | (EPERM & SECCOMP_RET_DATA)),

        // exit, exit_group and close: always allowed.
        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, SYS_exit, 0, 1),
        BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, SYS_exit_group, 0, 1),
        BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, SYS_close, 0, 1),
        BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),

#ifdef SYS_open
        // open(path, flags, ...): flags are argument 1. After loading them,
        // jump over the two openat instructions to the shared flag check.
        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, SYS_open, 0, 2),
        BPF_STMT(BPF_LD | BPF_W | BPF_ABS, offsetof(struct seccomp_data, args[1])),
        BPF_JUMP(BPF_JMP | BPF_JA, 2, 0, 0),
#endif
        // openat(dirfd, path, flags, ...): flags are argument 2.
        // Not openat -> skip 4 instructions to the final deny.
        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, SYS_openat, 0, 4),
        BPF_STMT(BPF_LD | BPF_W | BPF_ABS, offsetof(struct seccomp_data, args[2])),

        // Shared flag check. O_RDONLY is 0, so it cannot be tested with a
        // bit-test; mask the access mode (plus the flags that modify the file
        // system even on a read-only open) and compare for equality.
        BPF_STMT(BPF_ALU | BPF_AND | BPF_K, O_ACCMODE | O_CREAT | O_TRUNC),
        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, O_RDONLY, 0, 1),
        BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),

        // Default (and failed flag check): deny with EPERM.
        BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ERRNO | (EPERM & SECCOMP_RET_DATA)),
    };
    struct sock_fprog prog = {
        .len = (unsigned short)(sizeof(filter) / sizeof(filter[0])),
        .filter = filter,
    };

    printf("创建只读沙箱环境\n");
    printf("允许的操作:\n");
    printf(" - 读取文件(只读模式)\n");
    printf(" - 写入标准输出和标准错误\n");
    printf(" - 基本的进程控制\n");
    printf("禁止的操作:\n");
    printf(" - 写入文件\n");
    printf(" - 网络操作\n");
    printf(" - 进程创建\n");
    printf(" - 其他危险操作\n");

    // Unprivileged tasks must set no_new_privs before installing a filter.
    if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) == -1) {
        printf("设置no_new_privs失败: %s\n", strerror(errno));
        return -1;
    }
    if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0) == -1) {
        printf("创建沙箱失败: %s\n", strerror(errno));
        return -1;
    }
    printf("只读沙箱创建成功\n");

    printf("\n=== 沙箱功能测试 ===\n");

    // 1. Read-only open and read must work.
    printf("1. 测试允许的读操作:\n");
    int fd = open("/etc/passwd", O_RDONLY);
    if (fd != -1) {
        char buffer[100];
        ssize_t bytes = read(fd, buffer, sizeof(buffer));
        if (bytes > 0) {
            printf(" 读取/etc/passwd成功 (%zd 字节)\n", bytes);
        }
        close(fd);
    } else {
        printf(" 打开/etc/passwd失败: %s\n", strerror(errno));
    }

    // 2. Writing to stdout/stderr must work.
    printf("\n2. 测试允许的写操作:\n");
    fflush(stdout); // keep buffered printf output ahead of the raw write()
    if (write(STDOUT_FILENO, stdout_msg, sizeof(stdout_msg) - 1) == -1) {
        printf(" 写入stdout失败: %s\n", strerror(errno));
    }
    if (write(STDERR_FILENO, stderr_msg, sizeof(stderr_msg) - 1) == -1) {
        printf(" 写入stderr失败: %s\n", strerror(errno));
    }

    // 3. Opening a file for writing must be refused.
    printf("\n3. 测试禁止的写操作:\n");
    fd = open("/tmp/test_seccomp", O_WRONLY | O_CREAT | O_TRUNC, 0644);
    if (fd == -1) {
        printf(" 创建文件被阻止: %s\n", strerror(errno));
    } else {
        printf(" 创建文件意外成功\n");
        close(fd);
        unlink("/tmp/test_seccomp");
    }

    // 4. Process creation and networking must be refused.
    printf("\n4. 测试禁止的系统调用:\n");
    long result;
#ifdef SYS_fork /* not every architecture has a plain fork system call */
    result = syscall(SYS_fork);
    if (result == -1) {
        printf(" fork被阻止: %s\n", strerror(errno));
    } else if (result == 0) {
        _exit(0); // filter failed to block fork: do not let the child run on
    }
#endif
    result = syscall(SYS_socket, AF_INET, SOCK_STREAM, 0);
    if (result == -1) {
        printf(" socket被阻止: %s\n", strerror(errno));
    }

    printf("\n沙箱环境测试完成\n");
    return 0;
}
/** Entry point of example 3: runs the read-only sandbox demo and reports its result as the exit status. */
int main() {
    const int demo_status = demo_readonly_sandbox();

    return demo_status;
}
示例4:进程监控和日志
#define _GNU_SOURCE
#include <linux/seccomp.h>
#include <linux/filter.h>
#include <linux/audit.h>
#include <sys/prctl.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <sys/syscall.h>
#include <signal.h>
#include <sys/wait.h>
/**
 * Signal handler that reports the received signal on stdout and adds a
 * note when the signal is SIGSYS.
 *
 * Only write() is used for output: printf() is not async-signal-safe and
 * must not be called from a handler. errno is saved and restored so the
 * interrupted code does not observe a value changed by the handler.
 *
 * @param sig number of the signal being delivered.
 */
void signal_handler(int sig) {
    static const char prefix[] = "捕获信号 ";
    static const char sigsys_note[] = "检测到被禁止的系统调用\n";
    const int saved_errno = errno;

    // Render the decimal signal number (followed by '\n') right-to-left.
    char digits[16];
    size_t pos = sizeof(digits);
    unsigned int magnitude = sig < 0 ? 0u - (unsigned int)sig : (unsigned int)sig;

    digits[--pos] = '\n';
    do {
        digits[--pos] = (char)('0' + magnitude % 10u);
        magnitude /= 10u;
    } while (magnitude != 0u);
    if (sig < 0) {
        digits[--pos] = '-';
    }

    // Output is best effort: nothing useful can be done about a failed
    // write() from inside a signal handler.
    if (write(STDOUT_FILENO, prefix, sizeof(prefix) - 1) != -1 &&
        write(STDOUT_FILENO, digits + pos, sizeof(digits) - pos) != -1 &&
        sig == SIGSYS) {
        if (write(STDOUT_FILENO, sigsys_note, sizeof(sigsys_note) - 1) == -1) {
            /* ignored, see above */
        }
    }

    errno = saved_errno;
}
/* Value of seccomp_data.arch for the ABI this program is compiled for. */
#ifndef DEMO_AUDIT_ARCH
#if defined(__x86_64__)
#define DEMO_AUDIT_ARCH AUDIT_ARCH_X86_64
#elif defined(__i386__)
#define DEMO_AUDIT_ARCH AUDIT_ARCH_I386
#elif defined(__aarch64__)
#define DEMO_AUDIT_ARCH AUDIT_ARCH_AARCH64
#else
#error "define DEMO_AUDIT_ARCH for this architecture"
#endif
#endif

/**
 * Demonstrates SECCOMP_RET_TRACE: read, write, exit, exit_group and
 * rt_sigreturn are allowed, every other system call is handed to a ptrace
 * tracer.
 *
 * Notes:
 *  - SECCOMP_RET_TRACE does not raise SIGSYS (that is what SECCOMP_RET_TRAP
 *    does). With no tracer attached the traced call is skipped and fails
 *    with ENOSYS, which is what this demo prints.
 *  - rt_sigreturn is allowed so that a signal handler that does run is able
 *    to return.
 *  - offsetof() needs <stddef.h>.
 *
 * @return 0 on success, -1 if the handler or the filter could not be set up.
 */
int demo_seccomp_monitoring() {
    static const char write_msg[] = "允许的write调用\n";

    printf("=== seccomp监控和日志示例 ===\n");

    // Install the SIGSYS handler before the filter restricts rt_sigaction.
    if (signal(SIGSYS, signal_handler) == SIG_ERR) {
        printf("注册SIGSYS处理程序失败: %s\n", strerror(errno));
        return -1;
    }

    struct sock_filter filter[] = {
        // Reject any ABI other than the one the SYS_* numbers belong to.
        BPF_STMT(BPF_LD | BPF_W | BPF_ABS, offsetof(struct seccomp_data, arch)),
        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, DEMO_AUDIT_ARCH, 1, 0),
        BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_KILL),

        // Load the system call number.
        BPF_STMT(BPF_LD | BPF_W | BPF_ABS, offsetof(struct seccomp_data, nr)),

        // Basic I/O.
        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, SYS_read, 0, 1),
        BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, SYS_write, 0, 1),
        BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),

        // Process termination and return from signal handlers.
        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, SYS_exit, 0, 1),
        BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, SYS_exit_group, 0, 1),
        BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, SYS_rt_sigreturn, 0, 1),
        BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),

        // Everything else: notify the tracer; the low 16 bits (here 1) are
        // the message the tracer can retrieve.
        BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_TRACE | (1 & SECCOMP_RET_DATA)),
    };
    struct sock_fprog prog = {
        .len = (unsigned short)(sizeof(filter) / sizeof(filter[0])),
        .filter = filter,
    };

    printf("创建带监控的日志过滤器\n");
    printf("SECCOMP_RET_TRACE可以用于:\n");
    printf(" - 系统调用追踪\n");
    printf(" - 安全审计\n");
    printf(" - 调试和分析\n");

    // Unprivileged tasks must set no_new_privs before installing a filter.
    if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) == -1) {
        printf("设置no_new_privs失败: %s\n", strerror(errno));
        return -1;
    }
    // prctl() is variadic: pass all five arguments explicitly.
    if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0) == -1) {
        printf("启用seccomp失败: %s\n", strerror(errno));
        return -1;
    }
    printf("seccomp监控启用成功\n");

    printf("\n测试监控功能:\n");

    // An allowed call.
    fflush(stdout); // keep buffered printf output ahead of the raw write()
    if (write(STDOUT_FILENO, write_msg, sizeof(write_msg) - 1) == -1) {
        printf("write调用失败: %s\n", strerror(errno));
    }

    // Traced calls. syscall() is used instead of getpid()/getuid() because
    // libc wrappers for calls that normally cannot fail may not turn a kernel
    // error into -1/errno.
    printf("测试被监控的系统调用:\n");
    long traced = syscall(SYS_getpid);
    if (traced == -1) {
        printf("getpid()被拦截(没有tracer): %s\n", strerror(errno));
    } else {
        printf("getpid()返回: %d\n", (int)traced);
    }
    traced = syscall(SYS_getuid);
    if (traced == -1) {
        printf("getuid()被拦截(没有tracer): %s\n", strerror(errno));
    } else {
        printf("getuid()返回: %d\n", (int)traced);
    }

    printf("注意:在实际应用中,SECCOMP_RET_TRACE会触发ptrace事件\n");
    printf("这需要额外的监控进程来处理追踪事件\n");
    return 0;
}
/** Entry point of example 4: runs the monitoring demo and reports its result as the exit status. */
int main() {
    const int demo_status = demo_seccomp_monitoring();

    return demo_status;
}
示例5:安全沙箱应用
#define _GNU_SOURCE
#include <linux/seccomp.h>
#include <linux/filter.h>
#include <linux/audit.h>
#include <sys/prctl.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <sys/syscall.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <sys/mman.h>
/**
 * Sandbox policy switches read by create_secure_sandbox().
 * Each field is tested for truth: a nonzero value adds the corresponding
 * group of system calls to the allow list.
 */
typedef struct {
/* nonzero: allow socket and connect */
int allow_network;
/* nonzero: allow open, openat and close */
int allow_file_write;
/* nonzero: allow fork and clone */
int allow_process_creation;
/* nonzero: allow mmap and munmap */
int allow_memory_mapping;
} sandbox_config_t;
/* Value of seccomp_data.arch for the ABI this program is compiled for. */
#ifndef DEMO_AUDIT_ARCH
#if defined(__x86_64__)
#define DEMO_AUDIT_ARCH AUDIT_ARCH_X86_64
#elif defined(__i386__)
#define DEMO_AUDIT_ARCH AUDIT_ARCH_I386
#elif defined(__aarch64__)
#define DEMO_AUDIT_ARCH AUDIT_ARCH_AARCH64
#else
#error "define DEMO_AUDIT_ARCH for this architecture"
#endif
#endif

/**
 * Appends one "if nr == call then ALLOW" rule per entry of calls to filter,
 * starting at index used, and returns the new number of instructions.
 *
 * BPF_STMT/BPF_JUMP expand to brace initializers, so they have to be
 * wrapped in a compound literal to be assignable.
 */
static size_t sandbox_allow_calls(struct sock_filter *filter, size_t used,
                                  const long *calls, size_t count)
{
    for (size_t i = 0; i < count; i++) {
        filter[used++] = (struct sock_filter)
            BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, (unsigned int)calls[i], 0, 1);
        filter[used++] = (struct sock_filter)
            BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW);
    }
    return used;
}

/**
 * Builds a seccomp BPF filter from config and installs it on the calling
 * thread.
 *
 * read, write, exit and exit_group are always allowed; each nonzero field
 * of config adds its group of system calls. Everything else fails with
 * EPERM. A filter can only be tightened afterwards, never relaxed.
 *
 * Note: offsetof() needs <stddef.h>.
 *
 * @param config policy switches; must not be NULL.
 * @return 0 on success; -1 on failure (errno is EINVAL for a NULL config,
 *         otherwise as set by prctl()).
 */
int create_secure_sandbox(const sandbox_config_t *config) {
    // Worst case: 4 prologue instructions + 2 per allowed call (13 calls)
    // + 1 default action = 31, comfortably below the capacity.
    enum { FILTER_CAPACITY = 64 };
    struct sock_filter filter[FILTER_CAPACITY];
    size_t used = 0;

    const long essential_calls[] = { SYS_read, SYS_write, SYS_exit, SYS_exit_group };
    const long file_calls[] = {
#ifdef SYS_open /* absent on some architectures */
        SYS_open,
#endif
        SYS_openat, SYS_close,
    };
    const long network_calls[] = { SYS_socket, SYS_connect };
    const long process_calls[] = {
#ifdef SYS_fork /* absent on some architectures */
        SYS_fork,
#endif
        SYS_clone,
    };
    const long memory_calls[] = { SYS_mmap, SYS_munmap };

    if (config == NULL) {
        errno = EINVAL;
        return -1;
    }

    printf("=== 创建安全沙箱 ===\n");

    // Prologue: reject foreign ABIs, then load the system call number.
    filter[used++] = (struct sock_filter)
        BPF_STMT(BPF_LD | BPF_W | BPF_ABS, offsetof(struct seccomp_data, arch));
    filter[used++] = (struct sock_filter)
        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, DEMO_AUDIT_ARCH, 1, 0);
    filter[used++] = (struct sock_filter)
        BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_KILL);
    filter[used++] = (struct sock_filter)
        BPF_STMT(BPF_LD | BPF_W | BPF_ABS, offsetof(struct seccomp_data, nr));

    // Calls every sandbox needs.
    used = sandbox_allow_calls(filter, used, essential_calls,
                               sizeof(essential_calls) / sizeof(essential_calls[0]));

    // Optional groups selected by the configuration.
    if (config->allow_file_write) {
        used = sandbox_allow_calls(filter, used, file_calls,
                                   sizeof(file_calls) / sizeof(file_calls[0]));
    }
    if (config->allow_network) {
        used = sandbox_allow_calls(filter, used, network_calls,
                                   sizeof(network_calls) / sizeof(network_calls[0]));
    }
    if (config->allow_process_creation) {
        used = sandbox_allow_calls(filter, used, process_calls,
                                   sizeof(process_calls) / sizeof(process_calls[0]));
    }
    if (config->allow_memory_mapping) {
        used = sandbox_allow_calls(filter, used, memory_calls,
                                   sizeof(memory_calls) / sizeof(memory_calls[0]));
    }

    // Default action: deny with EPERM.
    filter[used++] = (struct sock_filter)
        BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ERRNO | (EPERM & SECCOMP_RET_DATA));

    struct sock_fprog prog = {
        .len = (unsigned short)used,
        .filter = filter,
    };

    printf("沙箱配置:\n");
    printf(" 网络访问: %s\n", config->allow_network ? "允许" : "禁止");
    printf(" 文件写入: %s\n", config->allow_file_write ? "允许" : "禁止");
    printf(" 进程创建: %s\n", config->allow_process_creation ? "允许" : "禁止");
    printf(" 内存映射: %s\n", config->allow_memory_mapping ? "允许" : "禁止");

    // Unprivileged tasks must set no_new_privs before installing a filter.
    if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) == -1) {
        printf("设置no_new_privs失败: %s\n", strerror(errno));
        return -1;
    }
    if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0) == -1) {
        printf("创建沙箱失败: %s\n", strerror(errno));
        return -1;
    }
    printf("安全沙箱创建成功\n");
    return 0;
}
/**
 * Demonstrates three sandbox configurations of decreasing strictness.
 *
 * A seccomp filter cannot be relaxed once installed, so every level is set
 * up in its own child process; the parent stays unrestricted and waits for
 * each child before starting the next one.
 *
 * Note: waitpid() needs <sys/wait.h>.
 *
 * @return 0 when all levels were run, -1 if a child could not be created
 *         or waited for.
 */
int demo_security_levels() {
    static const char io_msg[] = "基本I/O测试成功\n";
    const sandbox_config_t configs[] = {
        {0, 0, 0, 0}, // strictest: basic I/O only
        {0, 1, 0, 1}, // medium: file operations and memory mapping
        {1, 1, 1, 1}, // relaxed: additionally network and process creation
    };
    const char *level_names[] = {"最高安全", "中等安全", "较低安全"};
    const size_t level_count = sizeof(configs) / sizeof(configs[0]);

    printf("=== 不同安全级别沙箱演示 ===\n");

    for (size_t level = 0; level < level_count; level++) {
        printf("\n--- %s级别沙箱 ---\n", level_names[level]);
        // Flush before fork(): otherwise the child inherits the unwritten
        // buffer and the same text is printed twice.
        fflush(stdout);

        pid_t child = fork();
        if (child == -1) {
            printf("fork失败: %s\n", strerror(errno));
            return -1;
        }

        if (child == 0) {
            int child_status = 1;
            if (create_secure_sandbox(&configs[level]) == 0) {
                printf("沙箱 %s 创建成功\n", level_names[level]);
                fflush(stdout); // keep ordering with the raw write() below
                if (write(STDOUT_FILENO, io_msg, sizeof(io_msg) - 1) == -1) {
                    printf("基本I/O测试失败: %s\n", strerror(errno));
                }
                if (configs[level].allow_network) {
                    printf("网络功能可用\n");
                }
                if (configs[level].allow_file_write) {
                    printf("文件写入功能可用\n");
                }
                child_status = 0;
            }
            // _exit() skips atexit handlers, so flush explicitly first.
            fflush(stdout);
            _exit(child_status);
        }

        int wait_status = 0;
        if (waitpid(child, &wait_status, 0) == -1) {
            printf("waitpid失败: %s\n", strerror(errno));
            return -1;
        }
    }
    return 0;
}
/**
 * Runs a small test program inside the strictest sandbox (all optional
 * system call groups disabled) and shows that network access, file
 * creation and process creation are refused.
 *
 * Note: AF_INET/SOCK_STREAM need <sys/socket.h>.
 *
 * @return 0 on success, -1 if the sandbox could not be created.
 */
int demo_practical_sandbox() {
    static const char write_msg[] = " write系统调用工作正常\n";

    printf("=== 实际沙箱应用演示 ===\n");

    // Strictest configuration: nothing beyond the essential calls.
    sandbox_config_t config = {0, 0, 0, 0};
    if (create_secure_sandbox(&config) != 0) {
        return -1;
    }

    printf("\n沙箱环境中运行测试程序:\n");

    // Basic output still works.
    printf("1. 基本输出测试:\n");
    printf(" 标准输出工作正常\n");
    fflush(stdout); // keep buffered printf output ahead of the raw write()
    if (write(STDOUT_FILENO, write_msg, sizeof(write_msg) - 1) == -1) {
        printf(" write系统调用失败: %s\n", strerror(errno));
    }

    // Restricted functionality. syscall() is used so each probe maps to
    // exactly one system call and reports failure as -1/errno.
    printf("\n2. 被限制功能测试:\n");

    long result = syscall(SYS_socket, AF_INET, SOCK_STREAM, 0);
    if (result == -1) {
        printf(" 网络操作被成功阻止: %s\n", strerror(errno));
    } else {
        printf(" 网络操作意外成功: %ld\n", result);
    }

#ifdef SYS_open /* absent on some architectures */
    result = syscall(SYS_open, "/tmp/test", O_WRONLY | O_CREAT, 0644);
#else
    result = syscall(SYS_openat, AT_FDCWD, "/tmp/test", O_WRONLY | O_CREAT, 0644);
#endif
    if (result == -1) {
        printf(" 文件写入被成功阻止: %s\n", strerror(errno));
    } else {
        printf(" 文件写入意外成功: %ld\n", result);
    }

#ifdef SYS_fork /* absent on some architectures */
    result = syscall(SYS_fork);
    if (result == -1) {
        printf(" 进程创建被成功阻止: %s\n", strerror(errno));
    } else if (result == 0) {
        _exit(0); // unexpected child: do not run the rest of the demo twice
    } else {
        printf(" 进程创建意外成功: %ld\n", result);
    }
#endif

    printf("\n3. 沙箱优势:\n");
    printf(" ✓ 防止恶意代码执行危险操作\n");
    printf(" ✓ 限制程序的权限范围\n");
    printf(" ✓ 提供额外的安全层\n");
    printf(" ✓ 可以与其它安全机制配合使用\n");

    printf("\n4. 使用场景:\n");
    printf(" - 插件或扩展的安全执行\n");
    printf(" - 不可信代码的沙箱运行\n");
    printf(" - 容器和虚拟化环境\n");
    printf(" - 安全审计和监控\n");
    return 0;
}
/**
 * Entry point of example 5.
 *
 * A seccomp policy affects the whole process and cannot be undone, so the
 * sandbox demo runs in a child process while the parent stays unrestricted
 * and reports the child's outcome.
 *
 * Note: waitpid() and the W* status macros need <sys/wait.h>.
 *
 * @return 0 if the child exited successfully, 1 otherwise.
 */
int main() {
    printf("seccomp - Linux系统调用过滤机制\n");
    printf("================================\n\n");
    // Flush before fork(): otherwise the child inherits the unwritten buffer
    // and the banner is printed twice when stdout is not a terminal.
    fflush(stdout);

    pid_t child = fork();
    if (child == -1) {
        printf("fork失败: %s\n", strerror(errno));
        return 1;
    }
    if (child == 0) {
        return demo_practical_sandbox();
    }

    int status = 0;
    if (waitpid(child, &status, 0) == -1) {
        printf("waitpid失败: %s\n", strerror(errno));
        return 1;
    }
    if (WIFSIGNALED(status)) {
        printf("子进程被信号 %d 终止\n", WTERMSIG(status));
        return 1;
    }
    return (WIFEXITED(status) && WEXITSTATUS(status) == 0) ? 0 : 1;
}
seccomp 使用注意事项
系统要求:
- 内核版本: 严格模式自Linux 2.6.12起可用;BPF过滤模式需要Linux 3.5或更高版本;seccomp()系统调用需要Linux 3.17或更高版本
- 架构支持: 支持多种CPU架构
- 编译选项: 需要内核编译时启用CONFIG_SECCOMP;过滤模式还需要启用CONFIG_SECCOMP_FILTER
权限要求:
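1. 过滤模式(SECCOMP_MODE_FILTER): 调用者需要在其用户命名空间中拥有CAP_SYS_ADMIN权限,或者事先调用prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)设置no_new_privs;两者都不满足时调用失败并返回EACCES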
2. 无特权进程: 可以使用SECCOMP_MODE_STRICT
3. 容器环境: Docker等容器可能有限制
安全考虑:
1. 策略不可逆: 一旦应用,seccomp策略不能放松
2. 调试困难: 被阻止的系统调用可能难以调试
3. 兼容性: 可能影响程序的正常功能
4. 性能影响: BPF过滤会增加系统调用开销
最佳实践:
- 渐进式应用: 从宽松策略开始,逐步收紧
- 充分测试: 在生产环境前充分测试
- 错误处理: 妥善处理被阻止的系统调用
- 日志记录: 记录安全相关事件
- 备份方案: 提供策略失效时的处理方案
seccomp 模式详解
SECCOMP_MODE_STRICT (模式1):
- 特点: 最简单的模式,只允许read/write/_exit/sigreturn(不包括exit_group,因此不能通过exit()或从main返回来结束进程)
- 优点: 简单、高效、安全
- 缺点: 功能极其有限
- 适用: 极度安全要求的简单程序
SECCOMP_MODE_FILTER (模式2):
- 特点: 使用BPF程序定义复杂过滤规则
- 优点: 灵活、功能强大
- 缺点: 配置复杂
- 适用: 大多数实际应用场景
常见系统调用编号
x86_64架构:
- SYS_read = 0
- SYS_write = 1
- SYS_open = 2
- SYS_close = 3
- SYS_stat = 4
- SYS_fstat = 5
- SYS_lstat = 6
- SYS_poll = 7
- SYS_lseek = 8
- SYS_mmap = 9
- SYS_mprotect = 10
- SYS_munmap = 11
- SYS_brk = 12
- SYS_rt_sigaction = 13
- SYS_rt_sigprocmask = 14
- SYS_rt_sigreturn = 15
- SYS_ioctl = 16
- SYS_pread64 = 17
- SYS_pwrite64 = 18
- SYS_readv = 19
- SYS_writev = 20
- SYS_access = 21
- SYS_pipe = 22
- SYS_select = 23
- SYS_sched_yield = 24
- SYS_mremap = 25
- SYS_msync = 26
- SYS_mincore = 27
- SYS_madvise = 28
- SYS_shmget = 29
- SYS_shmat = 30
- SYS_shmctl = 31
- SYS_dup = 32
- SYS_dup2 = 33
- SYS_pause = 34
- SYS_nanosleep = 35
- SYS_getitimer = 36
- SYS_alarm = 37
- SYS_setitimer = 38
- SYS_getpid = 39
- SYS_sendfile = 40
- SYS_socket = 41
- SYS_connect = 42
- SYS_accept = 43
- SYS_sendto = 44
- SYS_recvfrom = 45
- SYS_sendmsg = 46
- SYS_recvmsg = 47
- SYS_shutdown = 48
- SYS_bind = 49
- SYS_listen = 50
- SYS_getsockname = 51
- SYS_getpeername = 52
- SYS_socketpair = 53
- SYS_setsockopt = 54
- SYS_getsockopt = 55
- SYS_clone = 56
- SYS_fork = 57
- SYS_vfork = 58
- SYS_execve = 59
- SYS_exit = 60
- SYS_wait4 = 61
- SYS_kill = 62
- SYS_uname = 63
- SYS_semget = 64
- SYS_semop = 65
- SYS_semctl = 66
- SYS_shmdt = 67
- SYS_msgget = 68
- SYS_msgsnd = 69
- SYS_msgrcv = 70
- SYS_msgctl = 71
- SYS_fcntl = 72
- SYS_flock = 73
- SYS_fsync = 74
- SYS_fdatasync = 75
- SYS_truncate = 76
- SYS_ftruncate = 77
- SYS_getdents = 78
- SYS_getcwd = 79
- SYS_chdir = 80
- SYS_fchdir = 81
- SYS_rename = 82
- SYS_mkdir = 83
- SYS_rmdir = 84
- SYS_creat = 85
- SYS_link = 86
- SYS_unlink = 87
- SYS_symlink = 88
- SYS_readlink = 89
- SYS_chmod = 90
- SYS_fchmod = 91
- SYS_chown = 92
- SYS_fchown = 93
- SYS_lchown = 94
- SYS_umask = 95
- SYS_gettimeofday = 96
- SYS_getrlimit = 97
- SYS_getrusage = 98
- SYS_sysinfo = 99
- SYS_times = 100
- SYS_ptrace = 101
- SYS_getuid = 102
- SYS_syslog = 103
- SYS_getgid = 104
- SYS_setuid = 105
- SYS_setgid = 106
- SYS_geteuid = 107
- SYS_getegid = 108
- SYS_setpgid = 109
- SYS_getppid = 110
- SYS_getpgrp = 111
- SYS_setsid = 112
- SYS_setreuid = 113
- SYS_setregid = 114
- SYS_getgroups = 115
- SYS_setgroups = 116
- SYS_setresuid = 117
- SYS_getresuid = 118
- SYS_setresgid = 119
- SYS_getresgid = 120
- SYS_getpgid = 121
- SYS_setfsuid = 122
- SYS_setfsgid = 123
- SYS_getsid = 124
- SYS_capget = 125
- SYS_capset = 126
- SYS_rt_sigpending = 127
- SYS_rt_sigtimedwait = 128
- SYS_rt_sigqueueinfo = 129
- SYS_rt_sigsuspend = 130
- SYS_sigaltstack = 131
- SYS_utime = 132
- SYS_mknod = 133
- SYS_uselib = 134
- SYS_personality = 135
- SYS_ustat = 136
- SYS_statfs = 137
- SYS_fstatfs = 138
- SYS_sysfs = 139
- SYS_getpriority = 140
- SYS_setpriority = 141
- SYS_sched_setparam = 142
- SYS_sched_getparam = 143
- SYS_sched_setscheduler = 144
- SYS_sched_getscheduler = 145
- SYS_sched_get_priority_max = 146
- SYS_sched_get_priority_min = 147
- SYS_sched_rr_get_interval = 148
- SYS_mlock = 149
- SYS_munlock = 150
- SYS_mlockall = 151
- SYS_munlockall = 152
- SYS_vhangup = 153
- SYS_modify_ldt = 154
- SYS_pivot_root = 155
- SYS__sysctl = 156
- SYS_prctl = 157
- SYS_arch_prctl = 158
- SYS_adjtimex = 159
- SYS_setrlimit = 160
- SYS_chroot = 161
- SYS_sync = 162
- SYS_acct = 163
- SYS_settimeofday = 164
- SYS_mount = 165
- SYS_umount2 = 166
- SYS_swapon = 167
- SYS_swapoff = 168
- SYS_reboot = 169
- SYS_sethostname = 170
- SYS_setdomainname = 171
- SYS_iopl = 172
- SYS_ioperm = 173
- SYS_create_module = 174
- SYS_init_module = 175
- SYS_delete_module = 176
- SYS_get_kernel_syms = 177
- SYS_query_module = 178
- SYS_quotactl = 179
- SYS_nfsservctl = 180
- SYS_getpmsg = 181
- SYS_putpmsg = 182
- SYS_afs_syscall = 183
- SYS_tuxcall = 184
- SYS_security = 185
- SYS_gettid = 186
- SYS_readahead = 187
- SYS_setxattr = 188
- SYS_lsetxattr = 189
- SYS_fsetxattr = 190
- SYS_getxattr = 191
- SYS_lgetxattr = 192
- SYS_fgetxattr = 193
- SYS_listxattr = 194
- SYS_llistxattr = 195
- SYS_flistxattr = 196
- SYS_removexattr = 197
- SYS_lremovexattr = 198
- SYS_fremovexattr = 199
- SYS_tkill = 200
- SYS_time = 201
- SYS_futex = 202
- SYS_sched_setaffinity = 203
- SYS_sched_getaffinity = 204
- SYS_set_thread_area = 205
- SYS_io_setup = 206
- SYS_io_destroy = 207
- SYS_io_getevents = 208
- SYS_io_submit = 209
- SYS_io_cancel = 210
- SYS_get_thread_area = 211
- SYS_lookup_dcookie = 212
- SYS_epoll_create = 213
- SYS_epoll_ctl_old = 214
- SYS_epoll_wait_old = 215
- SYS_remap_file_pages = 216
- SYS_getdents64 = 217
- SYS_set_tid_address = 218
- SYS_restart_syscall = 219
- SYS_semtimedop = 220
- SYS_fadvise64 = 221
- SYS_timer_create = 222
- SYS_timer_settime = 223
- SYS_timer_gettime = 224
- SYS_timer_getoverrun = 225
- SYS_timer_delete = 226
- SYS_clock_settime = 227
- SYS_clock_gettime = 228
- SYS_clock_getres = 229
- SYS_clock_nanosleep = 230
- SYS_exit_group = 231
- SYS_epoll_wait = 232
- SYS_epoll_ctl = 233
- SYS_tgkill = 234
- SYS_utimes = 235
- SYS_vserver = 236
- SYS_mbind = 237
- SYS_set_mempolicy = 238
- SYS_get_mempolicy = 239
- SYS_mq_open = 240
- SYS_mq_unlink = 241
- SYS_mq_timedsend = 242
- SYS_mq_timedreceive = 243
- SYS_mq_notify = 244
- SYS_mq_getsetattr = 245
- SYS_kexec_load = 246
- SYS_waitid = 247
- SYS_add_key = 248
- SYS_request_key = 249
- SYS_keyctl = 250
- SYS_ioprio_set = 251
- SYS_ioprio_get = 252
- SYS_inotify_init = 253
- SYS_inotify_add_watch = 254
- SYS_inotify_rm_watch = 255
- SYS_migrate_pages = 256
- SYS_openat = 257
- SYS_mkdirat = 258
- SYS_mknodat = 259
- SYS_fchownat = 260
- SYS_futimesat = 261
- SYS_newfstatat = 262
- SYS_unlinkat = 263
- SYS_renameat = 264
- SYS_linkat = 265
- SYS_symlinkat = 266
- SYS_readlinkat = 267
- SYS_fchmodat = 268
- SYS_faccessat = 269
- SYS_pselect6 = 270
- SYS_ppoll = 271
- SYS_unshare = 272
- SYS_set_robust_list = 273
- SYS_get_robust_list = 274
- SYS_splice = 275
- SYS_tee = 276
- SYS_sync_file_range = 277
- SYS_vmsplice = 278
- SYS_move_pages = 279
- SYS_utimensat = 280
- SYS_epoll_pwait = 281
- SYS_signalfd = 282
- SYS_timerfd_create = 283
- SYS_eventfd = 284
- SYS_fallocate = 285
- SYS_timerfd_settime = 286
- SYS_timerfd_gettime = 287
- SYS_accept4 = 288
- SYS_signalfd4 = 289
- SYS_eventfd2 = 290
- SYS_epoll_create1 = 291
- SYS_dup3 = 292
- SYS_pipe2 = 293
- SYS_inotify_init1 = 294
- SYS_preadv = 295
- SYS_pwritev = 296
- SYS_rt_tgsigqueueinfo = 297
- SYS_perf_event_open = 298
- SYS_recvmmsg = 299
- SYS_fanotify_init = 300
- SYS_fanotify_mark = 301
- SYS_prlimit64 = 302
- SYS_name_to_handle_at = 303
- SYS_open_by_handle_at = 304
- SYS_clock_adjtime = 305
- SYS_syncfs = 306
- SYS_sendmmsg = 307
- SYS_setns = 308
- SYS_getcpu = 309
- SYS_process_vm_readv = 310
- SYS_process_vm_writev = 311
- SYS_kcmp = 312
- SYS_finit_module = 313
- SYS_sched_setattr = 314
- SYS_sched_getattr = 315
- SYS_renameat2 = 316
- SYS_seccomp = 317
- SYS_getrandom = 318
- SYS_memfd_create = 319
- SYS_kexec_file_load = 320
- SYS_bpf = 321
- SYS_execveat = 322
- SYS_userfaultfd = 323
- SYS_membarrier = 324
- SYS_mlock2 = 325
- SYS_copy_file_range = 326
- SYS_preadv2 = 327
- SYS_pwritev2 = 328
- SYS_pkey_mprotect = 329
- SYS_pkey_alloc = 330
- SYS_pkey_free = 331
- SYS_statx = 332
- SYS_io_pgetevents = 333
- SYS_rseq = 334
总结
seccomp是Linux系统中强大的安全机制,提供了系统调用过滤、基于BPF的灵活策略定义以及构建沙箱环境的能力。
通过合理使用seccomp,可以显著提高应用程序的安全性,构建更加安全可靠的计算环境。在实际应用中,需要仔细设计过滤策略,充分测试,并考虑错误处理和调试需求。