1. 函数介绍
2. 函数原型
#include <linux/seccomp.h>
#include <linux/filter.h>
#include <sys/prctl.h>
#include <unistd.h>
int prctl(int option, unsigned long arg2, unsigned long arg3, 
          unsigned long arg4, unsigned long arg5);
int seccomp(unsigned int operation, unsigned int flags, void *args);
3. 功能
- 限制进程可执行的系统调用集合
 - 定义系统调用的执行策略(允许、错误、终止)
 - 使用BPF程序实现复杂的过滤逻辑
 - 构建安全的沙箱环境
 
4. 参数
prctl方式:
- int option: 控制选项(如PR_SET_SECCOMP)
 - unsigned long arg2: seccomp模式(SECCOMP_MODE_STRICT/SECCOMP_MODE_FILTER)
 - 其他参数: 根据选项而定
 
seccomp系统调用:
- unsigned int operation: 操作类型(SECCOMP_SET_MODE_STRICT/SECCOMP_SET_MODE_FILTER)
 - unsigned int flags: 标志位(通常为0)
 - *void args: 操作参数(BPF程序指针等)
 
5. 返回值
- 成功: 返回0
 - 失败: 返回-1,并设置errno
 
6. 相似函数,或关联函数
- prctl: 进程控制接口
 - personality: 设置进程执行特性
 - chroot: 改变根目录
 - capset: 设置进程权限
 
7. 示例代码
示例1:基础seccomp使用
#define _GNU_SOURCE
#include <linux/seccomp.h>
#include <linux/filter.h>
#include <sys/prctl.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <sys/syscall.h>
/**
 * 演示基础seccomp使用方法
 */
int demo_seccomp_basic() {
    printf("=== 基础seccomp使用示例 ===\n");
    
    // 显示当前seccomp状态
    int current_mode = prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
    printf("当前seccomp模式: ");
    switch (current_mode) {
        case 0:
            printf("SECCOMP_MODE_DISABLED (禁用)\n");
            break;
        case 1:
            printf("SECCOMP_MODE_STRICT (严格模式)\n");
            break;
        case 2:
            printf("SECCOMP_MODE_FILTER (过滤模式)\n");
            break;
        default:
            printf("未知模式 (%d)\n", current_mode);
            break;
    }
    
    // 测试普通系统调用(应该成功)
    printf("测试普通系统调用...\n");
    write(STDOUT_FILENO, "  普通write调用成功\n", 21);
    
    // 启用严格模式seccomp
    printf("启用seccomp严格模式...\n");
    if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, 0, 0, 0) == -1) {
        printf("启用seccomp失败: %s\n", strerror(errno));
        printf("注意:严格模式只允许read/write/exit/exit_group系统调用\n");
        return -1;
    }
    
    printf("seccomp严格模式启用成功\n");
    printf("当前seccomp模式: %d\n", prctl(PR_GET_SECCOMP, 0, 0, 0, 0));
    
    // 测试允许的系统调用
    printf("测试允许的系统调用...\n");
    write(STDOUT_FILENO, "  write调用仍然允许\n", 20);
    
    // 测试不允许的系统调用(这会导致程序终止)
    printf("测试不允许的系统调用(程序将终止)...\n");
    printf("  尝试调用getpid()...\n");
    
    // 注意:下面的调用会导致程序被SIGKILL终止
    // 为了演示目的,我们注释掉危险操作
    /*
    pid_t pid = getpid();  // 这会导致程序终止!
    printf("getpid()返回: %d\n", pid);
    */
    
    printf("  注意:getpid()等系统调用在严格模式下会被禁止\n");
    printf("  实际执行会导致程序被SIGKILL终止\n");
    
    return 0;
}
int main() {
    return demo_seccomp_basic();
}
示例2:自定义BPF过滤器
#define _GNU_SOURCE
#include <linux/seccomp.h>
#include <linux/filter.h>
#include <linux/audit.h>
#include <sys/prctl.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <sys/syscall.h>
/**
 * 创建允许特定系统调用的BPF过滤器
 */
int demo_custom_bpf_filter() {
    printf("=== 自定义BPF过滤器示例 ===\n");
    
    // 定义BPF过滤器程序
    // 允许的系统调用:read, write, exit, exit_group
    struct sock_filter filter[] = {
        // 加载系统调用号到累加器
        BPF_STMT(BPF_LD | BPF_W | BPF_ABS, offsetof(struct seccomp_data, nr)),
        
        // 允许 read 系统调用 (SYS_read = 0)
        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, SYS_read, 0, 1),
        BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
        
        // 允许 write 系统调用 (SYS_write = 1)
        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, SYS_write, 0, 1),
        BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
        
        // 允许 exit 系统调用 (SYS_exit = 60 on x86_64)
#ifdef __x86_64__
        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 60, 0, 1),
#elif defined(__i386__)
        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 1, 0, 1),
#endif
        BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
        
        // 允许 exit_group 系统调用
#ifdef __x86_64__
        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 231, 0, 1),
#elif defined(__i386__)
        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 252, 0, 1),
#endif
        BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
        
        // 其他系统调用返回EPERM错误
        BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ERRNO | (EPERM & 0xFFFF)),
    };
    
    struct sock_fprog prog = {
        .len = sizeof(filter) / sizeof(filter[0]),
        .filter = filter,
    };
    
    // 显示过滤器信息
    printf("创建BPF过滤器,允许系统调用:\n");
    printf("  read(%d), write(%d), exit(%d), exit_group(%d)\n", 
#ifdef __x86_64__
           SYS_read, SYS_write, 60, 231
#elif defined(__i386__)
           SYS_read, SYS_write, 1, 252
#endif
    );
    printf("其他系统调用将返回EPERM错误\n");
    
    // 应用BPF过滤器
    if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0) == -1) {
        printf("应用BPF过滤器失败: %s\n", strerror(errno));
        printf("可能的原因:\n");
        printf("  1. 内核不支持seccomp BPF\n");
        printf("  2. 缺少CAP_SYS_ADMIN权限\n");
        printf("  3. 已经设置了seccomp策略\n");
        return -1;
    }
    
    printf("BPF过滤器应用成功\n");
    
    // 测试允许的系统调用
    printf("\n测试允许的系统调用:\n");
    write(STDOUT_FILENO, "  write调用成功\n", 16);
    
    char buffer[10];
    ssize_t bytes_read = read(STDIN_FILENO, buffer, sizeof(buffer));
    if (bytes_read >= 0) {
        printf("  read调用成功\n");
    }
    
    // 测试不允许的系统调用
    printf("\n测试不允许的系统调用:\n");
    long result = syscall(SYS_getpid);
    if (result == -1) {
        printf("  getpid调用被阻止: %s\n", strerror(errno));
    } else {
        printf("  getpid调用意外成功: %ld\n", result);
    }
    
    result = syscall(SYS_open, "/etc/passwd", 0);
    if (result == -1) {
        printf("  open调用被阻止: %s\n", strerror(errno));
    } else {
        printf("  open调用意外成功: %ld\n", result);
    }
    
    printf("\n安全的系统调用仍然可以正常工作\n");
    
    return 0;
}
int main() {
    return demo_custom_bpf_filter();
}
示例3:只读沙箱环境
#define _GNU_SOURCE
#include <linux/seccomp.h>
#include <linux/filter.h>
#include <linux/audit.h>
#include <sys/prctl.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <sys/syscall.h>
#include <fcntl.h>
#include <sys/stat.h>
/**
 * 创建只读沙箱环境的BPF过滤器
 */
int demo_readonly_sandbox() {
    printf("=== 只读沙箱环境示例 ===\n");
    
    // 定义只读沙箱的BPF过滤器
    // 允许读操作和基本系统调用,禁止写操作
    struct sock_filter filter[] = {
        // 加载系统调用号
        BPF_STMT(BPF_LD | BPF_W | BPF_ABS, offsetof(struct seccomp_data, nr)),
        
        // 允许 read 系统调用
        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, SYS_read, 0, 1),
        BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
        
        // 允许 write 系统调用(仅允许写到stdout/stderr)
        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, SYS_write, 0, 5),
        // 检查文件描述符是否为stdout(1)或stderr(2)
        BPF_STMT(BPF_LD | BPF_W | BPF_ABS, offsetof(struct seccomp_data, args[0])),
        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 1, 0, 1),  // stdout
        BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 2, 0, 1),  // stderr
        BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
        BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ERRNO | (EPERM & 0xFFFF)),
        
        // 允许 exit 和 exit_group
#ifdef __x86_64__
        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 60, 0, 1),   // exit
        BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 231, 0, 1),  // exit_group
        BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
#elif defined(__i386__)
        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 1, 0, 1),    // exit
        BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 252, 0, 1),  // exit_group
        BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
#endif
        
        // 允许 read-only 文件操作
        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, SYS_open, 0, 3),
        // 检查打开标志是否包含O_RDONLY
        BPF_STMT(BPF_LD | BPF_W | BPF_ABS, offsetof(struct seccomp_data, args[1])),
        BPF_JUMP(BPF_JMP | BPF_JSET | BPF_K, O_RDONLY, 0, 1),
        BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
        BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ERRNO | (EPERM & 0xFFFF)),
        
        // 允许 close 系统调用
        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, SYS_close, 0, 1),
        BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
        
        // 禁止其他所有系统调用
        BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ERRNO | (EPERM & 0xFFFF)),
    };
    
    struct sock_fprog prog = {
        .len = sizeof(filter) / sizeof(filter[0]),
        .filter = filter,
    };
    
    printf("创建只读沙箱环境\n");
    printf("允许的操作:\n");
    printf("  - 读取文件(只读模式)\n");
    printf("  - 写入标准输出和标准错误\n");
    printf("  - 基本的进程控制\n");
    printf("禁止的操作:\n");
    printf("  - 写入文件\n");
    printf("  - 网络操作\n");
    printf("  - 进程创建\n");
    printf("  - 其他危险操作\n");
    
    // 应用过滤器
    if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0) == -1) {
        printf("创建沙箱失败: %s\n", strerror(errno));
        return -1;
    }
    
    printf("只读沙箱创建成功\n");
    
    // 测试沙箱功能
    printf("\n=== 沙箱功能测试 ===\n");
    
    // 测试允许的读操作
    printf("1. 测试允许的读操作:\n");
    int fd = open("/etc/passwd", O_RDONLY);
    if (fd != -1) {
        char buffer[100];
        ssize_t bytes = read(fd, buffer, sizeof(buffer));
        if (bytes > 0) {
            printf("  读取/etc/passwd成功 (%zd 字节)\n", bytes);
        }
        close(fd);
    } else {
        printf("  打开/etc/passwd失败: %s\n", strerror(errno));
    }
    
    // 测试允许的写操作(stdout/stderr)
    printf("\n2. 测试允许的写操作:\n");
    write(STDOUT_FILENO, "  写入stdout成功\n", 17);
    write(STDERR_FILENO, "  写入stderr成功\n", 17);
    
    // 测试禁止的写操作
    printf("\n3. 测试禁止的写操作:\n");
    fd = open("/tmp/test_seccomp", O_WRONLY | O_CREAT | O_TRUNC, 0644);
    if (fd == -1) {
        printf("  创建文件被阻止: %s\n", strerror(errno));
    } else {
        printf("  创建文件意外成功\n");
        close(fd);
        unlink("/tmp/test_seccomp");
    }
    
    // 测试禁止的系统调用
    printf("\n4. 测试禁止的系统调用:\n");
    long result = syscall(SYS_fork);
    if (result == -1) {
        printf("  fork被阻止: %s\n", strerror(errno));
    }
    
    result = syscall(SYS_socket, AF_INET, SOCK_STREAM, 0);
    if (result == -1) {
        printf("  socket被阻止: %s\n", strerror(errno));
    }
    
    printf("\n沙箱环境测试完成\n");
    
    return 0;
}
int main() {
    return demo_readonly_sandbox();
}
示例4:进程监控和日志
#define _GNU_SOURCE
#include <linux/seccomp.h>
#include <linux/filter.h>
#include <linux/audit.h>
#include <sys/prctl.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <sys/syscall.h>
#include <signal.h>
#include <sys/wait.h>
/**
 * 信号处理函数
 */
void signal_handler(int sig) {
    printf("捕获信号 %d\n", sig);
    if (sig == SIGSYS) {
        printf("检测到被禁止的系统调用\n");
    }
}
/**
 * 演示seccomp的监控和日志功能
 */
int demo_seccomp_monitoring() {
    printf("=== seccomp监控和日志示例 ===\n");
    
    // 注册信号处理程序来捕获SIGSYS
    signal(SIGSYS, signal_handler);
    
    // 创建带日志的BPF过滤器
    struct sock_filter filter[] = {
        // 加载系统调用号
        BPF_STMT(BPF_LD | BPF_W | BPF_ABS, offsetof(struct seccomp_data, nr)),
        
        // 允许基本的读写操作
        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, SYS_read, 0, 1),
        BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
        
        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, SYS_write, 0, 1),
        BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
        
        // 允许exit相关调用
#ifdef __x86_64__
        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 60, 0, 1),   // exit
        BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 231, 0, 1),  // exit_group
        BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
#endif
        
        // 对于其他系统调用,返回追踪标志(用于日志)
        BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_TRACE | (1 & 0xFFFF)),
    };
    
    struct sock_fprog prog = {
        .len = sizeof(filter) / sizeof(filter[0]),
        .filter = filter,
    };
    
    printf("创建带监控的日志过滤器\n");
    printf("SECCOMP_RET_TRACE可以用于:\n");
    printf("  - 系统调用追踪\n");
    printf("  - 安全审计\n");
    printf("  - 调试和分析\n");
    
    // 启用seccomp
    if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog) == -1) {
        printf("启用seccomp失败: %s\n", strerror(errno));
        return -1;
    }
    
    printf("seccomp监控启用成功\n");
    
    // 测试监控功能
    printf("\n测试监控功能:\n");
    
    // 允许的系统调用
    write(STDOUT_FILENO, "允许的write调用\n", 17);
    
    // 被监控的系统调用
    printf("测试被监控的系统调用:\n");
    
    pid_t pid = getpid();
    printf("getpid()返回: %d\n", (int)pid);
    
    uid_t uid = getuid();
    printf("getuid()返回: %d\n", (int)uid);
    
    printf("注意:在实际应用中,SECCOMP_RET_TRACE会触发ptrace事件\n");
    printf("这需要额外的监控进程来处理追踪事件\n");
    
    return 0;
}
int main() {
    return demo_seccomp_monitoring();
}
示例5:安全沙箱应用
#define _GNU_SOURCE
#include <linux/seccomp.h>
#include <linux/filter.h>
#include <linux/audit.h>
#include <sys/prctl.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <sys/syscall.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <sys/mman.h>
/**
 * 安全沙箱配置
 */
typedef struct {
    int allow_network;
    int allow_file_write;
    int allow_process_creation;
    int allow_memory_mapping;
} sandbox_config_t;
/**
 * 创建安全沙箱
 */
int create_secure_sandbox(const sandbox_config_t *config) {
    printf("=== 创建安全沙箱 ===\n");
    
    // 根据配置创建BPF过滤器
    struct sock_filter filter[100];
    int filter_index = 0;
    
    // 基础加载系统调用号指令
    filter[filter_index++] = BPF_STMT(BPF_LD | BPF_W | BPF_ABS, 
                                     offsetof(struct seccomp_data, nr));
    
    // 始终允许的系统调用
    int essential_calls[] = {SYS_read, SYS_write, 
#ifdef __x86_64__
                           60,  // exit
                           231  // exit_group
#elif defined(__i386__)
                           1,   // exit
                           252  // exit_group
#endif
    };
    
    for (size_t i = 0; i < sizeof(essential_calls)/sizeof(essential_calls[0]); i++) {
        filter[filter_index++] = BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 
                                         essential_calls[i], 0, 1);
        filter[filter_index++] = BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW);
    }
    
    // 根据配置允许额外的系统调用
    if (config->allow_file_write) {
        filter[filter_index++] = BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, SYS_open, 0, 1);
        filter[filter_index++] = BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW);
        
        filter[filter_index++] = BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, SYS_openat, 0, 1);
        filter[filter_index++] = BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW);
        
        filter[filter_index++] = BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, SYS_close, 0, 1);
        filter[filter_index++] = BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW);
    }
    
    if (config->allow_network) {
        filter[filter_index++] = BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, SYS_socket, 0, 1);
        filter[filter_index++] = BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW);
        
        filter[filter_index++] = BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, SYS_connect, 0, 1);
        filter[filter_index++] = BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW);
    }
    
    if (config->allow_process_creation) {
        filter[filter_index++] = BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, SYS_fork, 0, 1);
        filter[filter_index++] = BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW);
        
        filter[filter_index++] = BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, SYS_clone, 0, 1);
        filter[filter_index++] = BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW);
    }
    
    if (config->allow_memory_mapping) {
        filter[filter_index++] = BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, SYS_mmap, 0, 1);
        filter[filter_index++] = BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW);
        
        filter[filter_index++] = BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, SYS_munmap, 0, 1);
        filter[filter_index++] = BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW);
    }
    
    // 默认拒绝所有其他系统调用
    filter[filter_index++] = BPF_STMT(BPF_RET | BPF_K, 
                                     SECCOMP_RET_ERRNO | (EPERM & 0xFFFF));
    
    struct sock_fprog prog = {
        .len = filter_index,
        .filter = filter,
    };
    
    printf("沙箱配置:\n");
    printf("  网络访问: %s\n", config->allow_network ? "允许" : "禁止");
    printf("  文件写入: %s\n", config->allow_file_write ? "允许" : "禁止");
    printf("  进程创建: %s\n", config->allow_process_creation ? "允许" : "禁止");
    printf("  内存映射: %s\n", config->allow_memory_mapping ? "允许" : "禁止");
    
    // 应用沙箱
    if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0) == -1) {
        printf("创建沙箱失败: %s\n", strerror(errno));
        return -1;
    }
    
    printf("安全沙箱创建成功\n");
    return 0;
}
/**
 * 演示不同安全级别的沙箱
 */
int demo_security_levels() {
    sandbox_config_t configs[3] = {
        // 最严格:只允许基本I/O
        {0, 0, 0, 0},
        
        // 中等:允许文件操作
        {0, 1, 0, 1},
        
        // 宽松:允许网络和进程创建
        {1, 1, 1, 1}
    };
    
    const char *level_names[] = {"最高安全", "中等安全", "较低安全"};
    
    printf("=== 不同安全级别沙箱演示 ===\n");
    
    for (int level = 0; level < 3; level++) {
        printf("\n--- %s级别沙箱 ---\n", level_names[level]);
        
        if (create_secure_sandbox(&configs[level]) == 0) {
            printf("沙箱 %s 创建成功\n", level_names[level]);
            
            // 测试沙箱功能
            write(STDOUT_FILENO, "基本I/O测试成功\n", 17);
            
            if (configs[level].allow_network) {
                printf("网络功能可用\n");
            }
            
            if (configs[level].allow_file_write) {
                printf("文件写入功能可用\n");
            }
            
            // 由于seccomp策略一旦设置就不能放松,我们需要在子进程中测试
            break;  // 只测试第一个配置
        }
    }
    
    return 0;
}
/**
 * 演示沙箱的实际应用
 */
int demo_practical_sandbox() {
    printf("=== 实际沙箱应用演示 ===\n");
    
    // 创建一个限制性的沙箱:只允许基本操作
    sandbox_config_t config = {0, 0, 0, 0};  // 最严格
    
    if (create_secure_sandbox(&config) != 0) {
        return -1;
    }
    
    printf("\n沙箱环境中运行测试程序:\n");
    
    // 测试基本功能
    printf("1. 基本输出测试:\n");
    printf("   标准输出工作正常\n");
    write(STDOUT_FILENO, "   write系统调用工作正常\n", 24);
    
    // 测试被限制的功能
    printf("\n2. 被限制功能测试:\n");
    
    // 尝试网络操作
    long result = syscall(SYS_socket, AF_INET, SOCK_STREAM, 0);
    if (result == -1) {
        printf("   网络操作被成功阻止: %s\n", strerror(errno));
    }
    
    // 尝试文件写入
    result = syscall(SYS_open, "/tmp/test", O_WRONLY | O_CREAT, 0644);
    if (result == -1) {
        printf("   文件写入被成功阻止: %s\n", strerror(errno));
    }
    
    // 尝试进程创建
    result = syscall(SYS_fork);
    if (result == -1) {
        printf("   进程创建被成功阻止: %s\n", strerror(errno));
    }
    
    printf("\n3. 沙箱优势:\n");
    printf("   ✓ 防止恶意代码执行危险操作\n");
    printf("   ✓ 限制程序的权限范围\n");
    printf("   ✓ 提供额外的安全层\n");
    printf("   ✓ 可以与其它安全机制配合使用\n");
    
    printf("\n4. 使用场景:\n");
    printf("   - 插件或扩展的安全执行\n");
    printf("   - 不可信代码的沙箱运行\n");
    printf("   - 容器和虚拟化环境\n");
    printf("   - 安全审计和监控\n");
    
    return 0;
}
int main() {
    printf("seccomp - Linux系统调用过滤机制\n");
    printf("================================\n\n");
    
    // 由于seccomp策略一旦设置就会影响整个进程,
    // 我们分别在不同的子进程中演示不同功能
    
    if (fork() == 0) {
        return demo_practical_sandbox();
    }
    
    int status;
    wait(&status);
    
    return 0;
}
seccomp 使用注意事项
系统要求:
- 内核版本: 需要Linux 3.5或更高版本
 - 架构支持: 支持多种CPU架构
 - 编译选项: 需要内核编译时启用CONFIG_SECCOMP
 
权限要求:
1. CAP_SYS_ADMIN: 通常需要管理员权限
2. 无特权进程: 可以使用SECCOMP_MODE_STRICT
3. 容器环境: Docker等容器可能有限制
安全考虑:
1. 策略不可逆: 一旦应用,seccomp策略不能放松
2. 调试困难: 被阻止的系统调用可能难以调试
3. 兼容性: 可能影响程序的正常功能
4. 性能影响: BPF过滤会增加系统调用开销
最佳实践:
- 渐进式应用: 从宽松策略开始,逐步收紧
 - 充分测试: 在生产环境前充分测试
 - 错误处理: 妥善处理被阻止的系统调用
 - 日志记录: 记录安全相关事件
 - 备份方案: 提供策略失效时的处理方案
 
seccomp 模式详解
SECCOMP_MODE_STRICT (模式1):
- 特点: 最简单的模式,只允许read/write/exit/exit_group
 - 优点: 简单、高效、安全
 - 缺点: 功能极其有限
 - 适用: 极度安全要求的简单程序
 
SECCOMP_MODE_FILTER (模式2):
- 特点: 使用BPF程序定义复杂过滤规则
 - 优点: 灵活、功能强大
 - 缺点: 配置复杂
 - 适用: 大多数实际应用场景
 
常见系统调用编号
x86_64架构:
- SYS_read = 0
 - SYS_write = 1
 - SYS_open = 2
 - SYS_close = 3
 - SYS_stat = 4
 - SYS_fstat = 5
 - SYS_lstat = 6
 - SYS_poll = 7
 - SYS_lseek = 8
 - SYS_mmap = 9
 - SYS_mprotect = 10
 - SYS_munmap = 11
 - SYS_brk = 12
 - SYS_rt_sigaction = 13
 - SYS_rt_sigprocmask = 14
 - SYS_rt_sigreturn = 15
 - SYS_ioctl = 16
 - SYS_pread64 = 17
 - SYS_pwrite64 = 18
 - SYS_readv = 19
 - SYS_writev = 20
 - SYS_access = 21
 - SYS_pipe = 22
 - SYS_select = 23
 - SYS_sched_yield = 24
 - SYS_mremap = 25
 - SYS_msync = 26
 - SYS_mincore = 27
 - SYS_madvise = 28
 - SYS_shmget = 29
 - SYS_shmat = 30
 - SYS_shmctl = 31
 - SYS_dup = 32
 - SYS_dup2 = 33
 - SYS_pause = 34
 - SYS_nanosleep = 35
 - SYS_getitimer = 36
 - SYS_alarm = 37
 - SYS_setitimer = 38
 - SYS_getpid = 39
 - SYS_sendfile = 40
 - SYS_socket = 41
 - SYS_connect = 42
 - SYS_accept = 43
 - SYS_sendto = 44
 - SYS_recvfrom = 45
 - SYS_sendmsg = 46
 - SYS_recvmsg = 47
 - SYS_shutdown = 48
 - SYS_bind = 49
 - SYS_listen = 50
 - SYS_getsockname = 51
 - SYS_getpeername = 52
 - SYS_socketpair = 53
 - SYS_setsockopt = 54
 - SYS_getsockopt = 55
 - SYS_clone = 56
 - SYS_fork = 57
 - SYS_vfork = 58
 - SYS_execve = 59
 - SYS_exit = 60
 - SYS_wait4 = 61
 - SYS_kill = 62
 - SYS_uname = 63
 - SYS_semget = 64
 - SYS_semop = 65
 - SYS_semctl = 66
 - SYS_shmdt = 67
 - SYS_msgget = 68
 - SYS_msgsnd = 69
 - SYS_msgrcv = 70
 - SYS_msgctl = 71
 - SYS_fcntl = 72
 - SYS_flock = 73
 - SYS_fsync = 74
 - SYS_fdatasync = 75
 - SYS_truncate = 76
 - SYS_ftruncate = 77
 - SYS_getdents = 78
 - SYS_getcwd = 79
 - SYS_chdir = 80
 - SYS_fchdir = 81
 - SYS_rename = 82
 - SYS_mkdir = 83
 - SYS_rmdir = 84
 - SYS_creat = 85
 - SYS_link = 86
 - SYS_unlink = 87
 - SYS_symlink = 88
 - SYS_readlink = 89
 - SYS_chmod = 90
 - SYS_fchmod = 91
 - SYS_chown = 92
 - SYS_fchown = 93
 - SYS_lchown = 94
 - SYS_umask = 95
 - SYS_gettimeofday = 96
 - SYS_getrlimit = 97
 - SYS_getrusage = 98
 - SYS_sysinfo = 99
 - SYS_times = 100
 - SYS_ptrace = 101
 - SYS_getuid = 102
 - SYS_syslog = 103
 - SYS_getgid = 104
 - SYS_setuid = 105
 - SYS_setgid = 106
 - SYS_geteuid = 107
 - SYS_getegid = 108
 - SYS_setpgid = 109
 - SYS_getppid = 110
 - SYS_getpgrp = 111
 - SYS_setsid = 112
 - SYS_setreuid = 113
 - SYS_setregid = 114
 - SYS_getgroups = 115
 - SYS_setgroups = 116
 - SYS_setresuid = 117
 - SYS_getresuid = 118
 - SYS_setresgid = 119
 - SYS_getresgid = 120
 - SYS_getpgid = 121
 - SYS_setfsuid = 122
 - SYS_setfsgid = 123
 - SYS_getsid = 124
 - SYS_capget = 125
 - SYS_capset = 126
 - SYS_rt_sigpending = 127
 - SYS_rt_sigtimedwait = 128
 - SYS_rt_sigqueueinfo = 129
 - SYS_rt_sigsuspend = 130
 - SYS_sigaltstack = 131
 - SYS_utime = 132
 - SYS_mknod = 133
 - SYS_uselib = 134
 - SYS_personality = 135
 - SYS_ustat = 136
 - SYS_statfs = 137
 - SYS_fstatfs = 138
 - SYS_sysfs = 139
 - SYS_getpriority = 140
 - SYS_setpriority = 141
 - SYS_sched_setparam = 142
 - SYS_sched_getparam = 143
 - SYS_sched_setscheduler = 144
 - SYS_sched_getscheduler = 145
 - SYS_sched_get_priority_max = 146
 - SYS_sched_get_priority_min = 147
 - SYS_sched_rr_get_interval = 148
 - SYS_mlock = 149
 - SYS_munlock = 150
 - SYS_mlockall = 151
 - SYS_munlockall = 152
 - SYS_vhangup = 153
 - SYS_modify_ldt = 154
 - SYS_pivot_root = 155
 - SYS__sysctl = 156
 - SYS_prctl = 157
 - SYS_arch_prctl = 158
 - SYS_adjtimex = 159
 - SYS_setrlimit = 160
 - SYS_chroot = 161
 - SYS_sync = 162
 - SYS_acct = 163
 - SYS_settimeofday = 164
 - SYS_mount = 165
 - SYS_umount2 = 166
 - SYS_swapon = 167
 - SYS_swapoff = 168
 - SYS_reboot = 169
 - SYS_sethostname = 170
 - SYS_setdomainname = 171
 - SYS_iopl = 172
 - SYS_ioperm = 173
 - SYS_create_module = 174
 - SYS_init_module = 175
 - SYS_delete_module = 176
 - SYS_get_kernel_syms = 177
 - SYS_query_module = 178
 - SYS_quotactl = 179
 - SYS_nfsservctl = 180
 - SYS_getpmsg = 181
 - SYS_putpmsg = 182
 - SYS_afs_syscall = 183
 - SYS_tuxcall = 184
 - SYS_security = 185
 - SYS_gettid = 186
 - SYS_readahead = 187
 - SYS_setxattr = 188
 - SYS_lsetxattr = 189
 - SYS_fsetxattr = 190
 - SYS_getxattr = 191
 - SYS_lgetxattr = 192
 - SYS_fgetxattr = 193
 - SYS_listxattr = 194
 - SYS_llistxattr = 195
 - SYS_flistxattr = 196
 - SYS_removexattr = 197
 - SYS_lremovexattr = 198
 - SYS_fremovexattr = 199
 - SYS_tkill = 200
 - SYS_time = 201
 - SYS_futex = 202
 - SYS_sched_setaffinity = 203
 - SYS_sched_getaffinity = 204
 - SYS_set_thread_area = 205
 - SYS_io_setup = 206
 - SYS_io_destroy = 207
 - SYS_io_getevents = 208
 - SYS_io_submit = 209
 - SYS_io_cancel = 210
 - SYS_get_thread_area = 211
 - SYS_lookup_dcookie = 212
 - SYS_epoll_create = 213
 - SYS_epoll_ctl_old = 214
 - SYS_epoll_wait_old = 215
 - SYS_remap_file_pages = 216
 - SYS_getdents64 = 217
 - SYS_set_tid_address = 218
 - SYS_restart_syscall = 219
 - SYS_semtimedop = 220
 - SYS_fadvise64 = 221
 - SYS_timer_create = 222
 - SYS_timer_settime = 223
 - SYS_timer_gettime = 224
 - SYS_timer_getoverrun = 225
 - SYS_timer_delete = 226
 - SYS_clock_settime = 227
 - SYS_clock_gettime = 228
 - SYS_clock_getres = 229
 - SYS_clock_nanosleep = 230
 - SYS_exit_group = 231
 - SYS_epoll_wait = 232
 - SYS_epoll_ctl = 233
 - SYS_tgkill = 234
 - SYS_utimes = 235
 - SYS_vserver = 236
 - SYS_mbind = 237
 - SYS_set_mempolicy = 238
 - SYS_get_mempolicy = 239
 - SYS_mq_open = 240
 - SYS_mq_unlink = 241
 - SYS_mq_timedsend = 242
 - SYS_mq_timedreceive = 243
 - SYS_mq_notify = 244
 - SYS_mq_getsetattr = 245
 - SYS_kexec_load = 246
 - SYS_waitid = 247
 - SYS_add_key = 248
 - SYS_request_key = 249
 - SYS_keyctl = 250
 - SYS_ioprio_set = 251
 - SYS_ioprio_get = 252
 - SYS_inotify_init = 253
 - SYS_inotify_add_watch = 254
 - SYS_inotify_rm_watch = 255
 - SYS_migrate_pages = 256
 - SYS_openat = 257
 - SYS_mkdirat = 258
 - SYS_mknodat = 259
 - SYS_fchownat = 260
 - SYS_futimesat = 261
 - SYS_newfstatat = 262
 - SYS_unlinkat = 263
 - SYS_renameat = 264
 - SYS_linkat = 265
 - SYS_symlinkat = 266
 - SYS_readlinkat = 267
 - SYS_fchmodat = 268
 - SYS_faccessat = 269
 - SYS_pselect6 = 270
 - SYS_ppoll = 271
 - SYS_unshare = 272
 - SYS_set_robust_list = 273
 - SYS_get_robust_list = 274
 - SYS_splice = 275
 - SYS_tee = 276
 - SYS_sync_file_range = 277
 - SYS_vmsplice = 278
 - SYS_move_pages = 279
 - SYS_utimensat = 280
 - SYS_epoll_pwait = 281
 - SYS_signalfd = 282
 - SYS_timerfd_create = 283
 - SYS_eventfd = 284
 - SYS_fallocate = 285
 - SYS_timerfd_settime = 286
 - SYS_timerfd_gettime = 287
 - SYS_accept4 = 288
 - SYS_signalfd4 = 289
 - SYS_eventfd2 = 290
 - SYS_epoll_create1 = 291
 - SYS_dup3 = 292
 - SYS_pipe2 = 293
 - SYS_inotify_init1 = 294
 - SYS_preadv = 295
 - SYS_pwritev = 296
 - SYS_rt_tgsigqueueinfo = 297
 - SYS_perf_event_open = 298
 - SYS_recvmmsg = 299
 - SYS_fanotify_init = 300
 - SYS_fanotify_mark = 301
 - SYS_prlimit64 = 302
 - SYS_name_to_handle_at = 303
 - SYS_open_by_handle_at = 304
 - SYS_clock_adjtime = 305
 - SYS_syncfs = 306
 - SYS_sendmmsg = 307
 - SYS_setns = 308
 - SYS_getcpu = 309
 - SYS_process_vm_readv = 310
 - SYS_process_vm_writev = 311
 - SYS_kcmp = 312
 - SYS_finit_module = 313
 - SYS_sched_setattr = 314
 - SYS_sched_getattr = 315
 - SYS_renameat2 = 316
 - SYS_seccomp = 317
 - SYS_getrandom = 318
 - SYS_memfd_create = 319
 - SYS_kexec_file_load = 320
 - SYS_bpf = 321
 - SYS_execveat = 322
 - SYS_userfaultfd = 323
 - SYS_membarrier = 324
 - SYS_mlock2 = 325
 - SYS_copy_file_range = 326
 - SYS_preadv2 = 327
 - SYS_pwritev2 = 328
 - SYS_pkey_mprotect = 329
 - SYS_pkey_alloc = 330
 - SYS_pkey_free = 331
 - SYS_statx = 332
 - SYS_io_pgetevents = 333
 - SYS_rseq = 334
 
总结
seccomp 是Linux系统中强大的安全机制,提供了:
通过合理使用seccomp,可以显著提高应用程序的安全性,构建更加安全可靠的计算环境。在实际应用中,需要仔细设计过滤策略,充分测试,并考虑错误处理和调试需求。
