seccomp系统调用及示例

  1. 函数介绍

seccomp 是Linux系统调用过滤机制,用于限制进程可以执行的系统调用。它通过Berkeley Packet Filter (BPF) 程序来定义哪些系统调用是允许的,哪些是禁止的。seccomp 是构建沙箱环境、提高应用程序安全性的重要工具,可以有效防止恶意代码执行危险的系统调用。

  2. 函数原型

#include <linux/seccomp.h>
#include <linux/filter.h>
#include <sys/prctl.h>
#include <unistd.h>

int prctl(int option, unsigned long arg2, unsigned long arg3,
unsigned long arg4, unsigned long arg5);

int seccomp(unsigned int operation, unsigned int flags, void *args);

  3. 功能

seccomp 提供了系统调用级别的安全控制,可以:

  • 限制进程可执行的系统调用集合

  • 定义系统调用的执行策略(允许、错误、终止)

  • 使用BPF程序实现复杂的过滤逻辑

  • 构建安全的沙箱环境

  4. 参数

prctl方式:

  • int option: 控制选项(如PR_SET_SECCOMP)

  • unsigned long arg2: seccomp模式(SECCOMP_MODE_STRICT/SECCOMP_MODE_FILTER)

  • 其他参数: 根据选项而定

seccomp系统调用:

  • unsigned int operation: 操作类型(SECCOMP_SET_MODE_STRICT/SECCOMP_SET_MODE_FILTER)

  • unsigned int flags: 标志位(通常为0)

  • void *args: 操作参数(BPF程序指针等)

  5. 返回值
  • 成功: 返回0

  • 失败: 返回-1,并设置errno

  6. 相似函数,或关联函数
  • prctl: 进程控制接口

  • personality: 设置进程执行特性

  • chroot: 改变根目录

  • capset: 设置进程权限

  7. 示例代码

示例1:基础seccomp使用

#define _GNU_SOURCE
#include <linux/seccomp.h>
#include <linux/filter.h>
#include <sys/prctl.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <sys/syscall.h>

/**
 * Demonstrates SECCOMP_MODE_STRICT.
 *
 * Prints the current seccomp mode, performs an ordinary write(2), switches
 * the calling thread into strict mode and shows that write(2) still works.
 *
 * In strict mode only read(2), write(2), _exit(2) and sigreturn(2) are
 * permitted; every other system call - including prctl(2) and
 * exit_group(2) - terminates the thread with SIGKILL. Because a normal
 * return from main() ends in exit_group(2), a caller that wants a clean exit
 * status after a successful call has to flush stdio and leave through the
 * raw exit system call.
 *
 * @return 0 when strict mode was enabled, -1 when prctl() failed.
 */
int demo_seccomp_basic() {
    /* Byte counts are taken from the arrays: the text is multi-byte UTF-8. */
    static const char before_msg[] = " 普通write调用成功\n";
    static const char after_msg[] = " write调用仍然允许\n";

    printf("=== 基础seccomp使用示例 ===\n");

    /* Query the mode now, while prctl(2) is still permitted. */
    int current_mode = prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
    printf("当前seccomp模式: ");
    switch (current_mode) {
    case SECCOMP_MODE_DISABLED:
        printf("SECCOMP_MODE_DISABLED (禁用)\n");
        break;
    case SECCOMP_MODE_STRICT:
        printf("SECCOMP_MODE_STRICT (严格模式)\n");
        break;
    case SECCOMP_MODE_FILTER:
        printf("SECCOMP_MODE_FILTER (过滤模式)\n");
        break;
    default:
        printf("未知模式 (%d)\n", current_mode);
        break;
    }

    printf("测试普通系统调用...\n");
    /* Flush so the buffered text and the raw write() appear in order. */
    fflush(stdout);
    if (write(STDOUT_FILENO, before_msg, sizeof(before_msg) - 1) == -1) {
        printf(" write调用失败: %s\n", strerror(errno));
    }

    printf("启用seccomp严格模式...\n");
    if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, 0, 0, 0) == -1) {
        printf("启用seccomp失败: %s\n", strerror(errno));
        printf("注意:严格模式只允许read/write/_exit/sigreturn系统调用\n");
        return -1;
    }

    printf("seccomp严格模式启用成功\n");
    /*
     * Do not call prctl(PR_GET_SECCOMP) here: prctl(2) is no longer allowed
     * and the call would kill the process. The mode is known anyway.
     */
    printf("当前seccomp模式: %d\n", SECCOMP_MODE_STRICT);

    printf("测试允许的系统调用...\n");
    fflush(stdout);
    if (write(STDOUT_FILENO, after_msg, sizeof(after_msg) - 1) == -1) {
        printf(" write调用失败: %s\n", strerror(errno));
    }

    printf("测试不允许的系统调用(程序将终止)...\n");
    printf(" 尝试调用getpid()...\n");

    /*
     * getpid() is deliberately not invoked: in strict mode it would terminate
     * the process with SIGKILL before anything else could be printed.
     */
    printf(" 注意:getpid()等系统调用在严格模式下会被禁止\n");
    printf(" 实际执行会导致程序被SIGKILL终止\n");

    return 0;
}

/**
 * Runs the strict-mode demo and terminates the process.
 *
 * After the demo succeeds the process is in SECCOMP_MODE_STRICT, where
 * exit_group(2) is forbidden. Returning from main() would make the C library
 * call exit_group(2) and the process would be killed with SIGKILL, so stdio
 * is flushed (write(2) is allowed) and the raw exit system call is used.
 */
int main() {
    int rc = demo_seccomp_basic();

    fflush(stdout);
    syscall(SYS_exit, rc);

    return rc; /* not reached */
}

示例2:自定义BPF过滤器

#define _GNU_SOURCE
#include <linux/seccomp.h>
#include <linux/filter.h>
#include <linux/audit.h>
#include <sys/prctl.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <sys/syscall.h>

/**
 * Installs a seccomp BPF filter that allows read, write, exit and exit_group
 * and makes every other system call fail with EPERM, then exercises it.
 *
 * The system call numbers come from <sys/syscall.h> instead of hard-coded
 * x86 values, so the filter keeps its meaning on other architectures.
 * PR_SET_NO_NEW_PRIVS is set first so that an unprivileged process may
 * install the filter.
 *
 * NOTE: offsetof() requires <stddef.h> to be included by this program.
 * NOTE: a production filter should additionally validate seccomp_data.arch.
 *
 * The read(2) test reads from standard input and blocks until input or EOF.
 *
 * @return 0 when the filter was installed, -1 otherwise.
 */
int demo_custom_bpf_filter() {
    /* Length is derived from the array: the text is multi-byte UTF-8. */
    static const char write_msg[] = " write调用成功\n";

    printf("=== 自定义BPF过滤器示例 ===\n");

    /*
     * Each "JEQ nr, 0, 1" falls through to the following RET ALLOW when the
     * system call number matches and skips over it otherwise.
     */
    struct sock_filter filter[] = {
        /* Load the system call number into the accumulator. */
        BPF_STMT(BPF_LD | BPF_W | BPF_ABS, offsetof(struct seccomp_data, nr)),

        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, SYS_read, 0, 1),
        BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),

        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, SYS_write, 0, 1),
        BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),

        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, SYS_exit, 0, 1),
        BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),

        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, SYS_exit_group, 0, 1),
        BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),

        /* Everything else fails with EPERM. */
        BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ERRNO | (EPERM & 0xFFFF)),
    };

    struct sock_fprog prog = {
        .len = (unsigned short)(sizeof(filter) / sizeof(filter[0])),
        .filter = filter,
    };

    printf("创建BPF过滤器,允许系统调用:\n");
    printf(" read(%d), write(%d), exit(%d), exit_group(%d)\n",
           (int)SYS_read, (int)SYS_write, (int)SYS_exit, (int)SYS_exit_group);
    printf("其他系统调用将返回EPERM错误\n");

    /* Without no_new_privs the filter install needs CAP_SYS_ADMIN. */
    if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) == -1) {
        printf("设置PR_SET_NO_NEW_PRIVS失败: %s\n", strerror(errno));
        return -1;
    }

    if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0) == -1) {
        printf("应用BPF过滤器失败: %s\n", strerror(errno));
        printf("可能的原因:\n");
        printf(" 1. 内核不支持seccomp BPF\n");
        printf(" 2. BPF过滤器程序无效\n");
        return -1;
    }

    printf("BPF过滤器应用成功\n");

    printf("\n测试允许的系统调用:\n");
    /* Flush so buffered text and the raw write() appear in order. */
    fflush(stdout);
    if (write(STDOUT_FILENO, write_msg, sizeof(write_msg) - 1) == -1) {
        printf(" write调用失败: %s\n", strerror(errno));
    }

    char buffer[10];
    ssize_t bytes_read = read(STDIN_FILENO, buffer, sizeof(buffer));
    if (bytes_read >= 0) {
        printf(" read调用成功\n");
    } else {
        printf(" read调用失败: %s\n", strerror(errno));
    }

    printf("\n测试不允许的系统调用:\n");
    long result = syscall(SYS_getpid);
    if (result == -1) {
        printf(" getpid调用被阻止: %s\n", strerror(errno));
    } else {
        printf(" getpid调用意外成功: %ld\n", result);
    }

#ifdef SYS_open /* not every architecture provides the legacy open syscall */
    result = syscall(SYS_open, "/etc/passwd", 0);
    if (result == -1) {
        printf(" open调用被阻止: %s\n", strerror(errno));
    } else {
        printf(" open调用意外成功: %ld\n", result);
    }
#endif

    printf("\n安全的系统调用仍然可以正常工作\n");

    return 0;
}

/* Entry point: the process exit status is the demo's return value. */
int main() {
    const int status = demo_custom_bpf_filter();
    return status;
}

示例3:只读沙箱环境

#define _GNU_SOURCE
#include <linux/seccomp.h>
#include <linux/filter.h>
#include <linux/audit.h>
#include <sys/prctl.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <sys/syscall.h>
#include <fcntl.h>
#include <sys/stat.h>

/**
 * Builds and installs a "read-only sandbox" seccomp filter, then probes it.
 *
 * Policy enforced by the filter:
 *   - read: always allowed
 *   - write: allowed only when the file descriptor is stdout or stderr
 *   - exit / exit_group / close: always allowed
 *   - open / openat: allowed only when the flags request read-only access
 *     without O_CREAT or O_TRUNC (the C library's open() issues openat)
 *   - everything else fails with EPERM
 *
 * The BPF jump offsets are relative to the instruction following the jump;
 * every conditional block below states how many instructions it skips.
 * Argument loads read the low 32 bits of the 64-bit argument slot, which is
 * where the value lives on little-endian machines.
 *
 * NOTE: offsetof() needs <stddef.h> and AF_INET/SOCK_STREAM need
 * <sys/socket.h>; this program must include both.
 *
 * @return 0 when the sandbox was installed, -1 otherwise.
 */
int demo_readonly_sandbox() {
    /* Flag bits that turn an open into a modifying operation. */
    enum { OPEN_MODIFY_MASK = O_ACCMODE | O_CREAT | O_TRUNC };

    /* Lengths are derived from the arrays: the text is multi-byte UTF-8. */
    static const char stdout_msg[] = " 写入stdout成功\n";
    static const char stderr_msg[] = " 写入stderr成功\n";

    printf("=== 只读沙箱环境示例 ===\n");

    struct sock_filter filter[] = {
        /* Load the system call number. */
        BPF_STMT(BPF_LD | BPF_W | BPF_ABS, offsetof(struct seccomp_data, nr)),

        /* read */
        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, SYS_read, 0, 1),
        BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),

        /* write: not a write -> skip the 6 instructions of this block */
        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, SYS_write, 0, 6),
        BPF_STMT(BPF_LD | BPF_W | BPF_ABS, offsetof(struct seccomp_data, args[0])),
        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, STDOUT_FILENO, 0, 1),
        BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, STDERR_FILENO, 0, 1),
        BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
        BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ERRNO | (EPERM & 0xFFFF)),

        /* exit and exit_group */
        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, SYS_exit, 0, 1),
        BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, SYS_exit_group, 0, 1),
        BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),

#ifdef SYS_open
        /* open(path, flags, mode): not open -> skip the 5 instructions below */
        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, SYS_open, 0, 5),
        BPF_STMT(BPF_LD | BPF_W | BPF_ABS, offsetof(struct seccomp_data, args[1])),
        BPF_STMT(BPF_ALU | BPF_AND | BPF_K, OPEN_MODIFY_MASK),
        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, O_RDONLY, 0, 1),
        BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
        BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ERRNO | (EPERM & 0xFFFF)),
#endif

        /* openat(dirfd, path, flags, mode): same test, flags are args[2] */
        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, SYS_openat, 0, 5),
        BPF_STMT(BPF_LD | BPF_W | BPF_ABS, offsetof(struct seccomp_data, args[2])),
        BPF_STMT(BPF_ALU | BPF_AND | BPF_K, OPEN_MODIFY_MASK),
        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, O_RDONLY, 0, 1),
        BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
        BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ERRNO | (EPERM & 0xFFFF)),

        /* close */
        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, SYS_close, 0, 1),
        BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),

        /* default: deny */
        BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ERRNO | (EPERM & 0xFFFF)),
    };

    struct sock_fprog prog = {
        .len = (unsigned short)(sizeof(filter) / sizeof(filter[0])),
        .filter = filter,
    };

    printf("创建只读沙箱环境\n");
    printf("允许的操作:\n");
    printf(" - 读取文件(只读模式)\n");
    printf(" - 写入标准输出和标准错误\n");
    printf(" - 基本的进程控制\n");
    printf("禁止的操作:\n");
    printf(" - 写入文件\n");
    printf(" - 网络操作\n");
    printf(" - 进程创建\n");
    printf(" - 其他危险操作\n");

    /* Without no_new_privs the filter install needs CAP_SYS_ADMIN. */
    if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) == -1) {
        printf("创建沙箱失败: %s\n", strerror(errno));
        return -1;
    }

    if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0) == -1) {
        printf("创建沙箱失败: %s\n", strerror(errno));
        return -1;
    }

    printf("只读沙箱创建成功\n");

    printf("\n=== 沙箱功能测试 ===\n");

    /* 1. Read-only open + read must work. */
    printf("1. 测试允许的读操作:\n");
    int fd = open("/etc/passwd", O_RDONLY);
    if (fd != -1) {
        char buffer[100];
        ssize_t bytes = read(fd, buffer, sizeof(buffer));
        if (bytes > 0) {
            printf(" 读取/etc/passwd成功 (%zd 字节)\n", bytes);
        }
        close(fd);
    } else {
        printf(" 打开/etc/passwd失败: %s\n", strerror(errno));
    }

    /* 2. Writes to stdout/stderr must work. */
    printf("\n2. 测试允许的写操作:\n");
    fflush(stdout); /* keep buffered text and raw writes in order */
    if (write(STDOUT_FILENO, stdout_msg, sizeof(stdout_msg) - 1) == -1) {
        printf(" 写入stdout失败: %s\n", strerror(errno));
    }
    if (write(STDERR_FILENO, stderr_msg, sizeof(stderr_msg) - 1) == -1) {
        printf(" 写入stderr失败: %s\n", strerror(errno));
    }

    /* 3. Opening a file for writing must be refused. */
    printf("\n3. 测试禁止的写操作:\n");
    fd = open("/tmp/test_seccomp", O_WRONLY | O_CREAT | O_TRUNC, 0644);
    if (fd == -1) {
        printf(" 创建文件被阻止: %s\n", strerror(errno));
    } else {
        printf(" 创建文件意外成功\n");
        close(fd);
        unlink("/tmp/test_seccomp");
    }

    /* 4. Process creation and networking must be refused. */
    printf("\n4. 测试禁止的系统调用:\n");
    long result = fork();
    if (result == -1) {
        printf(" fork被阻止: %s\n", strerror(errno));
    } else if (result == 0) {
        /* Unexpected child: leave at once instead of repeating the demo. */
        _exit(0);
    } else {
        printf(" fork意外成功: %ld\n", result);
    }

    result = syscall(SYS_socket, AF_INET, SOCK_STREAM, 0);
    if (result == -1) {
        printf(" socket被阻止: %s\n", strerror(errno));
    } else {
        printf(" socket意外成功: %ld\n", result);
        close((int)result);
    }

    printf("\n沙箱环境测试完成\n");

    return 0;
}

/* Entry point: the process exit status is the demo's return value. */
int main() {
    const int status = demo_readonly_sandbox();
    return status;
}

示例4:进程监控和日志

#define _GNU_SOURCE
#include <linux/seccomp.h>
#include <linux/filter.h>
#include <linux/audit.h>
#include <sys/prctl.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <sys/syscall.h>
#include <signal.h>
#include <sys/wait.h>

/**
 * Signal handler that reports the received signal on standard output.
 *
 * Only write(2) is used because printf() is not async-signal-safe. The
 * signal number is formatted by hand and errno is restored before
 * returning so the interrupted code does not observe a changed value.
 *
 * @param sig number of the delivered signal.
 */
void signal_handler(int sig) {
    static const char prefix[] = "捕获信号 ";
    static const char sigsys_note[] = "检测到被禁止的系统调用\n";
    const int saved_errno = errno;

    /* Build "<number>\n" right-to-left; 16 bytes hold any int plus sign. */
    char digits[16];
    size_t pos = sizeof(digits);
    unsigned int value = (sig < 0) ? 0u - (unsigned int)sig : (unsigned int)sig;

    digits[--pos] = '\n';
    do {
        digits[--pos] = (char)('0' + value % 10u);
        value /= 10u;
    } while (value != 0u);
    if (sig < 0) {
        digits[--pos] = '-';
    }

    /* Nothing useful can be done about a failed write inside a handler. */
    if (write(STDOUT_FILENO, prefix, sizeof(prefix) - 1) == -1) {
        /* ignored */
    }
    if (write(STDOUT_FILENO, digits + pos, sizeof(digits) - pos) == -1) {
        /* ignored */
    }
    if (sig == SIGSYS) {
        if (write(STDOUT_FILENO, sigsys_note, sizeof(sigsys_note) - 1) == -1) {
            /* ignored */
        }
    }

    errno = saved_errno;
}

/**
 * Demonstrates SECCOMP_RET_TRACE as the default filter action.
 *
 * read, write, exit and exit_group are allowed; every other system call
 * returns SECCOMP_RET_TRACE. That action notifies a ptrace tracer; when no
 * tracer is attached - as in this demo - the system call is not executed and
 * fails with ENOSYS. The monitored calls are therefore issued through
 * syscall(2) and their result is checked instead of being printed as valid.
 *
 * A SIGSYS handler is registered before the filter is installed (signal
 * registration is not allowed afterwards). SIGSYS is delivered for
 * SECCOMP_RET_TRAP, so with this filter the handler is not expected to run.
 *
 * NOTE: offsetof() requires <stddef.h> to be included by this program.
 *
 * @return 0 when the filter was installed, -1 otherwise.
 */
int demo_seccomp_monitoring() {
    /* Length is derived from the array: the text is multi-byte UTF-8. */
    static const char allowed_msg[] = "允许的write调用\n";

    printf("=== seccomp监控和日志示例 ===\n");

    signal(SIGSYS, signal_handler);

    struct sock_filter filter[] = {
        /* Load the system call number. */
        BPF_STMT(BPF_LD | BPF_W | BPF_ABS, offsetof(struct seccomp_data, nr)),

        /* Basic I/O. */
        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, SYS_read, 0, 1),
        BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),

        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, SYS_write, 0, 1),
        BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),

        /* Process termination, on every architecture. */
        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, SYS_exit, 0, 1),
        BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
        BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, SYS_exit_group, 0, 1),
        BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),

        /* Everything else is handed to the tracer, tagged with data 1. */
        BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_TRACE | (1 & 0xFFFF)),
    };

    struct sock_fprog prog = {
        .len = (unsigned short)(sizeof(filter) / sizeof(filter[0])),
        .filter = filter,
    };

    printf("创建带监控的日志过滤器\n");
    printf("SECCOMP_RET_TRACE可以用于:\n");
    printf(" - 系统调用追踪\n");
    printf(" - 安全审计\n");
    printf(" - 调试和分析\n");

    /* Without no_new_privs the filter install needs CAP_SYS_ADMIN. */
    if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) == -1) {
        printf("启用seccomp失败: %s\n", strerror(errno));
        return -1;
    }

    if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0) == -1) {
        printf("启用seccomp失败: %s\n", strerror(errno));
        return -1;
    }

    printf("seccomp监控启用成功\n");

    printf("\n测试监控功能:\n");

    /* Allowed call. */
    fflush(stdout); /* keep buffered text and the raw write in order */
    if (write(STDOUT_FILENO, allowed_msg, sizeof(allowed_msg) - 1) == -1) {
        printf("write调用失败: %s\n", strerror(errno));
    }

    /* Monitored calls: without a tracer they fail with ENOSYS. */
    printf("测试被监控的系统调用:\n");

    long pid = syscall(SYS_getpid);
    if (pid == -1) {
        printf("getpid()未执行: %s\n", strerror(errno));
    } else {
        printf("getpid()返回: %d\n", (int)pid);
    }

    long uid = syscall(SYS_getuid);
    if (uid == -1) {
        printf("getuid()未执行: %s\n", strerror(errno));
    } else {
        printf("getuid()返回: %d\n", (int)uid);
    }

    printf("注意:在实际应用中,SECCOMP_RET_TRACE会触发ptrace事件\n");
    printf("这需要额外的监控进程来处理追踪事件\n");

    return 0;
}

/* Entry point: the process exit status is the demo's return value. */
int main() {
    const int status = demo_seccomp_monitoring();
    return status;
}

示例5:安全沙箱应用

#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include <sys/mman.h>
#include <sys/prctl.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <sys/wait.h>

#include <linux/audit.h>
#include <linux/filter.h>
#include <linux/seccomp.h>

/**
* Sandbox configuration consumed by create_secure_sandbox().
*
* Each field is a flag: a nonzero value adds the corresponding group of
* system calls to the allow-list, zero leaves that group denied.
*/
typedef struct {
/* nonzero: allow socket and connect */
int allow_network;
/* nonzero: allow open, openat and close */
int allow_file_write;
/* nonzero: allow fork and clone */
int allow_process_creation;
/* nonzero: allow mmap and munmap */
int allow_memory_mapping;
} sandbox_config_t;

/**
* 创建安全沙箱
*/
int create_secure_sandbox(const sandbox_config_t *config) {
printf("=== 创建安全沙箱 ===\n");

// 根据配置创建BPF过滤器
struct sock_filter filter&#91;100];
int filter_index = 0;

// 基础加载系统调用号指令
filter&#91;filter_index++] = BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
offsetof(struct seccomp_data, nr));

// 始终允许的系统调用
int essential_calls&#91;] = {SYS_read, SYS_write,
#ifdef __x86_64__
60, // exit
231 // exit_group
#elif defined(__i386__)
1, // exit
252 // exit_group
#endif
};

for (size_t i = 0; i < sizeof(essential_calls)/sizeof(essential_calls&#91;0]); i++) {
filter&#91;filter_index++] = BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K,
essential_calls&#91;i], 0, 1);
filter&#91;filter_index++] = BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW);
}

// 根据配置允许额外的系统调用
if (config->allow_file_write) {
filter&#91;filter_index++] = BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, SYS_open, 0, 1);
filter&#91;filter_index++] = BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW);

filter&#91;filter_index++] = BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, SYS_openat, 0, 1);
filter&#91;filter_index++] = BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW);

filter&#91;filter_index++] = BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, SYS_close, 0, 1);
filter&#91;filter_index++] = BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW);
}

if (config->allow_network) {
filter&#91;filter_index++] = BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, SYS_socket, 0, 1);
filter&#91;filter_index++] = BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW);

filter&#91;filter_index++] = BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, SYS_connect, 0, 1);
filter&#91;filter_index++] = BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW);
}

if (config->allow_process_creation) {
filter&#91;filter_index++] = BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, SYS_fork, 0, 1);
filter&#91;filter_index++] = BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW);

filter&#91;filter_index++] = BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, SYS_clone, 0, 1);
filter&#91;filter_index++] = BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW);
}

if (config->allow_memory_mapping) {
filter&#91;filter_index++] = BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, SYS_mmap, 0, 1);
filter&#91;filter_index++] = BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW);

filter&#91;filter_index++] = BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, SYS_munmap, 0, 1);
filter&#91;filter_index++] = BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW);
}

// 默认拒绝所有其他系统调用
filter&#91;filter_index++] = BPF_STMT(BPF_RET | BPF_K,
SECCOMP_RET_ERRNO | (EPERM & 0xFFFF));

struct sock_fprog prog = {
.len = filter_index,
.filter = filter,
};

printf("沙箱配置:\n");
printf(" 网络访问: %s\n", config->allow_network ? "允许" : "禁止");
printf(" 文件写入: %s\n", config->allow_file_write ? "允许" : "禁止");
printf(" 进程创建: %s\n", config->allow_process_creation ? "允许" : "禁止");
printf(" 内存映射: %s\n", config->allow_memory_mapping ? "允许" : "禁止");

// 应用沙箱
if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0) == -1) {
printf("创建沙箱失败: %s\n", strerror(errno));
return -1;
}

printf("安全沙箱创建成功\n");
return 0;
}

/**
* 演示不同安全级别的沙箱
*/
int demo_security_levels() {
sandbox_config_t configs&#91;3] = {
// 最严格:只允许基本I/O
{0, 0, 0, 0},

// 中等:允许文件操作
{0, 1, 0, 1},

// 宽松:允许网络和进程创建
{1, 1, 1, 1}
};

const char *level_names&#91;] = {"最高安全", "中等安全", "较低安全"};

printf("=== 不同安全级别沙箱演示 ===\n");

for (int level = 0; level < 3; level++) {
printf("\n--- %s级别沙箱 ---\n", level_names&#91;level]);

if (create_secure_sandbox(&configs&#91;level]) == 0) {
printf("沙箱 %s 创建成功\n", level_names&#91;level]);

// 测试沙箱功能
write(STDOUT_FILENO, "基本I/O测试成功\n", 17);

if (configs&#91;level].allow_network) {
printf("网络功能可用\n");
}

if (configs&#91;level].allow_file_write) {
printf("文件写入功能可用\n");
}

// 由于seccomp策略一旦设置就不能放松,我们需要在子进程中测试
break; // 只测试第一个配置
}
}

return 0;
}

/**
 * Installs the strictest sandbox (basic I/O only) on the calling process
 * and verifies that networking, file creation and process creation are
 * refused.
 *
 * Unexpected successes are reported too, and an unexpectedly created child
 * process exits immediately instead of continuing the demo.
 *
 * @return 0 on completion, -1 when the sandbox could not be created.
 */
int demo_practical_sandbox() {
    /* Length is derived from the array: the text is multi-byte UTF-8. */
    static const char write_msg[] = " write系统调用工作正常\n";

    printf("=== 实际沙箱应用演示 ===\n");

    /* Most restrictive preset: every optional group is disabled. */
    sandbox_config_t config = {0, 0, 0, 0};

    if (create_secure_sandbox(&config) != 0) {
        return -1;
    }

    printf("\n沙箱环境中运行测试程序:\n");

    printf("1. 基本输出测试:\n");
    printf(" 标准输出工作正常\n");
    fflush(stdout); /* keep buffered text and the raw write in order */
    if (write(STDOUT_FILENO, write_msg, sizeof(write_msg) - 1) == -1) {
        printf(" write系统调用失败: %s\n", strerror(errno));
    }

    printf("\n2. 被限制功能测试:\n");

    /* Networking. */
    long result = syscall(SYS_socket, AF_INET, SOCK_STREAM, 0);
    if (result == -1) {
        printf(" 网络操作被成功阻止: %s\n", strerror(errno));
    } else {
        printf(" 网络操作意外成功: %ld\n", result);
        close((int)result);
    }

    /* File creation. */
#ifdef SYS_open
    result = syscall(SYS_open, "/tmp/test", O_WRONLY | O_CREAT, 0644);
#else
    result = syscall(SYS_openat, AT_FDCWD, "/tmp/test", O_WRONLY | O_CREAT, 0644);
#endif
    if (result == -1) {
        printf(" 文件写入被成功阻止: %s\n", strerror(errno));
    } else {
        printf(" 文件写入意外成功: %ld\n", result);
        close((int)result);
    }

    /* Process creation. */
    result = fork();
    if (result == -1) {
        printf(" 进程创建被成功阻止: %s\n", strerror(errno));
    } else if (result == 0) {
        /* Unexpected child: leave at once instead of repeating the demo. */
        _exit(0);
    } else {
        printf(" 进程创建意外成功: %ld\n", result);
    }

    printf("\n3. 沙箱优势:\n");
    printf(" ✓ 防止恶意代码执行危险操作\n");
    printf(" ✓ 限制程序的权限范围\n");
    printf(" ✓ 提供额外的安全层\n");
    printf(" ✓ 可以与其它安全机制配合使用\n");

    printf("\n4. 使用场景:\n");
    printf(" - 插件或扩展的安全执行\n");
    printf(" - 不可信代码的沙箱运行\n");
    printf(" - 容器和虚拟化环境\n");
    printf(" - 安全审计和监控\n");

    return 0;
}

/**
 * Runs the sandbox demo in a child process so that the irreversible seccomp
 * policy never affects the parent, and reports the child's outcome through
 * the exit status.
 *
 * @return the child's exit status, or 1 when fork/wait failed or the child
 *         was terminated by a signal.
 */
int main() {
    printf("seccomp - Linux系统调用过滤机制\n");
    printf("================================\n\n");

    /* Flush before fork so buffered text is not emitted by both processes. */
    fflush(stdout);

    pid_t child = fork();
    if (child == -1) {
        printf("fork失败: %s\n", strerror(errno));
        return 1;
    }

    if (child == 0) {
        return demo_practical_sandbox();
    }

    int status = 0;
    pid_t waited;
    do {
        waited = waitpid(child, &status, 0);
    } while (waited == -1 && errno == EINTR);

    if (waited == -1) {
        printf("waitpid失败: %s\n", strerror(errno));
        return 1;
    }

    return WIFEXITED(status) ? WEXITSTATUS(status) : 1;
}

seccomp 使用注意事项

系统要求:

内核版本: SECCOMP_MODE_FILTER需要Linux 3.5或更高版本(seccomp()系统调用需要Linux 3.17或更高版本)

架构支持: 支持多种CPU架构

编译选项: 需要内核编译时启用CONFIG_SECCOMP(过滤模式还需要CONFIG_SECCOMP_FILTER)

权限要求:

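  1. CAP_SYS_ADMIN: 安装过滤器前需要先调用prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0),否则调用者必须拥有CAP_SYS_ADMIN权限
  2. 无特权进程: 可以使用SECCOMP_MODE_STRICT;设置no_new_privs后也可以使用SECCOMP_MODE_FILTER
  3. 容器环境: Docker等容器可能有限制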

安全考虑:

  1. 策略不可逆: 一旦应用,seccomp策略不能放松
  2. 调试困难: 被阻止的系统调用可能难以调试
  3. 兼容性: 可能影响程序的正常功能
  4. 性能影响: BPF过滤会增加系统调用开销

最佳实践:

渐进式应用: 从宽松策略开始,逐步收紧

充分测试: 在生产环境前充分测试

错误处理: 妥善处理被阻止的系统调用

日志记录: 记录安全相关事件

备份方案: 提供策略失效时的处理方案

seccomp 模式详解

SECCOMP_MODE_STRICT (模式1):

  • 特点: 最简单的模式,只允许read/write/_exit/sigreturn(不包括exit_group,其他系统调用会导致进程被SIGKILL终止)

  • 优点: 简单、高效、安全

  • 缺点: 功能极其有限

  • 适用: 极度安全要求的简单程序

SECCOMP_MODE_FILTER (模式2):

  • 特点: 使用BPF程序定义复杂过滤规则

  • 优点: 灵活、功能强大

  • 缺点: 配置复杂

  • 适用: 大多数实际应用场景

常见系统调用编号

x86_64架构:

  • SYS_read = 0

  • SYS_write = 1

  • SYS_open = 2

  • SYS_close = 3

  • SYS_stat = 4

  • SYS_fstat = 5

  • SYS_lstat = 6

  • SYS_poll = 7

  • SYS_lseek = 8

  • SYS_mmap = 9

  • SYS_mprotect = 10

  • SYS_munmap = 11

  • SYS_brk = 12

  • SYS_rt_sigaction = 13

  • SYS_rt_sigprocmask = 14

  • SYS_rt_sigreturn = 15

  • SYS_ioctl = 16

  • SYS_pread64 = 17

  • SYS_pwrite64 = 18

  • SYS_readv = 19

  • SYS_writev = 20

  • SYS_access = 21

  • SYS_pipe = 22

  • SYS_select = 23

  • SYS_sched_yield = 24

  • SYS_mremap = 25

  • SYS_msync = 26

  • SYS_mincore = 27

  • SYS_madvise = 28

  • SYS_shmget = 29

  • SYS_shmat = 30

  • SYS_shmctl = 31

  • SYS_dup = 32

  • SYS_dup2 = 33

  • SYS_pause = 34

  • SYS_nanosleep = 35

  • SYS_getitimer = 36

  • SYS_alarm = 37

  • SYS_setitimer = 38

  • SYS_getpid = 39

  • SYS_sendfile = 40

  • SYS_socket = 41

  • SYS_connect = 42

  • SYS_accept = 43

  • SYS_sendto = 44

  • SYS_recvfrom = 45

  • SYS_sendmsg = 46

  • SYS_recvmsg = 47

  • SYS_shutdown = 48

  • SYS_bind = 49

  • SYS_listen = 50

  • SYS_getsockname = 51

  • SYS_getpeername = 52

  • SYS_socketpair = 53

  • SYS_setsockopt = 54

  • SYS_getsockopt = 55

  • SYS_clone = 56

  • SYS_fork = 57

  • SYS_vfork = 58

  • SYS_execve = 59

  • SYS_exit = 60

  • SYS_wait4 = 61

  • SYS_kill = 62

  • SYS_uname = 63

  • SYS_semget = 64

  • SYS_semop = 65

  • SYS_semctl = 66

  • SYS_shmdt = 67

  • SYS_msgget = 68

  • SYS_msgsnd = 69

  • SYS_msgrcv = 70

  • SYS_msgctl = 71

  • SYS_fcntl = 72

  • SYS_flock = 73

  • SYS_fsync = 74

  • SYS_fdatasync = 75

  • SYS_truncate = 76

  • SYS_ftruncate = 77

  • SYS_getdents = 78

  • SYS_getcwd = 79

  • SYS_chdir = 80

  • SYS_fchdir = 81

  • SYS_rename = 82

  • SYS_mkdir = 83

  • SYS_rmdir = 84

  • SYS_creat = 85

  • SYS_link = 86

  • SYS_unlink = 87

  • SYS_symlink = 88

  • SYS_readlink = 89

  • SYS_chmod = 90

  • SYS_fchmod = 91

  • SYS_chown = 92

  • SYS_fchown = 93

  • SYS_lchown = 94

  • SYS_umask = 95

  • SYS_gettimeofday = 96

  • SYS_getrlimit = 97

  • SYS_getrusage = 98

  • SYS_sysinfo = 99

  • SYS_times = 100

  • SYS_ptrace = 101

  • SYS_getuid = 102

  • SYS_syslog = 103

  • SYS_getgid = 104

  • SYS_setuid = 105

  • SYS_setgid = 106

  • SYS_geteuid = 107

  • SYS_getegid = 108

  • SYS_setpgid = 109

  • SYS_getppid = 110

  • SYS_getpgrp = 111

  • SYS_setsid = 112

  • SYS_setreuid = 113

  • SYS_setregid = 114

  • SYS_getgroups = 115

  • SYS_setgroups = 116

  • SYS_setresuid = 117

  • SYS_getresuid = 118

  • SYS_setresgid = 119

  • SYS_getresgid = 120

  • SYS_getpgid = 121

  • SYS_setfsuid = 122

  • SYS_setfsgid = 123

  • SYS_getsid = 124

  • SYS_capget = 125

  • SYS_capset = 126

  • SYS_rt_sigpending = 127

  • SYS_rt_sigtimedwait = 128

  • SYS_rt_sigqueueinfo = 129

  • SYS_rt_sigsuspend = 130

  • SYS_sigaltstack = 131

  • SYS_utime = 132

  • SYS_mknod = 133

  • SYS_uselib = 134

  • SYS_personality = 135

  • SYS_ustat = 136

  • SYS_statfs = 137

  • SYS_fstatfs = 138

  • SYS_sysfs = 139

  • SYS_getpriority = 140

  • SYS_setpriority = 141

  • SYS_sched_setparam = 142

  • SYS_sched_getparam = 143

  • SYS_sched_setscheduler = 144

  • SYS_sched_getscheduler = 145

  • SYS_sched_get_priority_max = 146

  • SYS_sched_get_priority_min = 147

  • SYS_sched_rr_get_interval = 148

  • SYS_mlock = 149

  • SYS_munlock = 150

  • SYS_mlockall = 151

  • SYS_munlockall = 152

  • SYS_vhangup = 153

  • SYS_modify_ldt = 154

  • SYS_pivot_root = 155

  • SYS__sysctl = 156

  • SYS_prctl = 157

  • SYS_arch_prctl = 158

  • SYS_adjtimex = 159

  • SYS_setrlimit = 160

  • SYS_chroot = 161

  • SYS_sync = 162

  • SYS_acct = 163

  • SYS_settimeofday = 164

  • SYS_mount = 165

  • SYS_umount2 = 166

  • SYS_swapon = 167

  • SYS_swapoff = 168

  • SYS_reboot = 169

  • SYS_sethostname = 170

  • SYS_setdomainname = 171

  • SYS_iopl = 172

  • SYS_ioperm = 173

  • SYS_create_module = 174

  • SYS_init_module = 175

  • SYS_delete_module = 176

  • SYS_get_kernel_syms = 177

  • SYS_query_module = 178

  • SYS_quotactl = 179

  • SYS_nfsservctl = 180

  • SYS_getpmsg = 181

  • SYS_putpmsg = 182

  • SYS_afs_syscall = 183

  • SYS_tuxcall = 184

  • SYS_security = 185

  • SYS_gettid = 186

  • SYS_readahead = 187

  • SYS_setxattr = 188

  • SYS_lsetxattr = 189

  • SYS_fsetxattr = 190

  • SYS_getxattr = 191

  • SYS_lgetxattr = 192

  • SYS_fgetxattr = 193

  • SYS_listxattr = 194

  • SYS_llistxattr = 195

  • SYS_flistxattr = 196

  • SYS_removexattr = 197

  • SYS_lremovexattr = 198

  • SYS_fremovexattr = 199

  • SYS_tkill = 200

  • SYS_time = 201

  • SYS_futex = 202

  • SYS_sched_setaffinity = 203

  • SYS_sched_getaffinity = 204

  • SYS_set_thread_area = 205

  • SYS_io_setup = 206

  • SYS_io_destroy = 207

  • SYS_io_getevents = 208

  • SYS_io_submit = 209

  • SYS_io_cancel = 210

  • SYS_get_thread_area = 211

  • SYS_lookup_dcookie = 212

  • SYS_epoll_create = 213

  • SYS_epoll_ctl_old = 214

  • SYS_epoll_wait_old = 215

  • SYS_remap_file_pages = 216

  • SYS_getdents64 = 217

  • SYS_set_tid_address = 218

  • SYS_restart_syscall = 219

  • SYS_semtimedop = 220

  • SYS_fadvise64 = 221

  • SYS_timer_create = 222

  • SYS_timer_settime = 223

  • SYS_timer_gettime = 224

  • SYS_timer_getoverrun = 225

  • SYS_timer_delete = 226

  • SYS_clock_settime = 227

  • SYS_clock_gettime = 228

  • SYS_clock_getres = 229

  • SYS_clock_nanosleep = 230

  • SYS_exit_group = 231

  • SYS_epoll_wait = 232

  • SYS_epoll_ctl = 233

  • SYS_tgkill = 234

  • SYS_utimes = 235

  • SYS_vserver = 236

  • SYS_mbind = 237

  • SYS_set_mempolicy = 238

  • SYS_get_mempolicy = 239

  • SYS_mq_open = 240

  • SYS_mq_unlink = 241

  • SYS_mq_timedsend = 242

  • SYS_mq_timedreceive = 243

  • SYS_mq_notify = 244

  • SYS_mq_getsetattr = 245

  • SYS_kexec_load = 246

  • SYS_waitid = 247

  • SYS_add_key = 248

  • SYS_request_key = 249

  • SYS_keyctl = 250

  • SYS_ioprio_set = 251

  • SYS_ioprio_get = 252

  • SYS_inotify_init = 253

  • SYS_inotify_add_watch = 254

  • SYS_inotify_rm_watch = 255

  • SYS_migrate_pages = 256

  • SYS_openat = 257

  • SYS_mkdirat = 258

  • SYS_mknodat = 259

  • SYS_fchownat = 260

  • SYS_futimesat = 261

  • SYS_newfstatat = 262

  • SYS_unlinkat = 263

  • SYS_renameat = 264

  • SYS_linkat = 265

  • SYS_symlinkat = 266

  • SYS_readlinkat = 267

  • SYS_fchmodat = 268

  • SYS_faccessat = 269

  • SYS_pselect6 = 270

  • SYS_ppoll = 271

  • SYS_unshare = 272

  • SYS_set_robust_list = 273

  • SYS_get_robust_list = 274

  • SYS_splice = 275

  • SYS_tee = 276

  • SYS_sync_file_range = 277

  • SYS_vmsplice = 278

  • SYS_move_pages = 279

  • SYS_utimensat = 280

  • SYS_epoll_pwait = 281

  • SYS_signalfd = 282

  • SYS_timerfd_create = 283

  • SYS_eventfd = 284

  • SYS_fallocate = 285

  • SYS_timerfd_settime = 286

  • SYS_timerfd_gettime = 287

  • SYS_accept4 = 288

  • SYS_signalfd4 = 289

  • SYS_eventfd2 = 290

  • SYS_epoll_create1 = 291

  • SYS_dup3 = 292

  • SYS_pipe2 = 293

  • SYS_inotify_init1 = 294

  • SYS_preadv = 295

  • SYS_pwritev = 296

  • SYS_rt_tgsigqueueinfo = 297

  • SYS_perf_event_open = 298

  • SYS_recvmmsg = 299

  • SYS_fanotify_init = 300

  • SYS_fanotify_mark = 301

  • SYS_prlimit64 = 302

  • SYS_name_to_handle_at = 303

  • SYS_open_by_handle_at = 304

  • SYS_clock_adjtime = 305

  • SYS_syncfs = 306

  • SYS_sendmmsg = 307

  • SYS_setns = 308

  • SYS_getcpu = 309

  • SYS_process_vm_readv = 310

  • SYS_process_vm_writev = 311

  • SYS_kcmp = 312

  • SYS_finit_module = 313

  • SYS_sched_setattr = 314

  • SYS_sched_getattr = 315

  • SYS_renameat2 = 316

  • SYS_seccomp = 317

  • SYS_getrandom = 318

  • SYS_memfd_create = 319

  • SYS_kexec_file_load = 320

  • SYS_bpf = 321

  • SYS_execveat = 322

  • SYS_userfaultfd = 323

  • SYS_membarrier = 324

  • SYS_mlock2 = 325

  • SYS_copy_file_range = 326

  • SYS_preadv2 = 327

  • SYS_pwritev2 = 328

  • SYS_pkey_mprotect = 329

  • SYS_pkey_alloc = 330

  • SYS_pkey_free = 331

  • SYS_statx = 332

  • SYS_io_pgetevents = 333

  • SYS_rseq = 334

总结

seccomp 是Linux系统中强大的安全机制,提供了:

  1. 系统调用级别的访问控制: 精确控制进程可以执行的操作
  2. 灵活的策略定义: 通过BPF程序实现复杂过滤逻辑
  3. 高效的执行: 内核级别的过滤,性能开销小
  4. 广泛的应用场景: 适用于沙箱、容器、安全审计等

通过合理使用seccomp,可以显著提高应用程序的安全性,构建更加安全可靠的计算环境。在实际应用中,需要仔细设计过滤策略,充分测试,并考虑错误处理和调试需求。