2025-08-16

openat系统调用及示例

openat - 相对于目录文件描述符打开文件

1. 函数介绍

openat 是一个 Linux 系统调用，用于相对于指定目录文件描述符打开文件。它是 open 函数的扩展版本，提供了更灵活的文件打开方式，支持相对路径操作，避免了某些竞态条件。

2. 函数原型

#include <fcntl.h>

int openat(int dirfd, const char *pathname, int flags);
int openat(int dirfd, const char *pathname, int flags, mode_t mode);

3. 功能

相对于目录文件描述符 dirfd 打开或创建文件。如果 pathname 是相对路径，则相对于 dirfd 指定的目录解析；如果是绝对路径，则忽略 dirfd。

4. 参数

int dirfd: 目录文件描述符

AT_FDCWD: 使用当前工作目录
有效的目录文件描述符：相对于该目录进行操作
注意：AT_FDCWD 本身是一个特殊的负数常量（Linux 上为 -100）；其他负数不是有效的描述符，会导致 EBADF 错误

const char *pathname: 文件路径名

int flags: 文件打开标志

O_RDONLY: 只读打开
O_WRONLY: 只写打开
O_RDWR: 读写打开
O_CREAT: 文件不存在时创建
O_EXCL: 与 O_CREAT 配合使用，确保原子创建
O_TRUNC: 截断已存在的文件
O_APPEND: 追加模式
O_NONBLOCK: 非阻塞模式
O_SYNC: 同步写入
等等…

mode_t mode: 文件权限模式（当使用 O_CREAT 时必需）

例如：0644, 0755 等

5. 返回值

成功时返回新的文件描述符（非负整数）
失败时返回 -1，并设置 errno

6. 常见 errno 错误码

EACCES: 权限不足
EEXIST: 文件已存在（使用 O_CREAT|O_EXCL 时）
EISDIR: 路径指向目录而非文件
ENOENT: 文件或路径不存在
ENOTDIR: pathname 是相对路径而 dirfd 指向的不是目录，或路径中的某个组件不是目录
EBADF: dirfd 既不是 AT_FDCWD 也不是有效的文件描述符
ELOOP: 符号链接层级过深
ENAMETOOLONG: 路径名过长
ENOMEM: 内存不足
ENOSPC: 磁盘空间不足
EROFS: 文件系统为只读

7. 相似函数，或关联函数

open(): 传统的文件打开函数
creat(): 创建文件（已废弃，使用 open 代替）
close(): 关闭文件描述符
read(), write(): 文件读写操作
fstatat(): 相对于目录文件描述符获取文件状态
mkdirat(): 相对于目录文件描述符创建目录
unlinkat(): 相对于目录文件描述符删除文件
readlinkat(): 相对于目录文件描述符读取符号链接

8. 示例代码

示例1：基本使用 - 相对路径文件操作

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <errno.h>
#include <string.h>

/*
 * Example 1: create one file through AT_FDCWD and one relative to an open
 * directory descriptor, read both back, then remove everything again.
 * Exits with EXIT_FAILURE if the directory or either file cannot be set up.
 */
int main(void) {
    printf("=== openat 基本使用演示 ===\n");

    // Create the scratch directory; an already existing one is reused.
    if (mkdir("test_dir", 0755) == -1 && errno != EEXIST) {
        perror("创建测试目录失败");
        exit(EXIT_FAILURE);
    }

    printf("创建测试目录: test_dir\n");

    // Method 1: AT_FDCWD makes openat() resolve the relative path against
    // the current working directory.
    int fd1 = openat(AT_FDCWD, "test_dir/test_file1.txt",
                     O_CREAT | O_WRONLY | O_TRUNC, 0644);
    if (fd1 == -1) {
        perror("使用 AT_FDCWD 创建文件失败");
        rmdir("test_dir");
        exit(EXIT_FAILURE);
    }

    printf("✓ 使用 AT_FDCWD 成功创建文件: test_dir/test_file1.txt (fd: %d)\n", fd1);

    // Store the sample content; a failed write is reported, not ignored.
    const char *content1 = "This is test file 1 content.\n";
    if (write(fd1, content1, strlen(content1)) == -1) {
        perror("写入 test_file1.txt 失败");
    }
    close(fd1);

    // Method 2: open the directory first, then open files relative to it.
    int dirfd = open("test_dir", O_RDONLY);
    if (dirfd == -1) {
        perror("打开目录失败");
        unlink("test_dir/test_file1.txt");
        rmdir("test_dir");
        exit(EXIT_FAILURE);
    }

    printf("✓ 成功打开目录: test_dir (dirfd: %d)\n", dirfd);

    // "test_file2.txt" is resolved inside the directory behind dirfd.
    int fd2 = openat(dirfd, "test_file2.txt", O_CREAT | O_WRONLY | O_TRUNC, 0644);
    if (fd2 == -1) {
        perror("相对目录创建文件失败");
        close(dirfd);
        unlink("test_dir/test_file1.txt");
        rmdir("test_dir");
        exit(EXIT_FAILURE);
    }

    printf("✓ 相对目录成功创建文件: test_file2.txt (fd: %d)\n", fd2);

    const char *content2 = "This is test file 2 content.\n";
    if (write(fd2, content2, strlen(content2)) == -1) {
        perror("写入 test_file2.txt 失败");
    }
    close(fd2);

    printf("\n验证创建的文件:\n");

    // Read the first file back through AT_FDCWD.
    int fd_read1 = openat(AT_FDCWD, "test_dir/test_file1.txt", O_RDONLY);
    if (fd_read1 != -1) {
        char buffer[256];
        // One byte is reserved for the terminating NUL added below.
        ssize_t bytes_read = read(fd_read1, buffer, sizeof(buffer) - 1);
        if (bytes_read > 0) {
            buffer[bytes_read] = '\0';
            printf("  test_file1.txt 内容: %s", buffer);
        }
        close(fd_read1);
    }

    // Read the second file back relative to the directory descriptor.
    int fd_read2 = openat(dirfd, "test_file2.txt", O_RDONLY);
    if (fd_read2 != -1) {
        char buffer[256];
        ssize_t bytes_read = read(fd_read2, buffer, sizeof(buffer) - 1);
        if (bytes_read > 0) {
            buffer[bytes_read] = '\0';
            printf("  test_file2.txt 内容: %s", buffer);
        }
        close(fd_read2);
    }

    // Release the directory descriptor and remove the scratch files.
    close(dirfd);
    unlink("test_dir/test_file1.txt");
    unlink("test_dir/test_file2.txt");
    rmdir("test_dir");

    printf("✓ 完成 openat 基本使用演示\n");

    return 0;
}

示例2：错误处理和特殊情况

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <errno.h>
#include <string.h>

/*
 * Try one openat() call and print the outcome.
 *
 * dirfd, pathname and flags are passed to openat(); description is a label
 * printed before the attempt. On success the new descriptor is printed and
 * closed again. On failure the errno text and a short explanation are printed.
 */
void test_openat_errors(int dirfd, const char *pathname, int flags, const char *description) {
    printf("\n测试 %s:\n", description);
    printf("  dirfd: %d\n", dirfd);
    printf("  pathname: %s\n", pathname);
    printf("  flags: 0x%x\n", flags);

    // openat() reads a mode argument whenever O_CREAT is set, so one must be
    // supplied in that case; otherwise the permission bits are garbage.
    int fd = (flags & O_CREAT) ? openat(dirfd, pathname, flags, 0644)
                               : openat(dirfd, pathname, flags);
    if (fd == -1) {
        // Capture errno right away: the printf calls below may overwrite it.
        int saved_errno = errno;

        printf("  结果: 失败 - %s\n", strerror(saved_errno));
        switch (saved_errno) {
            case EACCES:
                printf("    原因: 权限不足\n");
                break;
            case ENOENT:
                printf("    原因: 文件或目录不存在\n");
                break;
            case ENOTDIR:
                printf("    原因: dirfd 不是目录文件描述符\n");
                break;
            case EBADF:
                printf("    原因: dirfd 不是有效的文件描述符\n");
                break;
            case EISDIR:
                printf("    原因: 路径指向目录\n");
                break;
            case EEXIST:
                printf("    原因: 文件已存在 (O_CREAT|O_EXCL)\n");
                break;
            case ELOOP:
                printf("    原因: 符号链接层级过深\n");
                break;
            case ENAMETOOLONG:
                printf("    原因: 路径名过长\n");
                break;
            default:
                printf("    原因: 其他错误\n");
                break;
        }
    } else {
        printf("  结果: 成功 (fd: %d)\n", fd);
        close(fd);
    }
}

/*
 * Example 2: build a small test directory and run a series of openat()
 * calls against it, covering both working and failing cases.
 * Exits with EXIT_FAILURE when the test directory cannot be prepared.
 */
int main(void) {
    printf("=== openat 错误处理测试 ===\n");

    // Prepare the test directory; an already existing one is reused.
    if (mkdir("error_test_dir", 0755) == -1 && errno != EEXIST) {
        perror("创建测试目录失败");
        exit(EXIT_FAILURE);
    }

    // Create the file the later cases operate on.
    int test_fd = open("error_test_dir/test_file.txt", O_CREAT | O_WRONLY | O_TRUNC, 0644);
    if (test_fd != -1) {
        if (write(test_fd, "test content", 12) == -1) {
            perror("写入测试文件失败");
        }
        close(test_fd);
        printf("创建测试文件: error_test_dir/test_file.txt\n");
    } else {
        perror("创建测试文件失败");
    }

    // Directory descriptor used as the base of the relative lookups.
    int dirfd = open("error_test_dir", O_RDONLY);
    if (dirfd == -1) {
        perror("打开测试目录失败");
        unlink("error_test_dir/test_file.txt");
        rmdir("error_test_dir");
        exit(EXIT_FAILURE);
    }

    printf("打开测试目录: error_test_dir (dirfd: %d)\n", dirfd);

    // Working cases. The AT_FDCWD path is relative to the working directory,
    // so the label says "relative" rather than "absolute".
    test_openat_errors(dirfd, "test_file.txt", O_RDONLY, "正常相对路径打开");
    test_openat_errors(AT_FDCWD, "error_test_dir/test_file.txt", O_RDONLY, "AT_FDCWD 相对路径打开");

    // Failing cases.
    test_openat_errors(dirfd, "nonexistent.txt", O_RDONLY, "不存在的文件");
    test_openat_errors(-2, "test_file.txt", O_RDONLY, "无效的 dirfd");
    test_openat_errors(dirfd, "../outside_file.txt", O_RDONLY, "跳出目录的路径");
    test_openat_errors(dirfd, "", O_RDONLY, "空路径名");

    // O_CREAT without O_EXCL on an existing file simply opens it.
    test_openat_errors(dirfd, "test_file.txt", O_CREAT | O_WRONLY, "创建已存在的文件");

    // O_CREAT | O_EXCL on an existing file. The helper takes four
    // arguments, so no mode value is passed here.
    test_openat_errors(dirfd, "test_file.txt", O_CREAT | O_EXCL | O_WRONLY,
                      "原子创建已存在的文件");

    // Opening a directory read-only succeeds; write access is what is
    // refused, so O_WRONLY is used to provoke the failure.
    test_openat_errors(AT_FDCWD, "error_test_dir", O_WRONLY, "将目录作为文件打开");

    // Tear the test environment down again.
    close(dirfd);
    unlink("error_test_dir/test_file.txt");
    rmdir("error_test_dir");

    return 0;
}

示例3：相对路径和安全性演示

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <errno.h>
#include <string.h>
#include <dirent.h>

/*
 * Demonstrates what a directory descriptor does and does not protect against.
 *
 * Creates security_test/data.txt, opens the directory, shows that a "../"
 * path given to openat() still leaves the directory, reads data.txt
 * relative to the descriptor, and removes everything it created.
 */
void demonstrate_security_benefits() {
    printf("=== openat 安全性优势演示 ===\n");

    // Scratch directory; an already existing one is reused.
    if (mkdir("security_test", 0755) == -1 && errno != EEXIST) {
        perror("创建安全测试目录失败");
        return;
    }

    // Sample file inside the directory.
    int fd = open("security_test/data.txt", O_CREAT | O_WRONLY | O_TRUNC, 0644);
    if (fd != -1) {
        if (write(fd, "sensitive data", 14) == -1) {
            perror("写入敏感数据文件失败");
        }
        close(fd);
        printf("创建敏感数据文件: security_test/data.txt\n");
    }

    // Directory descriptor all later lookups start from.
    int dirfd = open("security_test", O_RDONLY);
    if (dirfd == -1) {
        perror("打开安全测试目录失败");
        unlink("security_test/data.txt");
        rmdir("security_test");
        return;
    }

    printf("✓ 成功打开安全目录 (dirfd: %d)\n", dirfd);

    printf("\n安全性优势对比:\n");

    // Path-based approach: check first, open later.
    printf("传统方式风险:\n");
    printf("  1. 先检查文件是否存在\n");
    printf("  2. 再打开文件进行操作\n");
    printf("  3. 在步骤1和2之间，文件可能被修改\n");

    // Descriptor-based approach. Items 3 and 4 are worded so they do not
    // promise confinement, which openat() alone does not provide.
    printf("\nopenat 方式优势:\n");
    printf("  1. 基于已打开的目录文件描述符操作\n");
    printf("  2. 避免路径解析过程中的竞态条件\n");
    printf("  3. 目录被重命名或移动后仍指向同一目录\n");
    printf("  4. 配合路径校验或 openat2(RESOLVE_BENEATH) 可防止目录遍历\n");

    printf("\n相对路径限制演示:\n");

    // "../" is resolved like any other component, so this normally succeeds
    // and creates the file in the parent of security_test.
    int restricted_fd = openat(dirfd, "../outside_file.txt", O_CREAT | O_WRONLY | O_TRUNC, 0644);
    if (restricted_fd != -1) {
        printf("  警告: 能够访问上级目录文件\n");
        close(restricted_fd);
        // Remove it through the same dirfd-relative path it was created
        // with; a plain "../outside_file.txt" would be resolved against
        // the working directory and name a different file.
        unlinkat(dirfd, "../outside_file.txt", 0);
    } else {
        printf("  无法创建上级目录文件: %s\n", strerror(errno));
    }

    // Ordinary lookup relative to the directory descriptor.
    int normal_fd = openat(dirfd, "data.txt", O_RDONLY);
    if (normal_fd != -1) {
        char buffer[64];
        // One byte is reserved for the terminating NUL added below.
        ssize_t bytes_read = read(normal_fd, buffer, sizeof(buffer) - 1);
        if (bytes_read > 0) {
            buffer[bytes_read] = '\0';
            printf("  ✓ 正常访问相对路径文件: %s\n", buffer);
        }
        close(normal_fd);
    }

    // Clean up.
    close(dirfd);
    unlink("security_test/data.txt");
    rmdir("security_test");
}

/* Create `name` inside `subdir` (looked up relative to root_fd) and store `text` in it. */
static void create_file_in_subdir(int root_fd, const char *subdir,
                                  const char *name, const char *text) {
    int subdir_fd = openat(root_fd, subdir, O_RDONLY);
    if (subdir_fd == -1) {
        return;
    }

    int file_fd = openat(subdir_fd, name, O_CREAT | O_WRONLY | O_TRUNC, 0644);
    if (file_fd != -1) {
        if (write(file_fd, text, strlen(text)) == -1) {
            perror("写入文件失败");
        }
        close(file_fd);
        printf("✓ 在 %s 中创建文件\n", subdir);
    }
    close(subdir_fd);
}

/* Print the start of `name` inside `subdir` (looked up relative to root_fd). */
static void print_file_in_subdir(int root_fd, const char *subdir, const char *name) {
    int subdir_fd = openat(root_fd, subdir, O_RDONLY);
    if (subdir_fd == -1) {
        return;
    }

    int read_fd = openat(subdir_fd, name, O_RDONLY);
    if (read_fd != -1) {
        char buffer[64];
        // One byte is reserved for the terminating NUL added below.
        ssize_t bytes_read = read(read_fd, buffer, sizeof(buffer) - 1);
        if (bytes_read > 0) {
            buffer[bytes_read] = '\0';
            printf("  %s/%s: %s\n", subdir, name, buffer);
        }
        close(read_fd);
    }
    close(subdir_fd);
}

/*
 * Builds multi_level/{subdir1,subdir2}, creates one file in each
 * sub-directory by chaining openat() calls from the top-level descriptor,
 * reads the files back, and removes everything it created.
 */
void demonstrate_relative_path_operations() {
    printf("\n=== 相对路径操作演示 ===\n");

    // Build the directory tree, undoing earlier steps when a later one fails.
    if (mkdir("multi_level", 0755) == -1 && errno != EEXIST) {
        perror("创建多层目录失败");
        return;
    }

    if (mkdir("multi_level/subdir1", 0755) == -1 && errno != EEXIST) {
        perror("创建子目录1失败");
        rmdir("multi_level");
        return;
    }

    if (mkdir("multi_level/subdir2", 0755) == -1 && errno != EEXIST) {
        perror("创建子目录2失败");
        rmdir("multi_level/subdir1");
        rmdir("multi_level");
        return;
    }

    printf("创建多层目录结构:\n");
    printf("  multi_level/\n");
    printf("    subdir1/\n");
    printf("    subdir2/\n");

    // Descriptor of the top-level directory; every lookup below starts here.
    int root_fd = open("multi_level", O_RDONLY);
    if (root_fd == -1) {
        perror("打开根目录失败");
        rmdir("multi_level/subdir1");
        rmdir("multi_level/subdir2");
        rmdir("multi_level");
        return;
    }

    printf("✓ 打开根目录 (fd: %d)\n", root_fd);

    create_file_in_subdir(root_fd, "subdir1", "file1.txt", "Content in subdir1");
    create_file_in_subdir(root_fd, "subdir2", "file2.txt", "Content in subdir2");

    printf("\n验证创建的文件:\n");

    print_file_in_subdir(root_fd, "subdir1", "file1.txt");
    print_file_in_subdir(root_fd, "subdir2", "file2.txt");

    close(root_fd);

    // Remove the files first so the directories are empty for rmdir().
    unlink("multi_level/subdir1/file1.txt");
    unlink("multi_level/subdir2/file2.txt");
    rmdir("multi_level/subdir1");
    rmdir("multi_level/subdir2");
    rmdir("multi_level");

    printf("✓ 完成相对路径操作演示\n");
}

/* Example 3 entry point: prints a banner, then runs both demonstrations in order. */
int main() {
    puts("=== openat 安全性和相对路径演示 ===");

    demonstrate_security_benefits();
    demonstrate_relative_path_operations();

    return 0;
}

示例4：高级文件操作工具

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <errno.h>
#include <string.h>
#include <dirent.h>

/* One entry of the handle table: an open descriptor plus a display path. */
typedef struct {
    int fd;          /* open file or directory descriptor */
    char path[256];  /* path shown to the user, NUL-terminated */
    int is_dir;      /* non-zero when fd refers to a directory */
} file_handle_t;

/* Capacity of the handle table. */
#define MAX_HANDLES 64
/* Handle table; entries 0 .. handle_count-1 are in use. */
static file_handle_t handles[MAX_HANDLES];
static int handle_count = 0;

int register_handle(int fd, const char *path, int is_dir) {
    if (handle_count >= MAX_HANDLES) {
        return -1;
    }
    
    handles&#91;handle_count].fd = fd;
    strncpy(handles&#91;handle_count].path, path, sizeof(handles&#91;handle_count].path) - 1);
    handles&#91;handle_count].path&#91;sizeof(handles&#91;handle_count].path) - 1] = '\0';
    handles&#91;handle_count].is_dir = is_dir;
    
    return handle_count++;
}

/*
 * Remove the first table entry whose descriptor equals fd.
 *
 * Later entries move down by one position, so their indices change.
 * The descriptor itself is not closed here.
 * Returns 0 when an entry was removed, -1 when fd is not in the table.
 */
int unregister_handle(int fd) {
    for (int i = 0; i < handle_count; i++) {
        if (handles[i].fd != fd) {
            continue;
        }

        // Close the gap; memmove is used because the two ranges overlap.
        memmove(&handles[i], &handles[i + 1],
                (size_t)(handle_count - 1 - i) * sizeof(handles[0]));
        handle_count--;
        return 0;
    }
    return -1;
}

/* Print the handle table: index, descriptor, kind (directory/file) and path. */
void list_handles() {
    printf("=== 当前打开的文件句柄 ===\n");

    if (handle_count == 0) {
        printf("没有打开的文件句柄\n");
        return;
    }

    // Column header followed by a separator row.
    printf("%-4s %-6s %-8s %s\n", "ID", "FD", "类型", "路径");
    printf("%-4s %-6s %-8s %s\n", "--", "--", "--", "--");

    for (int i = 0; i < handle_count; i++) {
        printf("%-4d %-6d %-8s %s\n",
               i, handles[i].fd,
               handles[i].is_dir ? "目录" : "文件",
               handles[i].path);
    }

    printf("总计: %d 个句柄\n", handle_count);
}

void interactive_file_manager() {
    int choice;
    char path&#91;256];
    int dirfd;
    int flags;
    mode_t mode;
    
    while (1) {
        printf("\n=== openat 文件管理工具 ===\n");
        printf("1. 相对打开文件\n");
        printf("2. 打开目录\n");
        printf("3. 创建文件\n");
        printf("4. 关闭文件句柄\n");
        printf("5. 列出所有句柄\n");
        printf("6. 读取文件内容\n");
        printf("7. 写入文件内容\n");
        printf("8. 显示文件状态\n");
        printf("0. 退出\n");
        printf("请选择操作: ");
        
        if (scanf("%d", &choice) != 1) {
            printf("输入无效\n");
            while (getchar() != '\n');  // 清空输入缓冲区
            continue;
        }
        
        switch (choice) {
            case 1:
                printf("相对打开文件:\n");
                printf("输入目录句柄 ID (或 -1 表示 AT_FDCWD): ");
                int dir_id;
                if (scanf("%d", &dir_id) == 1) {
                    if (dir_id == -1) {
                        dirfd = AT_FDCWD;
                    } else if (dir_id >= 0 && dir_id < handle_count && handles&#91;dir_id].is_dir) {
                        dirfd = handles&#91;dir_id].fd;
                    } else {
                        printf("无效的目录句柄 ID\n");
                        break;
                    }
                    
                    printf("输入文件路径: ");
                    scanf("%255s", path);
                    
                    printf("输入打开标志 (O_RDONLY=0, O_WRONLY=1, O_RDWR=2): ");
                    int flag_choice;
                    if (scanf("%d", &flag_choice) == 1) {
                        switch (flag_choice) {
                            case 0: flags = O_RDONLY; break;
                            case 1: flags = O_WRONLY; break;
                            case 2: flags = O_RDWR; break;
                            default: flags = O_RDONLY; break;
                        }
                        
                        int fd = openat(dirfd, path, flags);
                        if (fd != -1) {
                            char full_path&#91;512];
                            if (dir_id == -1) {
                                snprintf(full_path, sizeof(full_path), "%s", path);
                            } else {
                                snprintf(full_path, sizeof(full_path), "%s/%s", 
                                        handles&#91;dir_id].path, path);
                            }
                            
                            int id = register_handle(fd, full_path, 0);
                            if (id != -1) {
                                printf("✓ 成功打开文件 (ID: %d, FD: %d)\n", id, fd);
                            } else {
                                printf("✗ 注册句柄失败\n");
                                close(fd);
                            }
                        } else {
                            printf("✗ 打开文件失败: %s\n", strerror(errno));
                        }
                    }
                }
                break;
                
            case 2:
                printf("打开目录:\n");
                printf("输入目录路径: ");
                scanf("%255s", path);
                
                int dir_fd = open(path, O_RDONLY);
                if (dir_fd != -1) {
                    int id = register_handle(dir_fd, path, 1);
                    if (id != -1) {
                        printf("✓ 成功打开目录 (ID: %d, FD: %d)\n", id, dir_fd);
                    } else {
                        printf("✗ 注册句柄失败\n");
                        close(dir_fd);
                    }
                } else {
                    printf("✗ 打开目录失败: %s\n", strerror(errno));
                }
                break;
                
            case 3:
                printf("创建文件:\n");
                printf("输入目录句柄 ID (或 -1 表示 AT_FDCWD): ");
                if (scanf("%d", &dir_id) == 1) {
                    if (dir_id == -1) {
                        dirfd = AT_FDCWD;
                    } else if (dir_id >= 0 && dir_id < handle_count && handles&#91;dir_id].is_dir) {
                        dirfd = handles&#91;dir_id].fd;
                    } else {
                        printf("无效的目录句柄 ID\n");
                        break;
                    }
                    
                    printf("输入文件路径: ");
                    scanf("%255s", path);
                    
                    flags = O_CREAT | O_WRONLY | O_TRUNC;
                    mode = 0644;
                    
                    int fd = openat(dirfd, path, flags, mode);
                    if (fd != -1) {
                        char full_path&#91;512];
                        if (dir_id == -1) {
                            snprintf(full_path, sizeof(full_path), "%s", path);
                        } else {
                            snprintf(full_path, sizeof(full_path), "%s/%s", 
                                    handles&#91;dir_id].path, path);
                        }
                        
                        int id = register_handle(fd, full_path, 0);
                        if (id != -1) {
                            printf("✓ 成功创建文件 (ID: %d, FD: %d)\n", id, fd);
                        } else {
                            printf("✗ 注册句柄失败\n");
                            close(fd);
                        }
                    } else {
                        printf("✗ 创建文件失败: %s\n", strerror(errno));
                    }
                }
                break;
                
            case 4: {
                if (handle_count == 0) {
                    printf("没有打开的句柄\n");
                    break;
                }
                
                list_handles();
                printf("输入要关闭的句柄 ID: ");
                int handle_id;
                if (scanf("%d", &handle_id) == 1) {
                    if (handle_id >= 0 && handle_id < handle_count) {
                        printf("关闭句柄: %s (FD: %d)\n", 
                               handles&#91;handle_id].path, handles&#91;handle_id].fd);
                        
                        close(handles&#91;handle_id].fd);
                        unregister_handle(handles&#91;handle_id].fd);
                        printf("✓ 成功关闭句柄\n");
                    } else {
                        printf("无效的句柄 ID\n");
                    }
                }
                break;
            }
            
            case 5:
                list_handles();
                break;
                
            case 6: {
                if (handle_count == 0) {
                    printf("没有打开的句柄\n");
                    break;
                }
                
                list_handles();
                printf("输入要读取的文件句柄 ID: ");
                int handle_id;
                if (scanf("%d", &handle_id) == 1) {
                    if (handle_id >= 0 && handle_id < handle_count && !handles&#91;handle_id].is_dir) {
                        char buffer&#91;256];
                        ssize_t bytes_read = read(handles&#91;handle_id].fd, buffer, sizeof(buffer) - 1);
                        if (bytes_read > 0) {
                            buffer&#91;bytes_read] = '\0';
                            printf("文件内容:\n%s\n", buffer);
                        } else if (bytes_read == 0) {
                            printf("文件为空\n");
                        } else {
                            printf("读取文件失败: %s\n", strerror(errno));
                        }
                    } else {
                        printf("无效的文件句柄 ID\n");
                    }
                }
                break;
            }
            
            case 7: {
                if (handle_count == 0) {
                    printf("没有打开的句柄\n");
                    break;
                }
                
                list_handles();
                printf("输入要写入的文件句柄 ID: ");
                int handle_id;
                if (scanf("%d", &handle_id) == 1) {
                    if (handle_id >= 0 && handle_id < handle_count && !handles&#91;handle_id].is_dir) {
                        printf("输入要写入的内容: ");
                        char content&#91;256];
                        scanf("%255s", content);
                        
                        ssize_t bytes_written = write(handles&#91;handle_id].fd, content, strlen(content));
                        if (bytes_written > 0) {
                            printf("✓ 成功写入 %zd 字节\n", bytes_written);
                        } else {
                            printf("✗ 写入文件失败: %s\n", strerror(errno));
                        }
                    } else {
                        printf("无效的文件句柄 ID\n");
                    }
                }
                break;
            }
            
            case 8: {
                if (handle_count == 0) {
                    printf("没有打开的句柄\n");
                    break;
                }
                
                list_handles();
                printf("输入要查看状态的句柄 ID: ");
                int handle_id;
                if (scanf("%d", &handle_id) == 1) {
                    if (handle_id >= 0 && handle_id < handle_count) {
                        struct stat st;
                        if (fstat(handles&#91;handle_id].fd, &st) == 0) {
                            printf("文件状态:\n");
                            printf("  路径: %s\n", handles&#91;handle_id].path);
                            printf("  大小: %ld 字节\n", (long)st.st_size);
                            printf("  权限: %o\n", st.st_mode & 0777);
                            printf("  inode: %ld\n", (long)st.st_ino);
                            printf("  链接数: %ld\n", (long)st.st_nlink);
                            printf("  所有者: %d\n", st.st_uid);
                            printf("  组: %d\n", st.st_gid);
                        } else {
                            printf("获取文件状态失败: %s\n", strerror(errno));
                        }
                    } else {
                        printf("无效的句柄 ID\n");
                    }
                }
                break;
            }
            
            case 0:
                printf("退出文件管理工具\n");
                // 清理所有打开的句柄
                for (int i = 0; i < handle_count; i++) {
                    close(handles&#91;i].fd);
                }
                handle_count = 0;
                return;
                
            default:
                printf("无效选择\n");
                break;
        }
    }
}

/* Print a fixed overview of openat features and typical use cases to stdout. */
void demonstrate_openat_features() {
    // Output in print order; every entry carries its own newline(s).
    static const char *const overview[] = {
        "=== openat 特性演示 ===\n",
        "openat 主要特性:\n",
        "1. 相对路径支持: 相对于目录文件描述符打开文件\n",
        "2. 安全性提升: 避免路径解析竞态条件\n",
        "3. 灵活性: 支持 AT_FDCWD 和绝对路径\n",
        "4. 原子操作: 结合 O_CREAT|O_EXCL 实现原子创建\n",
        "5. 容器友好: 在受限环境中更安全\n",
        "\n使用场景:\n",
        "• 容器和沙箱环境中的文件操作\n",
        "• 多线程程序中的安全文件访问\n",
        "• 需要避免竞态条件的文件操作\n",
        "• 相对路径操作的系统工具\n",
    };
    const size_t count = sizeof(overview) / sizeof(overview[0]);

    for (size_t idx = 0; idx < count; idx++) {
        fputs(overview[idx], stdout);
    }
}

/*
 * Example 4 entry point: prints process information and the feature
 * overview, then starts the interactive manager if the user answers y/Y.
 */
int main() {
    char answer;
    int start_manager = 0;

    puts("=== openat 高级文件操作工具 ===");

    // Basic process information.
    puts("系统信息:");
    printf("  PID: %d\n", getpid());
    printf("  页面大小: %ld 字节\n", (long)getpagesize());

    demonstrate_openat_features();

    // The leading space in the format skips any pending whitespace.
    printf("\n是否启动交互式文件管理器? (y/N): ");
    if (scanf(" %c", &answer) == 1) {
        start_manager = (answer == 'y' || answer == 'Y');
    }

    if (start_manager) {
        interactive_file_manager();
    }

    return 0;
}

9. openat 与 open 的对比

// open vs. openat comparison (illustrative fragment, not a complete program):

// Traditional open():
int fd1 = open("/path/to/file.txt", O_RDONLY);
// • Accepts only absolute paths or paths relative to the current working directory
// • May be subject to race conditions
// • Less safe in multithreaded programs

// openat():
int dirfd = open("/path/to", O_RDONLY);
int fd2 = openat(dirfd, "file.txt", O_RDONLY);
// • Resolves the relative path "file.txt" against the directory behind dirfd
// • Works from an already opened directory descriptor
// • Avoids races in path resolution
// • A safer way to operate on files

// Special value AT_FDCWD:
int fd3 = openat(AT_FDCWD, "relative/path.txt", O_RDONLY);
// • Equivalent to a traditional open() with a relative path
// • Gives both styles one uniform interface

10. 实际应用场景

场景1：安全的文件操作

/* Return 1 when `name` is a non-empty relative path without any ".." component. */
static int is_confined_relative_path(const char *name) {
    if (!name || name[0] == '\0' || name[0] == '/') {
        return 0;
    }

    const char *p = name;
    while (*p != '\0') {
        const char *start = p;

        // Advance to the end of the current component.
        while (*p != '\0' && *p != '/') {
            p++;
        }
        if (p - start == 2 && start[0] == '.' && start[1] == '.') {
            return 0;
        }
        // Skip the separator(s) before the next component.
        while (*p == '/') {
            p++;
        }
    }
    return 1;
}

/*
 * Open `filename` read-only underneath `base_dir` and close it again.
 *
 * openat() by itself ignores dirfd for absolute paths and follows ".."
 * out of the directory, so such names are rejected before any descriptor
 * is opened.
 * NOTE(review): symbolic links inside base_dir can still point elsewhere;
 * openat2() with RESOLVE_BENEATH is the kernel-level answer where available.
 *
 * Returns 0 on success, -1 when the name is rejected or an open fails.
 */
int secure_file_operation(const char *base_dir, const char *filename) {
    if (!base_dir || !is_confined_relative_path(filename)) {
        return -1;
    }

    // Open the base directory.
    int dirfd = open(base_dir, O_RDONLY);
    if (dirfd == -1) {
        return -1;
    }

    // Open the file relative to the base directory.
    int fd = openat(dirfd, filename, O_RDONLY);
    if (fd == -1) {
        close(dirfd);
        return -1;
    }

    // Perform the file operation here.
    // ...

    close(fd);
    close(dirfd);
    return 0;
}

场景2：容器环境文件访问

/*
 * Open `path` read-only, resolved relative to `root_dirfd`.
 *
 * Returns the descriptor from openat(), or -1 with errno set on failure.
 * NOTE(review): this call does not confine the lookup by itself; an
 * absolute `path` makes openat() ignore root_dirfd. Callers that need
 * confinement must validate `path` first.
 */
int container_file_access(int root_dirfd, const char *path) {
    const int access_mode = O_RDONLY;
    int fd = openat(root_dirfd, path, access_mode);

    return fd;
}

场景3：批量文件操作

/*
 * Open every entry of `dirname` whose name does not start with '.' and
 * hand the descriptor to process_file().
 *
 * Entries are opened relative to the directory descriptor rather than by
 * building path strings. Entries that cannot be opened are skipped.
 * Returns 0 after the scan, -1 when the directory cannot be opened.
 */
int process_directory_files(const char *dirname) {
    int dirfd = open(dirname, O_RDONLY);
    if (dirfd == -1) return -1;

    DIR *dir = fdopendir(dirfd);
    if (!dir) {
        close(dirfd);
        return -1;
    }

    struct dirent *entry;
    while ((entry = readdir(dir)) != NULL) {
        // Skips ".", ".." and every other dot-prefixed name.
        if (entry->d_name[0] == '.') continue;

        // Open the entry relative to the directory descriptor.
        int fd = openat(dirfd, entry->d_name, O_RDONLY);
        if (fd != -1) {
            process_file(fd);
            close(fd);
        }
    }

    // closedir() also closes dirfd, which the stream took over.
    closedir(dir);
    return 0;
}

11. 注意事项

使用 openat 时需要注意：

目录文件描述符: dirfd 必须是有效的目录文件描述符

路径安全: openat 本身不会阻止 ../ 或绝对路径跳出 dirfd 所指的目录；需要自行校验路径，或使用 openat2() 的 RESOLVE_BENEATH 标志来防止目录遍历攻击

资源管理: 及时关闭文件描述符避免资源泄漏

错误处理: 仔细处理各种可能的错误情况

权限检查: 确保有足够的权限访问目标文件

竞态条件: 在多线程环境中注意同步

12. 系统配置检查

# Confirm the kernel provides the openat system call entry points
# (/proc/self/maps only lists memory mappings and never mentions syscalls)
grep sys_openat /proc/kallsyms

# Show file system types
df -T

# List the file descriptors open in the current process
ls -la /proc/self/fd/

# Look up the system call number of openat
ausyscall openat

# Show the limit on open file descriptors
ulimit -n

总结

openat 是现代 Linux 系统中推荐使用的文件打开函数：

关键特性:
1. 相对路径支持: 相对于目录文件描述符打开文件
2. 安全性提升: 避免路径解析竞态条件
3. 灵活性增强: 支持多种操作模式
4. 容器友好: 在受限环境中更安全

主要应用:
1. 安全的文件操作程序
2. 容器和虚拟化环境
3. 系统管理和监控工具
4. 需要避免竞态条件的应用

使用要点:
1. 理解 dirfd 参数的含义和使用
2. 正确处理相对路径和绝对路径
3. 注意资源管理和错误处理
4. 利用安全性优势防止攻击

openat 为现代 Linux 应用程序提供了更安全、更灵活的文件操作方式，是系统编程的重要工具。

2025-08-16

Linux系统编程

pause系统调用及示例

我们继续学习 Linux 系统编程中的重要函数。这次我们介绍 pause 函数，它是一个非常简单的系统调用，功能是使调用它的进程（或线程）进入睡眠（阻塞）状态，直到该进程接收到一个信号（signal）为止。

1. 函数介绍

pause 是一个 Linux 系统调用，它的作用非常直接：挂起调用它的进程，使其进入可中断的睡眠状态（interruptible sleep state）。进程会一直保持睡眠，不消耗 CPU 时间，直到发生以下两种情况之一：

1. 接收到信号: 进程被一个已被捕获的信号中断（即该信号安装了处理函数），例如 SIGINT (Ctrl+C)、SIGTERM (终止)、SIGUSR1 (用户自定义信号) 等。被忽略的信号不会唤醒 pause。
2. 进程被杀死: 例如收到 SIGKILL 信号，但这不会让 pause 返回，因为进程直接被终止了。

当进程因信号而被唤醒，并且信号处理函数执行完毕返回后，pause 调用才会返回。

pause 通常用于那些需要无限期等待某个外部事件（通过信号来通知）的程序中。它提供了一种简单、高效（不占用 CPU）的等待机制。

你可以把它想象成一个人在等待电话。他什么也不做，只是静静地坐着（睡眠），直到电话铃响（收到信号），他才会起身去接电话（pause 返回）。

2. 函数原型

#include <unistd.h> // 必需

int pause(void);

3. 功能

进入睡眠: 调用 pause 的进程会立即放弃 CPU，并被放入内核的等待队列中。
等待信号: 进程进入睡眠状态，直到有任何信号递达（delivered）到该进程。
被信号中断: 当信号被递达时（并且该信号没有被忽略或导致进程终止），进程会从 pause 调用中返回。

4. 参数

void: pause 函数不接受任何参数。

5. 返回值

总是返回 -1: pause 调用永远不会成功返回一个非负值。
总是设置 errno: 当 pause 因接收到信号而返回时，它会将 errno 设置为 EINTR (Interrupted system call)。

重要: pause 的返回唯一原因就是被信号中断。因此，检查返回值和 errno 通常是确认 pause 是因信号返回的标准做法。

6. 相似函数，或关联函数

sleep, nanosleep: 这些函数使进程睡眠指定的时间。pause 是无限期睡眠，直到信号。
sigsuspend: 这是一个更高级、更安全的用于等待信号的函数。它允许在等待信号的原子性操作中临时替换进程的信号掩码（blocked signals set）。这可以避免在设置信号掩码和调用 pause 之间收到信号的竞态条件（race condition）。
信号处理函数 (signal, sigaction): 用于设置当进程收到特定信号时应执行的操作。
sigprocmask: 用于检查或修改进程的信号掩码（哪些信号被阻塞）。
wait, waitpid: 使进程等待子进程状态改变（结束、停止等），这也是一种阻塞等待。

7. 示例代码

示例 1：基本的 pause 使用和信号处理

这个例子演示了如何使用 pause 使进程等待信号，并通过信号处理函数来响应信号。

#include <unistd.h>   // pause
#include <stdio.h>    // printf, perror
#include <stdlib.h>   // exit
#include <signal.h>   // signal, SIGINT, SIGTERM
#include <errno.h>    // errno
#include <string.h>   // memset

// Flags shared between the signal handler and the main loop. Both use
// sig_atomic_t so a store from the handler cannot be observed half-written.
volatile sig_atomic_t signal_received = 0;
volatile sig_atomic_t last_signal = 0;

/*
 * Handler for the signals registered in main.
 *
 * Announces the signal number on stdout, then records it in the flags above.
 * Only async-signal-safe calls are used: the message is assembled by hand
 * and emitted with write(), because printf() is not safe inside a handler.
 * errno is saved and restored so the interrupted code never sees a value
 * left behind by write().
 */
void signal_handler(int sig) {
    static const char prefix[] = "\nSignal handler called for signal ";
    char msg[64];
    char digits[12];
    size_t len = 0;
    size_t ndigits = 0;
    int saved_errno = errno;

    for (size_t i = 0; i < sizeof(prefix) - 1; i++) {
        msg[len++] = prefix[i];
    }

    // Render the signal number in decimal, least significant digit first.
    unsigned int value = (sig > 0) ? (unsigned int)sig : 0u;
    do {
        digits[ndigits++] = (char)('0' + value % 10u);
        value /= 10u;
    } while (value != 0u);
    while (ndigits > 0) {
        msg[len++] = digits[--ndigits];
    }
    msg[len++] = '\n';

    // Nothing useful can be done about a failed write inside a handler.
    ssize_t ignored = write(STDOUT_FILENO, msg, len);
    (void)ignored;

    signal_received = 1;
    last_signal = sig;

    errno = saved_errno;
}

/*
 * pause() example 1: install handlers for SIGINT and SIGTERM, ignore
 * SIGUSR1, then sleep in pause() until one of the handled signals arrives.
 */
int main() {
    printf("Process PID: %d\n", getpid());
    printf("Try sending signals using 'kill %d' or pressing Ctrl+C\n", getpid());
    printf("Send SIGTERM (kill %d) or SIGINT (Ctrl+C) to exit.\n", getpid());

    // 1. Install the signal handlers
    if (signal(SIGINT, signal_handler) == SIG_ERR) {
        perror("signal SIGINT");
        exit(EXIT_FAILURE);
    }
    if (signal(SIGTERM, signal_handler) == SIG_ERR) {
        perror("signal SIGTERM");
        exit(EXIT_FAILURE);
    }
    // SIGUSR1 is set to SIG_IGN. An ignored signal is discarded and does
    // NOT make pause() return; pause() only returns after a handler for a
    // caught signal has run.
    if (signal(SIGUSR1, SIG_IGN) == SIG_ERR) {
        perror("signal SIGUSR1");
        exit(EXIT_FAILURE);
    }

    printf("Entering main loop with pause()...\n");

    // 2. Main loop
    while (1) {
        // 3. Call pause() to go to sleep
        printf("Going to sleep... (waiting for a signal)\n");
        int result = pause(); // the process is suspended here

        // 4. pause() returned (the only cause is a caught signal)
        if (result == -1 && errno == EINTR) {
            printf("pause() was interrupted by a signal (errno=EINTR).\n");
            
            // 5. Check which signal the handler recorded
            if (signal_received) {
                printf("Handled signal %d in signal handler.\n", last_signal);
                if (last_signal == SIGINT || last_signal == SIGTERM) {
                    printf("Received exit signal. Cleaning up and exiting.\n");
                    break; // leave the main loop
                }
                // Reset the flag for the next iteration
                signal_received = 0;
            }
        } else {
            // Not expected: pause() is documented to return -1 with EINTR
            printf("Unexpected return from pause(): result=%d, errno=%d (%s)\n",
                   result, errno, strerror(errno));
        }
    }

    printf("Main loop exited. Performing cleanup...\n");
    // Cleanup work would go here

    printf("Program exiting normally.\n");
    return 0;
}

代码解释:

1. 定义了两个 volatile 全局变量 signal_received 和 last_signal，用于在信号处理函数和主循环之间通信。volatile 确保编译器不会优化掉对它们的访问；在信号处理函数中修改的变量推荐使用 sig_atomic_t 类型，以保证读写的原子性。
2. 定义了一个信号处理函数 signal_handler。当进程收到 SIGINT (Ctrl+C) 或 SIGTERM 时，该函数会被调用。它打印一条消息，并设置全局标志。
3. 在 main 函数中，使用 signal() 函数为 SIGINT 和 SIGTERM 注册了处理函数。对于 SIGUSR1，设置为忽略 (SIG_IGN)；请注意，被忽略的信号会被直接丢弃，不会中断 pause。
4. 进入一个无限循环 while(1)。
5. 在循环内部调用 pause()。进程在此处进入睡眠状态。
6. 当进程收到被捕获的信号、且信号处理函数返回后，pause() 调用返回 -1，并将 errno 设置为 EINTR。
7. 检查 pause 的返回值和 errno。如果符合预期（-1 和 EINTR），则继续处理。
8. 检查全局标志 signal_received，确定是哪个信号导致了 pause 返回，并根据信号类型决定是否退出循环。
9. 如果收到 SIGINT 或 SIGTERM，则跳出循环，执行清理工作并退出程序。

编译和运行:

gcc -o pause_example pause_example.c
./pause_example
# In another terminal:
# kill -USR1 <PID>  # send SIGUSR1 (ignored via SIG_IGN, so pause() does not return)
# kill <PID>        # send SIGTERM (kill's default signal; the program exits)
# kill -INT <PID>   # send SIGINT (same as pressing Ctrl+C)

示例 2：使用 pause 等待子进程结束 (不推荐，仅作演示)

虽然 wait/waitpid 是等待子进程结束的标准方法，但这个例子演示了如何（不推荐地）使用 pause 和 SIGCHLD 信号来实现类似功能。

#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <signal.h>
#include <sys/wait.h>
#include <errno.h>
#include <string.h>

// Raised by sigchld_handler() and polled by the parent's wait loop.
// volatile sig_atomic_t is the type that may be safely written from a
// signal handler and read from the interrupted code.
volatile sig_atomic_t child_done = 0;

/*
 * SIGCHLD handler: announce the signal and raise the child_done flag.
 *
 * Only async-signal-safe operations are used. printf() is not on the POSIX
 * async-signal-safe list (it can deadlock or corrupt stdio state when the
 * signal interrupts another stdio call), so the message is emitted with
 * write(2). errno is saved and restored so the interrupted code never sees
 * a value clobbered by write().
 *
 * sig: number of the delivered signal (unused).
 */
void sigchld_handler(int sig) {
    static const char msg[] = "SIGCHLD received.\n";
    int saved_errno = errno;

    (void)sig;

    // Best effort: nothing useful can be done here if the write fails.
    ssize_t ignored = write(STDOUT_FILENO, msg, sizeof(msg) - 1);
    (void)ignored;

    child_done = 1;
    errno = saved_errno;
}

/*
 * Demo: the parent sleeps in pause() until SIGCHLD reports that the child
 * has terminated, then reaps it with wait().
 *
 * Known limitation (kept on purpose, this example is about pause()): if
 * SIGCHLD is delivered after the loop condition is tested but before
 * pause() is entered, the handler has already run and pause() may sleep
 * forever. Production code should block SIGCHLD with sigprocmask() and wait
 * with sigsuspend(), or simply call waitpid().
 *
 * Returns 0 in the parent; the child leaves through _exit().
 */
int main() {
    pid_t pid;

    // 1. Install the SIGCHLD handler before forking so the child's exit
    //    cannot be reported before a handler exists.
    if (signal(SIGCHLD, sigchld_handler) == SIG_ERR) {
        perror("signal SIGCHLD");
        exit(EXIT_FAILURE);
    }

    // 2. Create the child process.
    pid = fork();
    if (pid == -1) {
        perror("fork");
        exit(EXIT_FAILURE);
    }

    if (pid == 0) {
        // --- child ---
        printf("Child process (PID %d) started.\n", (int)getpid());
        sleep(5); // simulate work
        printf("Child process (PID %d) finished.\n", (int)getpid());
        // _exit() does not flush stdio buffers; without this flush the two
        // messages above are lost whenever stdout is a pipe or a file.
        fflush(stdout);
        _exit(EXIT_SUCCESS);
    }

    // --- parent ---
    printf("Parent process (PID %d) created child (PID %d).\n", (int)getpid(), (int)pid);

    // 3. Sleep until the handler reports that the child is done.
    printf("Parent entering loop with pause() to wait for child...\n");
    while (!child_done) {
        printf("Parent is waiting (paused)...\n");
        pause(); // returns only after a signal handler has run
        printf("Parent woke up from pause().\n");

        if (!child_done) {
            continue; // woken by some other caught signal
        }

        printf("Parent detected child is done via signal flag.\n");

        // Reap the zombie and decode the wait status instead of printing
        // the raw encoded value.
        int status;
        pid_t waited_pid = wait(&status);
        if (waited_pid == -1) {
            perror("wait");
        } else if (WIFEXITED(status)) {
            printf("Parent reaped child PID %d with status %d.\n",
                   (int)waited_pid, WEXITSTATUS(status));
        } else if (WIFSIGNALED(status)) {
            printf("Parent reaped child PID %d killed by signal %d.\n",
                   (int)waited_pid, WTERMSIG(status));
        } else {
            printf("Parent reaped child PID %d with raw status %d.\n",
                   (int)waited_pid, status);
        }
    }
    printf("Parent process finished.\n");

    return 0;
}

代码解释:

定义了一个全局标志 child_done。

定义了 SIGCHLD 信号的处理函数 sigchld_handler，当子进程结束时，内核会向父进程发送 SIGCHLD 信号，该处理函数会设置 child_done 标志。

在 main 函数中，为 SIGCHLD 注册处理函数。

使用 fork 创建子进程。

子进程: 睡眠 5 秒后退出。

父进程:

进入一个循环，循环条件是 child_done 为假。
在循环中调用 pause()，使父进程睡眠。
当子进程结束，内核发送 SIGCHLD 信号，sigchld_handler 被调用，设置 child_done 为真。
pause() 返回，循环检查 child_done，发现为真，于是调用 wait() 清理子进程（收割僵尸进程）并退出循环。

重要提示与注意事项:

1. sigsuspend vs pause: 直接使用 pause 等待信号时，可能会遇到竞态条件。典型情况是“先检查标志、再调用 pause”：如果信号恰好在检查标志之后、进入 pause 之前到达，处理函数会先执行完毕，随后 pause 等待的是一个已经处理过的信号，进程可能永远睡眠。正确做法是先用 sigprocmask 阻塞该信号，检查标志后再调用 sigsuspend；sigsuspend 可以原子性地设置新的信号掩码并挂起进程，避免了这种竞态条件，是更推荐的方式。
2. 信号安全: 在信号处理函数中，应只调用异步信号安全（async-signal-safe）的函数。write 是异步信号安全的，而 printf 等 stdio 函数不是；实际代码中应改用 write，或者只做修改 volatile sig_atomic_t 变量这类简单操作。
3. SIGCHLD 处理: 示例 2 中的 SIGCHLD 处理方式是简化的。在实际应用中，一个信号处理函数可能需要处理多个子进程的退出，且 wait 可能需要在一个循环中调用直到没有更多子进程结束。使用 waitpid 通常更精确。
4. pause 的局限性: pause 只能等待任何信号。如果你只想等待特定信号，pause 本身无法做到，需要结合信号处理函数和全局标志来间接实现。

总结:

pause 是一个简单但重要的系统调用，用于使进程高效地（不消耗 CPU）等待信号。理解其工作原理以及与信号处理机制的结合使用是掌握 Linux 进程控制和同步的基础。在需要等待异步事件时，它是一个非常有用的工具，尽管在某些复杂场景下，sigsuspend 可能是更安全的选择。

https://www.calcguide.tech/2025/08/16/pause系统调用及示例/

2025-08-16

Linux系统编程

preadv1系统调用及示例

preadv 函数

preadv 是 pread 的分散读取版本，它允许一次性从文件的指定位置读取数据到多个不连续的缓冲区中。这是分散/聚集I/O操作的一部分。(https://www.calcguide.tech/2025/08/16/preadv1系统调用及示例/)

1. 函数介绍

preadv 是 pread 的分散读取版本，它允许一次性从文件的指定位置读取数据到多个不连续的缓冲区中。这是分散/聚集I/O操作的一部分。

2. 函数原型

#define _GNU_SOURCE
#include <sys/uio.h>
ssize_t preadv(int fd, const struct iovec *iov, int iovcnt, off_t offset);

3. 功能

从文件描述符 fd 指定的文件中，从 offset 位置开始读取数据到由 iov 描述的多个缓冲区中。该操作不会改变文件的当前读写位置。

4. 参数

int fd: 文件描述符，必须是已打开的文件
const struct iovec *iov: iovec结构体数组，描述多个缓冲区
int iovcnt: iov数组中的元素个数
off_t offset: 文件中的偏移量（从文件开始处计算）

5. 返回值

成功: 返回实际读取的总字节数
文件末尾: 返回0
失败: 返回-1，并设置errno

6. 相似函数，或关联函数

readv: 基本的分散读取函数
pread: 单缓冲区定位读取函数
pwritev: 对应的写入函数

7. 示例代码

#define _GNU_SOURCE
#include <sys/uio.h>
#include <unistd.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>

/**
 * Demonstrates a scatter read with preadv().
 *
 * Creates "test_preadv.txt", writes a fixed test string into it, reads the
 * file back from offset 0 into three separate buffers with a single
 * preadv() call, prints each buffer, and removes the file again.
 *
 * preadv() fills the buffers in array order and may return fewer bytes than
 * the total capacity, so each buffer is NUL-terminated at the number of
 * bytes it actually received, not at its capacity.
 *
 * @return 0 on success, -1 if any file operation fails.
 */
int demo_preadv_basic() {
    const char *path = "test_preadv.txt";
    const char *test_data = "This is a long test string for preadv demonstration purposes.";
    const size_t data_len = strlen(test_data);
    char buf1[20], buf2[15], buf3[30];
    struct iovec iov[3];
    ssize_t total_bytes;
    int fd;

    // Create the test file.
    fd = open(path, O_CREAT | O_WRONLY | O_TRUNC, 0644);
    if (fd == -1) {
        perror("创建测试文件失败");
        return -1;
    }

    // Write the test data; a failed or short write would make the rest of
    // the demo meaningless.
    ssize_t written = write(fd, test_data, data_len);
    if (written != (ssize_t)data_len) {
        if (written == -1) {
            perror("写入测试数据失败");
        } else {
            fprintf(stderr, "写入测试数据失败: 只写入了 %zd 字节\n", written);
        }
        close(fd);
        unlink(path);
        return -1;
    }
    close(fd);

    // Reopen the file for the scatter read.
    fd = open(path, O_RDONLY);
    if (fd == -1) {
        perror("打开文件失败");
        unlink(path);
        return -1;
    }

    printf("=== preadv 基本使用示例 ===\n");
    printf("测试数据: %s\n", test_data);
    printf("数据长度: %zu 字节\n\n", data_len);

    // Describe the three destination buffers; one byte of each is reserved
    // for the terminating NUL added after the read.
    iov[0].iov_base = buf1;
    iov[0].iov_len = sizeof(buf1) - 1;
    iov[1].iov_base = buf2;
    iov[1].iov_len = sizeof(buf2) - 1;
    iov[2].iov_base = buf3;
    iov[2].iov_len = sizeof(buf3) - 1;

    // Scatter read starting at file offset 0.
    total_bytes = preadv(fd, iov, 3, 0);
    if (total_bytes == -1) {
        perror("preadv 失败");
        close(fd);
        unlink(path);
        return -1;
    }

    printf("preadv 读取了 %zd 字节到3个缓冲区:\n", total_bytes);

    // Distribute the byte count over the buffers in order, terminating and
    // printing each one at its real fill level.
    size_t remaining = (size_t)total_bytes;
    for (int i = 0; i < 3; i++) {
        char *buf = iov[i].iov_base;
        size_t filled = remaining < iov[i].iov_len ? remaining : iov[i].iov_len;

        buf[filled] = '\0';
        remaining -= filled;
        printf("缓冲区%d (%zu字节): %s\n", i + 1, filled, buf);
    }

    close(fd);
    unlink(path);
    return 0;
}

/**
 * Record written to and read back from "person_data.bin" by
 * demo_preadv_struct().
 */
struct Person {
    int id;
    char name[20];
    float score;
};

/**
 * Demonstrates reading a binary record field by field with preadv().
 *
 * Writes one struct Person to "person_data.bin", then reads it back with a
 * single preadv() call whose three iovec entries point at three separate
 * variables (id, name, score), prints them, and removes the file.
 *
 * The scatter layout assumes struct Person has no padding between its
 * members, so that the consecutive iovec lengths line up with the member
 * offsets. A short read is treated as an error because it would leave some
 * of the destination variables uninitialized.
 *
 * @return 0 on success, -1 if any file operation fails or the read is short.
 */
int demo_preadv_struct() {
    const char *path = "person_data.bin";
    struct Person person = {1001, "Alice Johnson", 95.5f};
    struct iovec iov[3];
    int read_id;
    char read_name[20];
    float read_score;
    int fd;

    printf("\n=== preadv 读取结构体数据示例 ===\n");

    // Create the data file holding one record.
    fd = open(path, O_CREAT | O_WRONLY | O_TRUNC, 0644);
    if (fd == -1) {
        perror("创建数据文件失败");
        return -1;
    }

    ssize_t written = write(fd, &person, sizeof(person));
    if (written != (ssize_t)sizeof(person)) {
        if (written == -1) {
            perror("写入数据文件失败");
        } else {
            fprintf(stderr, "写入数据文件失败: 只写入了 %zd 字节\n", written);
        }
        close(fd);
        unlink(path);
        return -1;
    }
    close(fd);

    // Reopen the file and read the individual fields with preadv().
    fd = open(path, O_RDONLY);
    if (fd == -1) {
        perror("打开数据文件失败");
        unlink(path);
        return -1;
    }

    // One iovec entry per struct member, in member order.
    iov[0].iov_base = &read_id;
    iov[0].iov_len = sizeof(read_id);
    iov[1].iov_base = read_name;
    iov[1].iov_len = sizeof(read_name);
    iov[2].iov_base = &read_score;
    iov[2].iov_len = sizeof(read_score);
    const size_t expected = sizeof(read_id) + sizeof(read_name) + sizeof(read_score);

    ssize_t bytes_read = preadv(fd, iov, 3, 0);
    if (bytes_read == -1) {
        perror("preadv 读取结构体失败");
        close(fd);
        unlink(path);
        return -1;
    }
    if ((size_t)bytes_read != expected) {
        fprintf(stderr, "preadv 读取结构体失败: 只读取了 %zd 字节\n", bytes_read);
        close(fd);
        unlink(path);
        return -1;
    }

    // The on-disk name is a fixed-size field; force termination before it
    // is printed with %s.
    read_name[sizeof(read_name) - 1] = '\0';

    printf("读取了 %zd 字节的结构体数据:\n", bytes_read);
    printf("ID: %d\n", read_id);
    printf("Name: %s\n", read_name);
    printf("Score: %.1f\n", read_score);

    close(fd);
    unlink(path);
    return 0;
}

/*
 * Entry point of the preadv demo: runs the basic demo and, only if it
 * succeeded, the struct demo followed by a short summary.
 * Always returns 0.
 */
int main() {
    // Nothing more to show when the basic demo failed.
    if (demo_preadv_basic() != 0) {
        return 0;
    }

    demo_preadv_struct();

    printf("\n=== preadv 使用总结 ===\n");
    printf("优点：一次系统调用读取多个缓冲区，减少系统调用开销\n");
    printf("适用场景：读取结构化数据，协议解析，网络数据包处理\n");
    return 0;
}

2025-08-16

Linux系统编程

preadv2系统调用及示例

本文继续介绍 Linux 系统编程中的重要函数：preadv2、pwritev2 和 pkey_mprotect。

函数 1: preadv2

1. 函数介绍

preadv2 (pread vector 2) 是 preadv 系统调用的扩展版本。它结合了 pread（带偏移量读取）和 readv（分散读取）的优点，并引入了一个新的 flags 参数，提供了更灵活的 I/O 控制选项。

简单来说，preadv2 允许你从文件的指定偏移量开始，将数据分散读入到多个不连续的缓冲区中，同时还能指定一些高级 I/O 行为（通过 flags）。

2. 函数原型

#define _GNU_SOURCE // 必须定义以使用 preadv2
#include <sys/uio.h> // struct iovec
#include <unistd.h>  // ssize_t

ssize_t preadv2(int fd, const struct iovec *iov, int iovcnt, off_t offset, int flags);

3. 功能

从文件描述符 fd 指定的文件中，从绝对偏移量 offset 开始读取数据。
将读取的数据分散存储到由 iov 和 iovcnt 指定的多个缓冲区中。
不修改文件的当前读写位置指针（lseek 位置）。
根据 flags 参数执行特定的 I/O 操作。

4. 参数

int fd: 有效的文件描述符。
const struct iovec *iov: 指向 struct iovec 数组的指针，描述了多个分散的缓冲区。
int iovcnt: iov 数组中元素的个数。
off_t offset: 在文件中开始读取的绝对偏移量（以字节为单位）。必须是非负数。

int flags: 控制 I/O 行为的标志。可以是以下值的按位或组合：

0: 默认行为，等同于 preadv。
RWF_HIPRI: 尝试使用高优先级/实时 I/O（如果内核和设备支持）。
RWF_DSYNC: 要求 I/O 操作具有数据同步持久性（类似于 O_DSYNC）。
RWF_SYNC: 要求 I/O 操作具有文件同步持久性（类似于 O_SYNC）。
RWF_NOWAIT: 非阻塞。如果 I/O 无法立即完成（例如，需要从磁盘读取而数据不在页缓存中），则不等待，立即返回错误 EAGAIN。这需要内核和文件系统支持。
RWF_APPEND: 强制将写入追加到文件末尾（仅对 pwritev2 有效）。

5. 返回值

成功时: 返回实际读取的总字节数（0 表示 EOF）。
失败时: 返回 -1，并设置 errno。

函数 2: pwritev2

1. 函数介绍

pwritev2 (pwrite vector 2) 是 pwritev 系统调用的扩展版本。它结合了 pwrite（带偏移量写入）和 writev（集中写入）的优点，并同样引入了 flags 参数。

简单来说，pwritev2 允许你从多个不连续的缓冲区中收集数据，并将其写入到文件的指定偏移量处，同时还能指定一些高级 I/O 行为（通过 flags）。

2. 函数原型

#define _GNU_SOURCE
#include <sys/uio.h>
#include <unistd.h>

ssize_t pwritev2(int fd, const struct iovec *iov, int iovcnt, off_t offset, int flags);

3. 功能

从由 iov 和 iovcnt 指定的多个缓冲区中收集数据。
将收集到的数据写入到文件描述符 fd 指定的文件中，从绝对偏移量 offset 开始写入。
不修改文件的当前读写位置指针（lseek 位置）。
根据 flags 参数执行特定的 I/O 操作。

4. 参数

int fd: 有效的文件描述符。
const struct iovec *iov: 指向 struct iovec 数组的指针，描述了多个包含数据的缓冲区。
int iovcnt: iov 数组中元素的个数。

off_t offset: 在文件中开始写入的绝对偏移量（以字节为单位）。必须是非负数。

如果文件以 O_APPEND 模式打开，或者 flags 中设置了 RWF_APPEND，则 offset 参数会被忽略，数据总是被写入到文件末尾。

int flags: 控制 I/O 行为的标志。可以是以下值的按位或组合：

0: 默认行为，等同于 pwritev。
RWF_HIPRI: 尝试使用高优先级/实时 I/O。
RWF_DSYNC: 要求数据同步持久性。
RWF_SYNC: 要求文件同步持久性。
RWF_NOWAIT: 非阻塞。如果 I/O 无法立即完成，立即返回错误 EAGAIN。
RWF_APPEND: 强制将写入追加到文件末尾，即使文件没有以 O_APPEND 打开。

5. 返回值

成功时: 返回实际写入的总字节数。
失败时: 返回 -1，并设置 errno。

函数 3: pkey_mprotect

1. 函数介绍

pkey_mprotect 是 mprotect 系统调用的扩展，用于将一个内存区域与一个特定的内存保护键（Protection Key, pkey）相关联。

回忆一下 pkey_alloc/free：它们用于获取和释放 pkey 编号。pkey_mprotect 则是将这个编号应用到具体的内存区域上。

一旦内存区域通过 pkey_mprotect 与一个 pkey 关联，对该区域的访问权限就不仅受传统的 PROT_READ/PROT_WRITE/PROT_EXEC 控制，还受该 pkey 在 CPU 的 PKRU（Protection Key Rights User）寄存器中设置的权限控制。

2. 函数原型

#define _GNU_SOURCE
#include <sys/mman.h> // 包含 MPK 相关常量

int pkey_mprotect(void *addr, size_t len, int prot, int pkey);

3. 功能

修改从地址 addr 开始、长度为 len 字节的内存区域的访问权限。
将该内存区域与保护键 pkey（由 pkey_alloc 获得）进行关联。
设置该区域的基本权限为 prot（PROT_READ, PROT_WRITE, PROT_EXEC 的组合）。

4. 参数

void *addr: 要修改的内存区域的起始地址。必须是页对齐的。
size_t len: 内存区域的长度（以字节为单位）。会向上舍入到最近的页边界。
int prot: 新的内存保护标志。可以是 PROT_NONE, PROT_READ, PROT_WRITE, PROT_EXEC 及其按位或组合。
int pkey: 通过 pkey_alloc 获得的保护键编号（0-15）。

5. 返回值

成功时: 返回 0。
失败时: 返回 -1，并设置 errno。

示例代码

示例 1：preadv2 和 pwritev2 的基本使用

// preadv2_pwritev2_example.c
#define _GNU_SOURCE
#include <sys/uio.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>

#define FILENAME "test_piov2.txt"

/*
 * Demo of preadv2() and pwritev2():
 *   1. create FILENAME and fill it with writev() from two buffers,
 *   2. read it back from offset 0 into three buffers with preadv2(),
 *   3. append two more buffers with pwritev2() + RWF_APPEND on a descriptor
 *      that was NOT opened with O_APPEND,
 *   4. print the final file content.
 *
 * The read buffers are pre-filled with '.' and NUL-terminated up front, so
 * any part that preadv2() does not overwrite stays visible as dots.
 *
 * Exits with EXIT_FAILURE on any failed system call, returns 0 otherwise.
 */
int main() {
    int fd;
    char buf1[20], buf2[30], buf3[50];
    struct iovec iov_w[2], iov_r[3];
    ssize_t bytes_written, bytes_read;

    // 1. Create the test file and write the initial content with writev().
    fd = open(FILENAME, O_WRONLY | O_CREAT | O_TRUNC, 0644);
    if (fd == -1) {
        perror("open for write");
        exit(EXIT_FAILURE);
    }

    const char *data1 = "Part One: Hello, ";
    const char *data2 = "preadv2 and pwritev2 World!\n";
    // iov_base is a non-const void *, hence the casts; writev() only reads
    // from these buffers.
    iov_w[0].iov_base = (void*)data1;
    iov_w[0].iov_len = strlen(data1);
    iov_w[1].iov_base = (void*)data2;
    iov_w[1].iov_len = strlen(data2);

    bytes_written = writev(fd, iov_w, 2);
    if (bytes_written == -1) {
        perror("writev");
        close(fd);
        exit(EXIT_FAILURE);
    }
    printf("Written %zd bytes using writev.\n", bytes_written);
    close(fd);

    // 2. Read the file back with preadv2().
    fd = open(FILENAME, O_RDONLY);
    if (fd == -1) {
        perror("open for read");
        exit(EXIT_FAILURE);
    }

    // Pre-fill the read buffers with '.' and terminate them.
    memset(buf1, '.', sizeof(buf1) - 1); buf1[sizeof(buf1)-1] = '\0';
    memset(buf2, '.', sizeof(buf2) - 1); buf2[sizeof(buf2)-1] = '\0';
    memset(buf3, '.', sizeof(buf3) - 1); buf3[sizeof(buf3)-1] = '\0';

    iov_r[0].iov_base = buf1;
    iov_r[0].iov_len = sizeof(buf1) - 1;
    iov_r[1].iov_base = buf2;
    iov_r[1].iov_len = sizeof(buf2) - 1;
    iov_r[2].iov_base = buf3;
    iov_r[2].iov_len = sizeof(buf3) - 1;

    // Read from offset 0 with no flags (same behavior as preadv()).
    bytes_read = preadv2(fd, iov_r, 3, 0, 0);
    if (bytes_read == -1) {
        perror("preadv2");
        close(fd);
        exit(EXIT_FAILURE);
    }
    printf("\nRead %zd bytes using preadv2 from offset 0:\n", bytes_read);
    printf("Buffer 1: '%s'\n", buf1);
    printf("Buffer 2: '%s'\n", buf2);
    printf("Buffer 3: '%s'\n", buf3);

    close(fd);

    // 3. Append with pwritev2(); the descriptor is deliberately opened
    //    without O_APPEND.
    fd = open(FILENAME, O_WRONLY);
    if (fd == -1) {
        perror("open for write (again)");
        exit(EXIT_FAILURE);
    }

    const char *append1 = "Appended via ";
    const char *append2 = "pwritev2 with RWF_APPEND flag.\n";
    struct iovec iov_a[2];
    iov_a[0].iov_base = (void*)append1;
    iov_a[0].iov_len = strlen(append1);
    iov_a[1].iov_base = (void*)append2;
    iov_a[1].iov_len = strlen(append2);

    // RWF_APPEND forces the data to the end of the file; the offset
    // argument (0) is ignored.
    bytes_written = pwritev2(fd, iov_a, 2, 0, RWF_APPEND);
    if (bytes_written == -1) {
        perror("pwritev2 with RWF_APPEND");
        close(fd);
        exit(EXIT_FAILURE);
    }
    printf("\nAppended %zd bytes using pwritev2 with RWF_APPEND.\n", bytes_written);

    close(fd);

    // 4. Print the final file content for verification.
    printf("\n--- Final file content ---\n");
    fd = open(FILENAME, O_RDONLY);
    if (fd != -1) {
        char final_buf[200];
        ssize_t n = read(fd, final_buf, sizeof(final_buf) - 1);
        if (n > 0) {
            final_buf[n] = '\0';
            printf("%s", final_buf);
        }
        close(fd);
    }

    // unlink(FILENAME); // optional: remove the test file
    return 0;
}

代码解释:

创建一个测试文件，并使用 writev 写入一些初始内容。

重新打开文件进行读取。

使用 preadv2(fd, iov_r, 3, 0, 0) 从文件偏移量 0 开始，将数据分散读入三个缓冲区。flags 为 0，表示默认行为。

打开文件进行写入（非 O_APPEND 模式）。

使用 pwritev2(fd, iov_a, 2, 0, RWF_APPEND) 将数据写入文件。尽管 offset 是 0，但由于使用了 RWF_APPEND 标志，数据被追加到了文件末尾。

重新读取并打印文件内容以验证操作结果。

示例 2：pkey_mprotect 结合 pkey_alloc/free 使用

// pkey_mprotect_example.c
#define _GNU_SOURCE
#include <sys/mman.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <errno.h>
#include <string.h>
#include <signal.h>
#include <setjmp.h>

// Flag raised by sigsegv_handler() so main() can tell a caught fault from
// any other reason for arriving at the setjmp() return point.
static volatile sig_atomic_t sigsegv_caught = 0;
// Jump target that main() arms with setjmp() before the faulting access.
static jmp_buf jmp_env;

/*
 * SIGSEGV handler: record that the fault happened and resume execution at
 * the point saved in jmp_env instead of returning to the faulting
 * instruction.
 *
 * sig: number of the delivered signal (unused).
 */
void sigsegv_handler(int sig) {
    (void)sig;
    sigsegv_caught = 1;
    longjmp(jmp_env, 1);
}

/*
 * Enable or disable all access to memory tagged with `pkey` for the calling
 * thread by updating its PKRU rights through glibc's pkey_set().
 *
 * pkey:           protection key previously returned by pkey_alloc().
 * disable_access: non-zero to deny reads and writes (PKEY_DISABLE_ACCESS),
 *                 zero to allow them again.
 *
 * Must only be called after pkey_alloc() has succeeded: pkey_set() accesses
 * the PKRU register directly and is not safe on CPUs without protection-key
 * support. A failing pkey_set() is reported with perror().
 */
void set_pkey_access(int pkey, int disable_access) {
    printf("  [Concept] Modifying PKRU for pkey %d: %s\n",
           pkey, disable_access ? "DISABLE access" : "ENABLE access");

    if (pkey_set(pkey, disable_access ? PKEY_DISABLE_ACCESS : 0) == -1) {
        perror("pkey_set");
    }
}

/*
 * Demo of pkey_alloc() / pkey_mprotect() / pkey_free():
 * maps one page, tags it with a freshly allocated protection key, asks
 * set_pkey_access() to revoke access, attempts a read (expected to raise
 * SIGSEGV, which is caught via sigsegv_handler and setjmp), restores access
 * and reads again.
 *
 * Protection-key support is detected from the result of pkey_alloc() itself;
 * there is no sysconf() name for it.
 *
 * Exits with EXIT_FAILURE when a required call fails, returns 0 otherwise.
 */
int main() {
    // Install the SIGSEGV handler. The handler leaves through longjmp()
    // rather than returning, so the kernel never restores the signal mask;
    // SA_NODEFER keeps SIGSEGV unblocked while the handler runs so that a
    // later fault can still be caught.
    struct sigaction sa;
    memset(&sa, 0, sizeof(sa));
    sa.sa_handler = sigsegv_handler;
    sigemptyset(&sa.sa_mask);
    sa.sa_flags = SA_NODEFER;
    if (sigaction(SIGSEGV, &sa, NULL) == -1) {
        perror("sigaction");
        exit(EXIT_FAILURE);
    }

    size_t page_size = getpagesize();
    size_t len = page_size;
    void *addr;

    // 1. Allocate memory
    addr = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (addr == MAP_FAILED) {
        perror("mmap");
        exit(EXIT_FAILURE);
    }
    printf("Allocated %zu bytes at %p\n", len, addr);

    // 2. Write some data
    strcpy((char*)addr, "This memory is protected by a pkey.");
    printf("Written data: %s\n", (char*)addr);

    // 3. Allocate a protection key. ENOSPC is returned both when every key
    //    is in use and when the CPU/kernel has no protection-key support;
    //    ENOSYS means the kernel lacks the system call.
    int pkey = pkey_alloc(0, 0);
    if (pkey == -1) {
        if (errno == ENOSPC || errno == ENOSYS || errno == EOPNOTSUPP) {
            printf("MPK not supported on this hardware/kernel.\n");
        } else {
            perror("pkey_alloc");
        }
        munmap(addr, len);
        exit(EXIT_FAILURE);
    }
    printf("Allocated pkey: %d\n", pkey);

    // 4. Associate memory with the pkey using pkey_mprotect
    printf("\n--- Associating memory with pkey %d ---\n", pkey);
    if (pkey_mprotect(addr, len, PROT_READ | PROT_WRITE, pkey) == -1) {
        perror("pkey_mprotect");
        pkey_free(pkey);
        munmap(addr, len);
        exit(EXIT_FAILURE);
    }
    printf("Memory successfully associated with pkey %d.\n", pkey);

    // 5. Disable access via PKRU
    printf("\n--- Disabling access to pkey %d via PKRU ---\n", pkey);
    set_pkey_access(pkey, 1);

    // 6. Try to access protected memory (should trigger SIGSEGV)
    printf("\n--- Attempting to READ from protected memory ---\n");
    sigsegv_caught = 0;

    if (setjmp(jmp_env) == 0) {
        printf("  Trying to read from %p...\n", addr);
        // volatile forces the load to be performed.
        volatile char first_char = *((volatile char*)addr);
        printf("  ERROR: Read succeeded (first char: %c). This should not happen!\n", first_char);
    } else {
        // Reached through longjmp() from the signal handler.
        if (sigsegv_caught) {
            printf("  SUCCESS: SIGSEGV caught. Access correctly denied by pkey.\n");
        } else {
            printf("  Unexpected longjmp.\n");
        }
    }

    // 7. Re-enable access
    printf("\n--- Re-enabling access to pkey %d via PKRU ---\n", pkey);
    set_pkey_access(pkey, 0);

    // 8. Try to access memory again (should succeed)
    printf("\n--- Attempting to access memory again (should succeed now) ---\n");
    printf("  Reading from %p: %.50s\n", addr, (char*)addr);

    // 9. Cleanup
    if (pkey_free(pkey) == -1) {
        perror("pkey_free");
    }
    if (munmap(addr, len) == -1) {
        perror("munmap");
    }

    printf("\nPkey_mprotect example finished.\n");
    return 0;
}

代码解释 (概念性):

设置信号处理和 setjmp/longjmp 用于捕获 SIGSEGV。

使用 mmap 分配一页内存。

写入一些测试数据。

调用 pkey_alloc(0, 0) 获取一个 pkey。

关键步骤: 调用 pkey_mprotect(addr, len, PROT_READ | PROT_WRITE, pkey) 将分配的内存区域与获取的 pkey 关联起来。

概念性操作: 模拟通过修改 PKRU 寄存器来禁用对这个 pkey 的访问。

尝试读取受保护的内存，预期会触发 SIGSEGV。

概念性操作: 模拟重新启用对这个 pkey 的访问。

再次尝试读取，这次应该成功。

清理资源（释放 pkey 和内存）。

重要提示与注意事项:

内核版本:

preadv2/pwritev2: Linux 内核 4.6+。
pkey_mprotect/pkey_alloc/pkey_free: Linux 内核 4.9+ (MPK)。

glibc 版本: 需要 glibc 2.27+ 才能直接使用这些函数。

硬件支持: pkey_* 函数需要 CPU 支持（如 Intel x86_64 Skylake 及更新架构）。

_GNU_SOURCE: 必须定义此宏才能使用这些扩展函数。

flags 参数: preadv2/pwritev2 的 flags 提供了强大的 I/O 控制能力，特别是 RWF_NOWAIT（非阻塞）和 RWF_APPEND。

pkey_mprotect 是核心: 它是将 pkey 机制应用到实际内存区域的关键步骤。仅仅 pkey_alloc 是不够的。

PKRU 操作: 真正控制 pkey 权限需要修改 CPU 的 PKRU 寄存器。glibc 2.27+ 提供了 pkey_set() 和 pkey_get() 来读写当前线程的 pkey 权限（内部使用 WRPKRU/RDPKRU 指令），一般无需自己编写内联汇编。

错误处理: 始终检查返回值，特别是 pkey_* 函数可能返回 EOPNOTSUPP。

总结:

preadv2 和 pwritev2 是对现有 I/O 系统调用的有力增强，通过引入 flags 参数，提供了更细粒度的控制，如非阻塞 I/O 和强制追加写入。

pkey_mprotect 是内存保护键（MPK）技术的核心 API 之一，它允许将特定的内存区域与一个 pkey 绑定，从而实现比传统 mprotect 更快速、更灵活的内存访问控制。结合 pkey_alloc/free 和对 PKRU 寄存器的操作，可以构建出高性能的内存安全机制。

这三个函数都代表了 Linux 系统编程向更高性能、更细粒度控制发展的趋势。

preadv2 函数

1. 函数介绍

preadv2 是 preadv 的增强版本，支持额外的标志参数，提供更多的控制选项。它是Linux 4.6引入的新特性。

2. 函数原型

#define _GNU_SOURCE
#include <sys/uio.h>
ssize_t preadv2(int fd, const struct iovec *iov, int iovcnt, off_t offset, int flags);

3. 功能

与 preadv 类似，但从指定位置读取数据到多个缓冲区，并支持额外的控制标志。

4. 参数

int fd: 文件描述符
const struct iovec *iov: iovec结构体数组
int iovcnt: iov数组元素个数
off_t offset: 文件偏移量
int flags: 控制标志（如RWF_HIPRI, RWF_DSYNC等）

5. 返回值

成功: 返回实际读取的总字节数
失败: 返回-1，并设置errno

6. 相似函数，或关联函数

preadv: 基本版本
pwritev2: 对应的写入函数
read: 基本读取函数

7. 示例代码

#define _GNU_SOURCE
#include <sys/uio.h>
#include <unistd.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>

/**
 * Demonstrates the basic use of preadv2() (Linux 4.6+).
 *
 * Creates "test_preadv2.txt", writes a fixed test string, reads it back
 * from offset 0 into two buffers with preadv2() and flags = 0, prints both
 * buffers and removes the file.
 *
 * Each buffer is NUL-terminated at the number of bytes it actually
 * received, because preadv2() may return fewer bytes than the combined
 * buffer capacity.
 *
 * @return 0 on success or when the kernel lacks preadv2 (ENOSYS),
 *         -1 on any other failure.
 */
int demo_preadv2_basic() {
    const char *path = "test_preadv2.txt";
    const char *test_data = "This is test data for preadv2 function demonstration.";
    const size_t data_len = strlen(test_data);
    char buf1[30], buf2[20];
    struct iovec iov[2];
    ssize_t bytes_read;
    int fd;

    printf("=== preadv2 基本使用示例 ===\n");

    // Create the test file.
    fd = open(path, O_CREAT | O_WRONLY | O_TRUNC, 0644);
    if (fd == -1) {
        perror("创建测试文件失败");
        return -1;
    }

    ssize_t written = write(fd, test_data, data_len);
    if (written != (ssize_t)data_len) {
        if (written == -1) {
            perror("写入测试数据失败");
        } else {
            fprintf(stderr, "写入测试数据失败: 只写入了 %zd 字节\n", written);
        }
        close(fd);
        unlink(path);
        return -1;
    }
    close(fd);

    // Reopen the file for reading.
    fd = open(path, O_RDONLY);
    if (fd == -1) {
        perror("打开文件失败");
        unlink(path);
        return -1;
    }

    // Describe the two destination buffers, reserving one byte each for
    // the terminating NUL.
    iov[0].iov_base = buf1;
    iov[0].iov_len = sizeof(buf1) - 1;
    iov[1].iov_base = buf2;
    iov[1].iov_len = sizeof(buf2) - 1;

    // flags = 0 selects the default behavior.
    bytes_read = preadv2(fd, iov, 2, 0, 0);
    if (bytes_read == -1) {
        // A kernel without the system call is reported but not treated as
        // a failure of the demo.
        int unsupported = (errno == ENOSYS);
        if (unsupported) {
            printf("系统不支持 preadv2 函数\n");
        } else {
            perror("preadv2 失败");
        }
        close(fd);
        unlink(path);
        return unsupported ? 0 : -1;
    }

    printf("preadv2 成功读取 %zd 字节\n", bytes_read);

    // Terminate and print each buffer at its real fill level.
    size_t remaining = (size_t)bytes_read;
    for (int i = 0; i < 2; i++) {
        char *buf = iov[i].iov_base;
        size_t filled = remaining < iov[i].iov_len ? remaining : iov[i].iov_len;

        buf[filled] = '\0';
        remaining -= filled;
        printf("缓冲区%d: %s\n", i + 1, buf);
    }

    close(fd);
    unlink(path);
    return 0;
}

/**
 * Demonstrates a preadv2() flag: a non-blocking read with RWF_NOWAIT.
 *
 * Lists the RWF_* flags, creates "advanced_test.txt", then reads it with
 * preadv2(..., RWF_NOWAIT). The outcome of the read (success, "would
 * block", or "flag not supported") is only reported; it does not affect
 * the return value. The file is removed afterwards.
 *
 * @return 0 once the file has been created and reopened, -1 if creating,
 *         writing or reopening the file fails.
 */
int demo_preadv2_advanced() {
    const char *path = "advanced_test.txt";
    const char *test_data = "Advanced preadv2 test data for feature demonstration.";
    const size_t data_len = strlen(test_data);
    struct iovec iov[1];
    char buffer[100];
    ssize_t bytes_read;
    int fd;

    printf("\n=== preadv2 高级特性示例 ===\n");
    printf("preadv2 支持的标志包括:\n");
    printf("  RWF_HIPRI: 高优先级I/O\n");
    printf("  RWF_DSYNC: 数据同步写入\n");
    printf("  RWF_SYNC:  同步写入\n");
    printf("  RWF_NOWAIT: 非阻塞操作\n");
    printf("  RWF_APPEND: 追加模式写入\n");

    // Create the test file.
    fd = open(path, O_CREAT | O_WRONLY | O_TRUNC, 0644);
    if (fd == -1) {
        perror("创建测试文件失败");
        return -1;
    }

    ssize_t written = write(fd, test_data, data_len);
    if (written != (ssize_t)data_len) {
        if (written == -1) {
            perror("写入测试数据失败");
        } else {
            fprintf(stderr, "写入测试数据失败: 只写入了 %zd 字节\n", written);
        }
        close(fd);
        unlink(path);
        return -1;
    }
    close(fd);

    fd = open(path, O_RDONLY);
    if (fd == -1) {
        perror("打开文件失败");
        unlink(path);
        return -1;
    }

    // Single destination buffer, one byte reserved for the NUL.
    iov[0].iov_base = buffer;
    iov[0].iov_len = sizeof(buffer) - 1;

    // Non-blocking read: fails with EAGAIN instead of waiting when the
    // data cannot be returned immediately.
    bytes_read = preadv2(fd, iov, 1, 0, RWF_NOWAIT);
    if (bytes_read == -1) {
        if (errno == EAGAIN || errno == EWOULDBLOCK) {
            printf("非阻塞操作：数据暂时不可用\n");
        } else if (errno == ENOSYS || errno == EOPNOTSUPP) {
            // ENOSYS: no preadv2 at all; EOPNOTSUPP: the flag is unknown
            // to the kernel or unsupported by the file system.
            printf("系统不支持 RWF_NOWAIT 标志\n");
        } else {
            printf("preadv2 with RWF_NOWAIT 失败: %s\n", strerror(errno));
        }
    } else {
        buffer[bytes_read] = '\0';
        printf("非阻塞读取成功: %s\n", buffer);
    }

    close(fd);
    unlink(path);
    return 0;
}

/*
 * Entry point of the preadv2 demo: prints the kernel requirement, runs the
 * basic demo and, only if it returned 0, the advanced demo and a summary.
 * Always returns 0.
 */
int main() {
    printf("preadv2 需要 Linux 4.6+ 内核支持\n");

    // Skip the rest when the basic demo reported a failure.
    if (demo_preadv2_basic() != 0) {
        return 0;
    }

    demo_preadv2_advanced();

    printf("\n=== preadv2 使用总结 ===\n");
    printf("优点：支持额外控制标志，更灵活的I/O控制\n");
    printf("注意：需要较新内核版本支持\n");
    return 0;
}

2025-08-16

Linux系统编程

pread系统调用及示例

我们继续学习 Linux 系统编程中的重要函数。这次我们介绍 pread 和 pwrite 函数，它们是 read 和 write 系统调用的增强版本，允许在单次调用中指定文件偏移量，而不会改变文件的当前读写位置指针。

1. 函数介绍

pread (Positioned Read) 和 pwrite (Positioned Write) 是 Linux 系统调用，它们结合了 read/write 的数据传输功能和 lseek 的定位功能。

pread: 从文件描述符 fd 关联的文件中，从指定的偏移量 offset 处开始读取 count 个字节的数据，并将其存储到缓冲区 buf 中。关键点：此操作不会修改文件的当前读写位置指针（即调用 lseek(fd, 0, SEEK_CUR) 返回的值保持不变）。
pwrite: 将 count 个字节的数据从缓冲区 buf 写入到文件描述符 fd 关联的文件中，从指定的偏移量 offset 处开始写入。关键点：此操作也不会修改文件的当前读写位置指针。

你可以把它们想象成 lseek + read 或 lseek + write 的原子性组合，但又不影响文件的“书签”（当前文件偏移量）。

2. 函数原型

#include <unistd.h> // 必需

// 从指定偏移量读取
ssize_t pread(int fd, void *buf, size_t count, off_t offset);

// 向指定偏移量写入
ssize_t pwrite(int fd, const void *buf, size_t count, off_t offset);

3. 功能

pread(fd, buf, count, offset):

将文件 fd 的读取位置临时设置到 offset。
从该位置读取最多 count 个字节到 buf。
读取完成后，文件的全局读写位置指针保持不变。

pwrite(fd, buf, count, offset):

将文件 fd 的写入位置临时设置到 offset。
从 buf 写入 count 个字节到该位置。
写入完成后，文件的全局读写位置指针保持不变。

这种“原子性定位并操作”的特性在多线程环境中特别有用，可以避免多个线程同时操作同一个文件描述符的当前偏移量而导致的竞争条件（race condition）。

4. 参数

这两个函数的参数非常相似：

int fd: 有效的文件描述符。
void *buf (pread) / const void *buf (pwrite): 指向数据缓冲区的指针。
size_t count: 要读取/写入的字节数。
off_t offset: 在文件中进行读取/写入操作的绝对偏移量（从文件开头算起的字节数）。

5. 返回值

成功时:

返回实际读取/写入的字节数。这个数可能小于请求的 count（例如，在读取时接近文件末尾，或在写入时遇到磁盘空间不足）。
对于 pread，如果返回 0，通常表示偏移量已在文件末尾或没有更多数据。

失败时:

返回 -1，并设置全局变量 errno 来指示具体的错误原因（例如 EBADF fd 无效，EINVAL offset 无效，EIO I/O 错误等）。

6. 相似函数，或关联函数

read, write: 基础的读写函数，它们的操作基于并会修改文件的当前偏移量。
lseek: 用于显式地移动文件的当前读写位置指针。pread/pwrite 内部可能使用了类似 lseek 的机制，但对用户是透明的，且不影响全局偏移量。
mmap: 另一种访问文件内容的方式，通过内存映射将文件内容映射到进程地址空间。

7. 示例代码

示例 1：基本 pread 和 pwrite 使用

这个例子演示了如何使用 pread 从文件的不同位置读取数据，以及使用 pwrite 向文件的不同位置写入数据，同时文件的当前偏移量保持不变。

#include <unistd.h>  // pread, pwrite, open, close, lseek
#include <fcntl.h>   // O_RDWR, O_CREAT
#include <stdio.h>   // perror, printf
#include <stdlib.h>  // exit
#include <string.h>  // strlen

// Report the failed call with perror(), close fd and terminate the program
// with EXIT_FAILURE.
static void fail_and_close(int fd, const char *what) {
    perror(what);
    close(fd);
    exit(EXIT_FAILURE);
}

/*
 * Demo of pread() and pwrite(): creates a file, overwrites two regions with
 * pwrite(), reads three regions with pread(), and prints the descriptor's
 * current offset (via lseek(fd, 0, SEEK_CUR)) after the initial write, after
 * the pwrite() calls and at the end, so the three values can be compared.
 *
 * Exits with EXIT_FAILURE on any failed system call, returns 0 otherwise.
 * The file is left in place so its content can be inspected.
 */
int main() {
    int fd;
    const char *filename = "pread_pwrite_example.txt";
    const char *initial_data = "This is the initial content of the file.\nIt spans multiple lines.\n";
    const char *write_data1 = "[OVERWRITTEN_PART_1]";
    const char *write_data2 = "[OVERWRITTEN_PART_2]";
    char read_buffer[100];
    ssize_t bytes_rw;
    off_t current_offset;

    // 1. Create the file and write the initial data.
    fd = open(filename, O_RDWR | O_CREAT | O_TRUNC, 0644);
    if (fd == -1) {
        perror("open for creation");
        exit(EXIT_FAILURE);
    }

    if (write(fd, initial_data, strlen(initial_data)) == -1) {
        fail_and_close(fd, "write initial data");
    }
    printf("Created file '%s' and wrote initial data.\n", filename);

    // 2. Query the current file offset (write() advanced it).
    current_offset = lseek(fd, 0, SEEK_CUR);
    if (current_offset == -1) {
        fail_and_close(fd, "lseek to get current offset");
    }
    printf("Current file offset after initial write: %ld\n", (long)current_offset);

    // --- pwrite ---
    printf("\n--- Using pwrite ---\n");
    // Overwrite at offset 5.
    bytes_rw = pwrite(fd, write_data1, strlen(write_data1), 5);
    if (bytes_rw == -1) {
        fail_and_close(fd, "pwrite 1");
    }
    printf("pwrite 1: Wrote %zd bytes at offset 5.\n", bytes_rw);

    // Overwrite at offset 30.
    bytes_rw = pwrite(fd, write_data2, strlen(write_data2), 30);
    if (bytes_rw == -1) {
        fail_and_close(fd, "pwrite 2");
    }
    printf("pwrite 2: Wrote %zd bytes at offset 30.\n", bytes_rw);

    // 3. Query the file offset again for comparison.
    off_t offset_after_pwrite = lseek(fd, 0, SEEK_CUR);
    if (offset_after_pwrite == -1) {
        fail_and_close(fd, "lseek after pwrite");
    }
    printf("Current file offset after pwrite calls: %ld (Should be same as before)\n",
           (long)offset_after_pwrite);

    // --- pread ---
    printf("\n--- Using pread ---\n");
    // 20 bytes from offset 0. Every request below is smaller than
    // read_buffer, so there is always room for the terminating NUL.
    bytes_rw = pread(fd, read_buffer, 20, 0);
    if (bytes_rw == -1) {
        fail_and_close(fd, "pread 1");
    }
    read_buffer[bytes_rw] = '\0';
    printf("pread 1: Read %zd bytes from offset 0: '%s'\n", bytes_rw, read_buffer);

    // 25 bytes from offset 15.
    bytes_rw = pread(fd, read_buffer, 25, 15);
    if (bytes_rw == -1) {
        fail_and_close(fd, "pread 2");
    }
    read_buffer[bytes_rw] = '\0';
    printf("pread 2: Read %zd bytes from offset 15: '%s'\n", bytes_rw, read_buffer);

    // 10 bytes from offset 50; a return of 0 is reported as end of file.
    bytes_rw = pread(fd, read_buffer, 10, 50);
    if (bytes_rw == -1) {
        fail_and_close(fd, "pread 3");
    } else if (bytes_rw == 0) {
        printf("pread 3: Read %zd bytes from offset 50 (likely EOF).\n", bytes_rw);
    } else {
        read_buffer[bytes_rw] = '\0';
        printf("pread 3: Read %zd bytes from offset 50: '%s'\n", bytes_rw, read_buffer);
    }

    // 4. Query the file offset one last time.
    off_t final_offset = lseek(fd, 0, SEEK_CUR);
    if (final_offset == -1) {
        fail_and_close(fd, "lseek to get final offset");
    }
    printf("\nFinal file offset: %ld (Should still be the same)\n", (long)final_offset);

    if (close(fd) == -1) {
        perror("close");
        exit(EXIT_FAILURE);
    }

    printf("File operations completed. Check the file content.\n");
    return 0;
}

代码解释:

创建一个文件并写入一些初始数据。

使用 lseek(fd, 0, SEEK_CUR) 获取并打印当前文件偏移量（应该在文件末尾）。

pwrite 操作:

调用 pwrite 两次，分别在偏移量 5 和 30 处写入数据。
每次调用后，再次使用 lseek 检查文件偏移量，确认它没有改变。

pread 操作:

调用 pread 三次，分别从偏移量 0、15 和 50 处读取数据。
打印读取到的内容。

最后再次检查文件偏移量，确认在整个过程中它始终保持不变。

关闭文件。

示例 2：多线程环境中的 pread/pwrite

这个例子（概念性地）说明了 pread/pwrite 在多线程场景下的优势。虽然完整的多线程代码比较复杂，但我们可以通过伪代码和解释来理解。

// 假想的多线程程序片段

#include <pthread.h> // POSIX 线程
#include <unistd.h>  // pread, pwrite
// ... 其他包含 ...

int shared_file_fd; // file descriptor shared by all threads

/*
 * Thread function 1: read the file header.
 *
 * Reads HEADER_SIZE bytes at offset 0 of shared_file_fd with pread() and
 * passes whatever was read to process_header(). pread() takes the offset as
 * an argument instead of using the descriptor's current position.
 *
 * arg: unused. Returns NULL.
 */
void* thread_read_header(void *arg) {
    char header_buf[HEADER_SIZE];
    ssize_t bytes_read;

    (void)arg;

    bytes_read = pread(shared_file_fd, header_buf, HEADER_SIZE, 0);
    if (bytes_read > 0) {
        // Handle the header data...
        process_header(header_buf, bytes_read);
    } else if (bytes_read == -1) {
        perror("pread in thread 1");
    }
    return NULL;
}

// 线程函数 2: 读取文件尾部
void* thread_read_footer(void *arg) {
    char footer_buf&#91;FOOTER_SIZE];
    ssize_t bytes_read;
    off_t file_size;

    // 获取文件大小 (可能需要预先获取或用 fstat)
    file_size = get_file_size_somehow(shared_file_fd);

    // 线程 2 总是从文件末尾倒数的位置读取尾部
    // 使用 pread 确保不影响其他线程的文件位置
    bytes_read = pread(shared_file_fd, footer_buf, FOOTER_SIZE, file_size - FOOTER_SIZE);
    if (bytes_read > 0) {
        // 处理尾部数据...
        process_footer(footer_buf, bytes_read);
    }
    return NULL;
}

/*
 * Thread function 3: write one log line somewhere in the middle of the file.
 *
 * arg carries the target file offset, smuggled through the void * thread
 * argument. The line is written with pwrite(), which takes the offset as an
 * argument instead of using the descriptor's current position.
 *
 * Returns NULL.
 */
void* thread_write_log(void *arg) {
    off_t where = (off_t)arg;
    const char *entry = "Log entry from thread 3\n";

    ssize_t n = pwrite(shared_file_fd, entry, strlen(entry), where);
    if (n != -1) {
        printf("Thread 3 wrote %zd bytes at offset %ld\n", n, (long)where);
        return NULL;
    }

    perror("pwrite in thread 3");
    return NULL;
}

/*
 * Main function (conceptual): starts the header reader, the footer reader
 * and the log writer on the shared descriptor, then waits for all three.
 * Opening and closing shared_file_fd is left out of the sketch.
 */
int main() {
    pthread_t header_reader, footer_reader, log_writer;

    // ... open the file into shared_file_fd ...

    // Start the three workers; only the log writer takes an argument, the
    // offset at which it should write.
    pthread_create(&header_reader, NULL, thread_read_header, NULL);
    pthread_create(&footer_reader, NULL, thread_read_footer, NULL);
    pthread_create(&log_writer, NULL, thread_write_log, (void*)MIDDLE_OFFSET);

    // Wait until every worker has finished.
    pthread_join(header_reader, NULL);
    pthread_join(footer_reader, NULL);
    pthread_join(log_writer, NULL);

    // ... close the file ...
    return 0;
}

代码解释 (概念性):

1. 假设有多个线程共享同一个文件描述符 shared_file_fd。
2. 线程 1 (thread_read_header): 需要读取文件头部。它使用 pread(fd, buf, size, 0)，明确指定从偏移量 0 开始读取。这不会影响文件的全局偏移量，因此其他线程可以同时进行其他操作。
3. 线程 2 (thread_read_footer): 需要读取文件尾部。它使用 pread(fd, buf, size, file_size - size)，明确指定从距文件末尾 size 字节处开始读取。同样，不影响全局偏移量。
4. 线程 3 (thread_write_log): 需要在文件中间写入日志。它使用 pwrite(fd, buf, size, offset)，明确指定写入位置。不影响全局偏移量。
5. 如果使用传统的 lseek + read/write，线程在 lseek 和 read/write 之间可能会被切换，导致线程间相互干扰文件偏移量，产生不可预测的结果。pread/pwrite 的原子性定位和操作避免了这个问题。

总结:

pread 和 pwrite 是非常实用的系统调用，特别是在需要随机访问文件或在多线程环境中操作文件时。它们通过将定位和 I/O 操作合并为一个原子步骤，并且不修改文件的全局状态，简化了编程并提高了安全性。理解它们的关键在于掌握它们与传统 read/write + lseek 组合的区别。

2025-08-16

Linux系统编程

prlimit64系统调用及示例

prlimit64 函数

1. 函数介绍

prlimit64 用于获取和设置进程的资源限制。它是 getrlimit 和 setrlimit 的增强版本，支持64位资源限制值。

2. 函数原型

#define _GNU_SOURCE
#include <sys/resource.h>
int prlimit64(pid_t pid, int resource, const struct rlimit64 *new_limit, struct rlimit64 *old_limit);

3. 功能

获取和/或设置指定进程的资源限制。可以用于控制进程使用的各种系统资源，如内存、文件描述符、CPU时间等。

4. 参数

pid_t pid: 目标进程ID（0表示当前进程）
int resource: 资源类型（如RLIMIT_AS, RLIMIT_NOFILE等）
const struct rlimit64 *new_limit: 新的资源限制（NULL表示不设置）
struct rlimit64 *old_limit: 保存旧的资源限制（NULL表示不获取）

5. 返回值

成功: 返回0
失败: 返回-1，并设置errno

6. 相似函数，或关联函数

getrlimit/setrlimit: 32位版本
ulimit: shell内置命令
getrusage: 获取资源使用情况

7. 示例代码

#define _GNU_SOURCE
#include <sys/resource.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <errno.h>

/**
 * Prints the soft and hard limit of one resource for the calling process.
 *
 * The limits are queried with prlimit64(0, resource, NULL, &limit), i.e.
 * without changing them. Both values are printed with their label, and
 * RLIM64_INFINITY is shown as "无限制" for either of them. Limits are
 * unsigned 64-bit quantities and are printed as such.
 *
 * @param resource RLIMIT_* constant selecting the resource.
 * @param name     label printed in front of the values.
 */
void display_resource_limit(int resource, const char *name) {
    struct rlimit64 limit;

    if (prlimit64(0, resource, NULL, &limit) != 0) {
        printf("%-20s: 获取失败 (%s)\n", name, strerror(errno));
        return;
    }

    printf("%-20s: ", name);

    // Soft limit, always labelled so both halves of the line read alike.
    if (limit.rlim_cur == RLIM64_INFINITY) {
        printf("软限制=无限制");
    } else {
        printf("软限制=%llu", (unsigned long long)limit.rlim_cur);
    }

    // Hard limit.
    if (limit.rlim_max == RLIM64_INFINITY) {
        printf(", 硬限制=无限制\n");
    } else {
        printf(", 硬限制=%llu\n", (unsigned long long)limit.rlim_max);
    }
}

/**
 * Demonstrate querying limits with prlimit64(): prints the current values of
 * a fixed list of commonly used resources.
 *
 * @return Always 0.
 */
int demo_prlimit64_get() {
    // Resources to report, in display order.
    static const struct {
        int resource;
        const char *label;
    } common_limits[] = {
        { RLIMIT_AS,     "虚拟内存" },
        { RLIMIT_CORE,   "核心文件大小" },
        { RLIMIT_CPU,    "CPU时间" },
        { RLIMIT_DATA,   "数据段大小" },
        { RLIMIT_FSIZE,  "文件大小" },
        { RLIMIT_NOFILE, "文件描述符数" },
        { RLIMIT_NPROC,  "进程数" },
        { RLIMIT_STACK,  "栈大小" },
    };
    const size_t entries = sizeof(common_limits) / sizeof(common_limits[0]);

    printf("=== prlimit64 获取资源限制示例 ===\n");

    for (size_t k = 0; k < entries; k++) {
        display_resource_limit(common_limits[k].resource, common_limits[k].label);
    }

    return 0;
}

/**
 * Demonstrate changing limits with prlimit64(): first the file-descriptor
 * limit (soft 1024 / hard 2048), then the virtual-memory limit
 * (soft 100MB / hard 200MB). Each outcome is reported on stdout.
 *
 * @return Always 0; a failed change is reported, not propagated.
 */
int demo_prlimit64_set() {
    struct rlimit64 observed;
    struct rlimit64 requested;

    printf("\n=== prlimit64 设置资源限制示例 ===\n");

    // Show the file-descriptor limit before touching it.
    if (prlimit64(0, RLIMIT_NOFILE, NULL, &observed) == 0) {
        printf("当前文件描述符限制: 软限制=%lld, 硬限制=%lld\n",
               (long long)observed.rlim_cur, (long long)observed.rlim_max);
    }

    // Ask for a new file-descriptor limit.
    requested.rlim_cur = 1024;  // soft limit
    requested.rlim_max = 2048;  // hard limit

    if (prlimit64(0, RLIMIT_NOFILE, &requested, NULL) != 0) {
        printf("设置文件描述符限制失败: %s\n", strerror(errno));
        printf("注意：可能需要root权限或在硬限制范围内调整\n");
    } else {
        printf("成功设置新的文件描述符限制\n");

        // Read the limit back to confirm what is now in effect.
        if (prlimit64(0, RLIMIT_NOFILE, NULL, &observed) == 0) {
            printf("设置后文件描述符限制: 软限制=%lld, 硬限制=%lld\n",
                   (long long)observed.rlim_cur, (long long)observed.rlim_max);
        }
    }

    // Second demonstration: the address-space limit.
    printf("\n尝试设置虚拟内存限制:\n");
    requested.rlim_cur = 100 * 1024 * 1024;  // 100MB
    requested.rlim_max = 200 * 1024 * 1024;  // 200MB

    if (prlimit64(0, RLIMIT_AS, &requested, NULL) != 0) {
        printf("设置虚拟内存限制失败: %s\n", strerror(errno));
    } else {
        printf("成功设置虚拟内存限制为100MB\n");
    }

    return 0;
}

/**
 * Show how RLIMIT_NOFILE affects open(): try to create up to MAX_TEST_FILES
 * scratch files in the current directory, report where (if anywhere) open()
 * starts failing, then close and delete every file that was created.
 *
 * @return Always 0; failures are reported on stdout.
 */
int demo_resource_limit_effect() {
    enum { MAX_TEST_FILES = 100 };
    struct rlimit64 limit;
    int fd_array[MAX_TEST_FILES];
    int i, fd_count = 0;

    printf("\n=== 资源限制实际影响演示 ===\n");

    // Report the current soft limit so the failure point can be related to it.
    if (prlimit64(0, RLIMIT_NOFILE, NULL, &limit) == 0) {
        printf("当前文件描述符软限制: %lld\n", (long long)limit.rlim_cur);
    }

    // Keep opening files until the loop bound or an open() failure.
    printf("尝试打开文件...\n");
    for (i = 0; i < MAX_TEST_FILES; i++) {
        char filename[32];
        snprintf(filename, sizeof(filename), "test_file_%d.txt", i);

        int fd = open(filename, O_CREAT | O_WRONLY | O_TRUNC, 0644);
        if (fd == -1) {
            printf("在打开第 %d 个文件时失败: %s\n", i, strerror(errno));
            printf("当前已打开 %d 个文件\n", fd_count);
            break;
        }

        fd_array[fd_count++] = fd;

        // Progress line on iterations 0, 20, 40, ...
        if (i % 20 == 0) {
            printf("已成功打开 %d 个文件\n", fd_count);
        }
    }

    // File number k was stored in fd_array[k], so one index drives both the
    // close() and the unlink() of the matching name.
    for (i = 0; i < fd_count; i++) {
        char filename[32];
        close(fd_array[i]);
        snprintf(filename, sizeof(filename), "test_file_%d.txt", i);
        unlink(filename);
    }

    printf("清理完成\n");
    return 0;
}

/* Run the three prlimit64 demos in order, then print a short summary. */
int main() {
    // The remaining demos are skipped if the query demo reports failure.
    if (demo_prlimit64_get() != 0) {
        return 0;
    }

    demo_prlimit64_set();
    demo_resource_limit_effect();

    printf("\n=== prlimit64 使用总结 ===\n");
    printf("用途：控制进程资源使用，防止资源耗尽\n");
    printf("注意：某些限制需要特权权限才能修改\n");
    printf("常见资源类型：内存、文件描述符、CPU时间、进程数等\n");
    return 0;
}

总结

这些系统调用提供了强大的文件I/O和资源管理功能：

文件I/O函数特点：

1. pread/pwrite: 原子性的定位读写，线程安全
2. preadv/pwritev: 分散/聚集I/O，一次系统调用处理多个缓冲区
3. preadv2/pwritev2: 增强版本，支持额外控制标志

资源管理函数：

prlimit64: 现代化的资源限制管理，支持64位值

使用建议：

1. 选择合适的函数: 根据具体需求选择单缓冲区或多缓冲区版本
2. 注意权限要求: 某些操作需要相应权限
3. 错误处理: 始终检查返回值并处理错误
4. 性能考虑: 合理使用分散/聚集I/O减少系统调用次数

2025-08-16

Linux系统编程

process_vm_writev系统调用及示例

process_vm_readv/process_vm_writev 函数详解

函数介绍

process_vm_readv 和 process_vm_writev 是Linux系统提供的两个系统调用，用于在不同进程之间直接传输数据。它们允许一个进程直接读取或写入另一个进程的内存空间，而无需通过传统的管道、套接字等IPC机制。这种直接内存访问方式提供了更高的性能，特别适用于调试工具、进程监控、内存分析等场景。

函数原型

#define _GNU_SOURCE
#include <sys/uio.h>

ssize_t process_vm_readv(pid_t pid,
                        const struct iovec *local_iov,
                        unsigned long liovcnt,
                        const struct iovec *remote_iov,
                        unsigned long riovcnt,
                        unsigned long flags);

ssize_t process_vm_writev(pid_t pid,
                         const struct iovec *local_iov,
                         unsigned long liovcnt,
                         const struct iovec *remote_iov,
                         unsigned long riovcnt,
                         unsigned long flags);

功能

process_vm_readv: 从指定进程(pid)的内存中读取数据到当前进程的内存中
process_vm_writev: 将当前进程的内存数据写入到指定进程(pid)的内存中
这两个函数都支持分散/聚集I/O操作，可以同时处理多个不连续的内存区域
数据传输直接在用户空间进行，避免了内核缓冲区的复制，提高了性能

参数

共同参数说明：

pid_t pid: 目标进程的进程ID

必须是正在运行的进程
调用进程必须有权限访问该进程（相同用户或具有CAP_SYS_PTRACE权限）

const struct iovec *local_iov: 本地内存区域描述符数组

描述当前进程中用于读写操作的内存缓冲区
每个iovec结构包含基地址和长度

unsigned long liovcnt: 本地iovec数组的元素个数

指定local_iov数组中有效元素的数量

const struct iovec *remote_iov: 远程内存区域描述符数组

描述目标进程中用于读写操作的内存地址
每个iovec结构包含基地址和长度

unsigned long riovcnt: 远程iovec数组的元素个数

指定remote_iov数组中有效元素的数量

unsigned long flags: 标志位（保留字段）

当前必须设置为0

iovec结构体定义：

// Describes one contiguous memory region (base address plus length).
struct iovec {
    void  *iov_base;    // Start address of the memory region
    size_t iov_len;     // Length of the memory region in bytes
};

返回值

成功时：

返回实际传输的字节数
可能小于请求的总字节数（部分传输）

失败时：

返回-1，并设置errno错误码

常见错误码：

EACCES: 没有权限访问目标进程内存
EFAULT: 指定的内存地址范围无效
EINVAL: 参数无效（如flags非0，iovcnt过大等）
ENOMEM: 内存不足
EPERM: 没有权限操作目标进程
ESRCH: 目标进程不存在或已终止

相似函数或关联函数

相似函数：

readv/writev: 在单个进程内进行分散/聚集I/O操作
preadv/pwritev: 带偏移量的分散/聚集I/O操作
ptrace: 更通用的进程调试和控制接口

关联函数：

kill: 向进程发送信号
wait/waitpid: 等待子进程状态变化
getpid/getppid: 获取进程ID信息

示例代码

示例1：基础内存读取示例

#define _GNU_SOURCE
#include <sys/uio.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>

/**
 * Read `size` bytes at `remote_addr` in process `target_pid` and print up to
 * the first 50 of them (printable ASCII verbatim, everything else as \xNN).
 *
 * The caller must already know a valid address inside the target process.
 *
 * @return 0 on success (the number of bytes actually transferred is printed
 *         and may be smaller than `size`), -1 if the buffer cannot be
 *         allocated or process_vm_readv() fails.
 */
int read_target_process_memory(pid_t target_pid, void *remote_addr, size_t size) {
    struct iovec local_iov[1];
    struct iovec remote_iov[1];
    ssize_t bytes_transferred;

    // malloc(0) may legally return NULL; ask for at least one byte so a
    // zero-length request is not misreported as an allocation failure.
    char *local_buffer = malloc(size > 0 ? size : 1);
    if (!local_buffer) {
        perror("malloc failed");
        return -1;
    }

    // Local side: the buffer in this process that receives the data.
    local_iov[0].iov_base = local_buffer;
    local_iov[0].iov_len = size;

    // Remote side: the address range inside the target process.
    remote_iov[0].iov_base = remote_addr;
    remote_iov[0].iov_len = size;

    bytes_transferred = process_vm_readv(target_pid,
                                        local_iov, 1,      // one local iovec
                                        remote_iov, 1,     // one remote iovec
                                        0);                // flags must be 0

    if (bytes_transferred == -1) {
        printf("process_vm_readv failed: %s\n", strerror(errno));
        free(local_buffer);
        return -1;
    }

    printf("成功读取 %zd 字节数据\n", bytes_transferred);
    printf("读取的数据内容: ");

    // bytes_transferred is non-negative here, so the conversion is safe and
    // the loop no longer compares signed with unsigned values.
    size_t shown = (size_t)bytes_transferred < 50 ? (size_t)bytes_transferred : 50;
    for (size_t i = 0; i < shown; i++) {
        unsigned char byte = (unsigned char)local_buffer[i];
        if (byte >= 32 && byte <= 126) {
            putchar(byte);
        } else {
            printf("\\x%02x", byte);
        }
    }
    printf("\n");

    free(local_buffer);
    return 0;
}

/* Print the command-line synopsis for this example. */
static void print_usage(const char *prog) {
    printf("使用方法: %s <目标进程PID> <内存地址> <读取大小>\n", prog);
    printf("示例: %s 1234 0x601040 100\n", prog);
}

/**
 * Parse <pid> <address> <size> from the command line and dump that memory
 * range of the target process.
 *
 * @return 0 on success, 1 on a usage error or a failed read.
 */
int main(int argc, char *argv[]) {
    if (argc != 4) {
        print_usage(argv[0]);
        return 1;
    }

    char *end = NULL;

    // strtol/strtoul with end-pointer checks reject garbage that atoi()
    // would silently turn into 0.
    errno = 0;
    long pid_value = strtol(argv[1], &end, 10);
    if (errno != 0 || end == argv[1] || *end != '\0' ||
        pid_value <= 0 || (long)(pid_t)pid_value != pid_value) {
        print_usage(argv[0]);
        return 1;
    }
    pid_t target_pid = (pid_t)pid_value;

    // Base 0 accepts both decimal and 0x-prefixed hexadecimal input.
    errno = 0;
    unsigned long addr = strtoul(argv[2], &end, 0);
    if (errno != 0 || end == argv[2] || *end != '\0') {
        print_usage(argv[0]);
        return 1;
    }

    errno = 0;
    unsigned long size_value = strtoul(argv[3], &end, 0);
    if (errno != 0 || end == argv[3] || *end != '\0') {
        print_usage(argv[0]);
        return 1;
    }
    size_t size = (size_t)size_value;

    printf("尝试读取进程 %d 的内存地址 0x%lx，大小 %zu 字节\n",
           target_pid, addr, size);

    // Map the helper's -1 to a conventional exit status.
    return read_target_process_memory(target_pid, (void*)addr, size) == 0 ? 0 : 1;
}

示例2：批量内存操作示例

#define _GNU_SOURCE
#include <sys/uio.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>

#define MAX_REGIONS 10

/**
 * One remote memory region to fetch in a batch read.
 */
typedef struct {
    void *remote_addr;    // Address inside the target process
    size_t size;          // Number of bytes to read
    char *data;           // Local buffer filled by batch_read_memory_regions()
} memory_region_t;

/* Free the first `count` region buffers and reset their pointers to NULL. */
static void release_region_buffers(memory_region_t *regions, int count) {
    for (int k = 0; k < count; k++) {
        free(regions[k].data);
        regions[k].data = NULL;
    }
}

/**
 * Read several remote regions with a single process_vm_readv() call and print
 * a preview of each (first 20 bytes, non-printable bytes shown as '.').
 *
 * On success every regions[i].data points to a buffer the caller must free().
 * On failure all buffers allocated here are released and the corresponding
 * data pointers are reset to NULL, so the caller holds no dangling pointers.
 *
 * @param target_pid  Process to read from.
 * @param regions     Array of `count` regions; remote_addr and size are inputs.
 * @param count       Number of regions, at most MAX_REGIONS.
 * @return 0 on success, -1 if count is too large, an allocation fails or the
 *         system call fails.
 */
int batch_read_memory_regions(pid_t target_pid, memory_region_t *regions, int count) {
    struct iovec local_iov[MAX_REGIONS];
    struct iovec remote_iov[MAX_REGIONS];
    ssize_t total_bytes;
    int i;

    if (count > MAX_REGIONS) {
        printf("区域数量超过最大限制 %d\n", MAX_REGIONS);
        return -1;
    }

    for (i = 0; i < count; i++) {
        // calloc: bytes that a partial transfer never fills stay zero, so the
        // preview below cannot read uninitialised heap memory. At least one
        // byte is requested so a zero-size region is not an "allocation failure".
        regions[i].data = calloc(regions[i].size > 0 ? regions[i].size : 1, 1);
        if (!regions[i].data) {
            printf("为区域 %d 分配内存失败\n", i);
            release_region_buffers(regions, i);
            return -1;
        }

        // Local side: where the bytes land in this process.
        local_iov[i].iov_base = regions[i].data;
        local_iov[i].iov_len = regions[i].size;

        // Remote side: where the bytes come from in the target process.
        remote_iov[i].iov_base = regions[i].remote_addr;
        remote_iov[i].iov_len = regions[i].size;
    }

    total_bytes = process_vm_readv(target_pid,
                                  local_iov, count,
                                  remote_iov, count,
                                  0);

    if (total_bytes == -1) {
        printf("批量读取失败: %s\n", strerror(errno));
        release_region_buffers(regions, count);
        return -1;
    }

    printf("批量读取成功，总共传输 %zd 字节\n", total_bytes);

    for (i = 0; i < count; i++) {
        printf("区域 %d (地址 0x%lx, 大小 %zu): ",
               i, (unsigned long)regions[i].remote_addr, regions[i].size);
        for (size_t j = 0; j < regions[i].size && j < 20; j++) {
            if (regions[i].data[j] >= 32 && regions[i].data[j] <= 126) {
                putchar(regions[i].data[j]);
            } else {
                printf(".");
            }
        }
        printf("\n");
    }

    return 0;
}

int main(int argc, char *argv&#91;]) {
    if (argc != 2) {
        printf("使用方法: %s <目标进程PID>\n", argv&#91;0]);
        return 1;
    }
    
    pid_t target_pid = atoi(argv&#91;1]);
    memory_region_t regions&#91;3];
    
    // 设置要读取的内存区域（实际使用时需要根据目标进程调整地址）
    regions&#91;0].remote_addr = (void*)0x601000;  // 示例地址1
    regions&#91;0].size = 32;
    regions&#91;1].remote_addr = (void*)0x601020;  // 示例地址2
    regions&#91;1].size = 16;
    regions&#91;2].remote_addr = (void*)0x601030;  // 示例地址3
    regions&#91;2].size = 8;
    
    printf("批量读取进程 %d 的多个内存区域\n", target_pid);
    
    if (batch_read_memory_regions(target_pid, regions, 3) == 0) {
        printf("批量读取操作完成\n");
        // 释放内存
        for (int i = 0; i < 3; i++) {
            free(regions&#91;i].data);
        }
    } else {
        printf("批量读取操作失败\n");
    }
    
    return 0;
}

示例3：内存写入和修改示例

#define _GNU_SOURCE
#include <sys/uio.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <signal.h>

/**
 * Copy `size` bytes from `data` in this process to `remote_addr` in process
 * `target_pid` using process_vm_writev().
 *
 * @return 0 only when all `size` bytes were written; -1 if the system call
 *         fails or transfers fewer bytes than requested.
 */
int write_to_target_memory(pid_t target_pid, void *remote_addr,
                          const void *data, size_t size) {
    struct iovec local_iov[1];
    struct iovec remote_iov[1];
    ssize_t bytes_written;

    // iov_base is not const-qualified, hence the cast; the source buffer is
    // only read by a write operation.
    local_iov[0].iov_base = (void*)data;
    local_iov[0].iov_len = size;

    // Destination range inside the target process.
    remote_iov[0].iov_base = remote_addr;
    remote_iov[0].iov_len = size;

    bytes_written = process_vm_writev(target_pid,
                                     local_iov, 1,
                                     remote_iov, 1,
                                     0);

    if (bytes_written == -1) {
        printf("process_vm_writev 失败: %s\n", strerror(errno));
        return -1;
    }

    // A short write leaves the target only partly updated; report it as a
    // failure instead of letting the caller announce success.
    if ((size_t)bytes_written != size) {
        printf("process_vm_writev 只写入了 %zd/%zu 字节\n", bytes_written, size);
        return -1;
    }

    printf("成功写入 %zd 字节到进程 %d 的地址 0x%lx\n",
           bytes_written, target_pid, (unsigned long)remote_addr);

    return 0;
}

/**
 * Deliver `signal` to process `target_pid` via kill() and report the outcome
 * on stdout.
 *
 * @return 0 if kill() succeeded, -1 otherwise (the errno text is printed).
 */
int send_signal_to_process(pid_t target_pid, int signal) {
    int rc = kill(target_pid, signal);

    if (rc != -1) {
        printf("成功发送信号 %d 到进程 %d\n", signal, target_pid);
        return 0;
    }

    printf("发送信号失败: %s\n", strerror(errno));
    return -1;
}

/**
 * Overwrite a string and an integer at hard-coded example addresses in the
 * target process, then send SIGUSR1 so the target prints its data again.
 * The addresses are placeholders and must match the real target.
 *
 * @return 0 if every step succeeded, 1 on a usage error or any failed step.
 */
int main(int argc, char *argv[]) {
    if (argc != 2) {
        printf("使用方法: %s <目标进程PID>\n", argv[0]);
        return 1;
    }

    // The PID must be strictly positive: kill() treats 0 and negative values
    // as "signal a whole process group", which atoi() on bad input would
    // trigger by accident.
    char *end = NULL;
    errno = 0;
    long pid_value = strtol(argv[1], &end, 10);
    if (errno != 0 || end == argv[1] || *end != '\0' ||
        pid_value <= 0 || (long)(pid_t)pid_value != pid_value) {
        printf("使用方法: %s <目标进程PID>\n", argv[0]);
        return 1;
    }
    pid_t target_pid = (pid_t)pid_value;

    char new_message[] = "Hello from process_vm_writev!";
    int new_value = 999999;
    int failed = 0;

    printf("准备修改进程 %d 的内存\n", target_pid);

    // String payload, including its terminating NUL.
    if (write_to_target_memory(target_pid, (void*)0x601060,
                              new_message, strlen(new_message) + 1) == 0) {
        printf("字符串写入成功\n");
    } else {
        failed = 1;
    }

    // Integer payload.
    if (write_to_target_memory(target_pid, (void*)0x601040,
                              &new_value, sizeof(new_value)) == 0) {
        printf("整数写入成功\n");
    } else {
        failed = 1;
    }

    // Ask the target to print its (possibly modified) data.
    if (send_signal_to_process(target_pid, SIGUSR1) != 0) {
        failed = 1;
    }

    return failed;
}

示例4：完整的测试目标进程

#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <signal.h>
#include <string.h>

// The addresses of these variables are printed at start-up so that other
// processes can read and modify them.
int shared_counter = 42;
char shared_message[256] = "Original message from target process";
double shared_double = 3.14159;

// Flags written by the signal handler and consumed by the main loop.
// sig_atomic_t is the type that may safely be written from a handler.
volatile sig_atomic_t running = 1;
static volatile sig_atomic_t dump_requested = 0;

/**
 * Signal handler: only records which signal arrived. All printing happens in
 * main(), because printf() is not async-signal-safe.
 */
void signal_handler(int sig) {
    if (sig == SIGUSR1) {
        dump_requested = 1;
    } else if (sig == SIGINT) {
        running = 0;
    }
}

/**
 * Target process for the process_vm_readv/writev examples: prints its PID and
 * the addresses of the shared variables, then waits. SIGUSR1 makes it print
 * the current values; SIGINT (Ctrl+C) makes it exit.
 */
int main() {
    printf("=== 目标测试进程 ===\n");
    printf("进程PID: %d\n", getpid());
    printf("变量地址信息:\n");
    printf("  shared_counter 地址: %p\n", (void*)&shared_counter);
    printf("  shared_message 地址: %p\n", (void*)shared_message);
    printf("  shared_double  地址: %p\n", (void*)&shared_double);
    printf("\n");
    printf("初始数据:\n");
    printf("  Counter: %d\n", shared_counter);
    printf("  Message: %s\n", shared_message);
    printf("  Double:  %.5f\n", shared_double);
    printf("\n");
    printf("按 Ctrl+C 退出，或等待其他进程发送 SIGUSR1 信号\n");
    // Make the PID and addresses visible even when stdout is a pipe or file.
    fflush(stdout);

    signal(SIGUSR1, signal_handler);
    signal(SIGINT, signal_handler);

    // sleep() returns early when a signal is handled, so requests are
    // normally served right away.
    while (running) {
        sleep(1);

        if (dump_requested) {
            dump_requested = 0;
            printf("收到SIGUSR1信号，显示当前数据:\n");
            printf("  Counter: %d\n", shared_counter);
            printf("  Message: %s\n", shared_message);
            printf("  Double:  %.5f\n", shared_double);
            fflush(stdout);
        }
    }

    printf("收到终止信号\n");
    printf("目标进程退出\n");
    return 0;
}

使用注意事项

权限要求：

1. 调用进程和目标进程必须具有相同的用户ID
2. 或者调用进程必须具有 CAP_SYS_PTRACE 权限
3. 目标进程必须正在运行（不能是僵尸进程）

地址获取：

1. 需要知道目标进程的确切内存地址
2. 可以通过调试器、符号表或/proc/[pid]/maps文件获取
3. 地址必须在目标进程的有效地址空间内

错误处理：

1. 始终检查返回值
2. 处理部分传输的情况
3. 确保本地缓冲区足够大

安全考虑：

1. 这些函数可能被恶意程序用于内存篡改
2. 在生产环境中应谨慎使用
3. 系统管理员可以通过安全策略限制使用

性能特点：

1. 比传统的IPC机制更高效
2. 避免了内核缓冲区复制
3. 适合大量数据传输场景

https://blog.csdn.net/timberwolf007/article/details/150415662?spm=1011.2415.3001.5331

2025-08-16

Linux系统编程

process_vm_readv系统调用及示例

process_vm_readv/process_vm_writev 函数详解

函数介绍

函数原型

#define _GNU_SOURCE
#include <sys/uio.h>

ssize_t process_vm_readv(pid_t pid,
                        const struct iovec *local_iov,
                        unsigned long liovcnt,
                        const struct iovec *remote_iov,
                        unsigned long riovcnt,
                        unsigned long flags);

ssize_t process_vm_writev(pid_t pid,
                         const struct iovec *local_iov,
                         unsigned long liovcnt,
                         const struct iovec *remote_iov,
                         unsigned long riovcnt,
                         unsigned long flags);

功能

process_vm_readv: 从指定进程(pid)的内存中读取数据到当前进程的内存中
process_vm_writev: 将当前进程的内存数据写入到指定进程(pid)的内存中
这两个函数都支持分散/聚集I/O操作，可以同时处理多个不连续的内存区域
数据传输直接在用户空间进行，避免了内核缓冲区的复制，提高了性能

参数

共同参数说明：

pid_t pid: 目标进程的进程ID

必须是正在运行的进程
调用进程必须有权限访问该进程（相同用户或具有CAP_SYS_PTRACE权限）

const struct iovec *local_iov: 本地内存区域描述符数组

描述当前进程中用于读写操作的内存缓冲区
每个iovec结构包含基地址和长度

unsigned long liovcnt: 本地iovec数组的元素个数

指定local_iov数组中有效元素的数量

const struct iovec *remote_iov: 远程内存区域描述符数组

描述目标进程中用于读写操作的内存地址
每个iovec结构包含基地址和长度

unsigned long riovcnt: 远程iovec数组的元素个数

指定remote_iov数组中有效元素的数量

unsigned long flags: 标志位（保留字段）

当前必须设置为0

iovec结构体定义：

// Describes one contiguous memory region (base address plus length).
struct iovec {
    void  *iov_base;    // Start address of the memory region
    size_t iov_len;     // Length of the memory region in bytes
};

返回值

成功时：

返回实际传输的字节数
可能小于请求的总字节数（部分传输）

失败时：

返回-1，并设置errno错误码

常见错误码：

EACCES: 没有权限访问目标进程内存
EFAULT: 指定的内存地址范围无效
EINVAL: 参数无效（如flags非0，iovcnt过大等）
ENOMEM: 内存不足
EPERM: 没有权限操作目标进程
ESRCH: 目标进程不存在或已终止

相似函数或关联函数

相似函数：

readv/writev: 在单个进程内进行分散/聚集I/O操作
preadv/pwritev: 带偏移量的分散/聚集I/O操作
ptrace: 更通用的进程调试和控制接口

关联函数：

kill: 向进程发送信号
wait/waitpid: 等待子进程状态变化
getpid/getppid: 获取进程ID信息

示例代码

示例1：基础内存读取示例

#define _GNU_SOURCE
#include <sys/uio.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>

/**
 * Read `size` bytes at `remote_addr` in process `target_pid` and print up to
 * the first 50 of them (printable ASCII verbatim, everything else as \xNN).
 *
 * The caller must already know a valid address inside the target process.
 *
 * @return 0 on success (the number of bytes actually transferred is printed
 *         and may be smaller than `size`), -1 if the buffer cannot be
 *         allocated or process_vm_readv() fails.
 */
int read_target_process_memory(pid_t target_pid, void *remote_addr, size_t size) {
    struct iovec local_iov[1];
    struct iovec remote_iov[1];
    ssize_t bytes_transferred;

    // malloc(0) may legally return NULL; ask for at least one byte so a
    // zero-length request is not misreported as an allocation failure.
    char *local_buffer = malloc(size > 0 ? size : 1);
    if (!local_buffer) {
        perror("malloc failed");
        return -1;
    }

    // Local side: the buffer in this process that receives the data.
    local_iov[0].iov_base = local_buffer;
    local_iov[0].iov_len = size;

    // Remote side: the address range inside the target process.
    remote_iov[0].iov_base = remote_addr;
    remote_iov[0].iov_len = size;

    bytes_transferred = process_vm_readv(target_pid,
                                        local_iov, 1,      // one local iovec
                                        remote_iov, 1,     // one remote iovec
                                        0);                // flags must be 0

    if (bytes_transferred == -1) {
        printf("process_vm_readv failed: %s\n", strerror(errno));
        free(local_buffer);
        return -1;
    }

    printf("成功读取 %zd 字节数据\n", bytes_transferred);
    printf("读取的数据内容: ");

    // bytes_transferred is non-negative here, so the conversion is safe and
    // the loop no longer compares signed with unsigned values.
    size_t shown = (size_t)bytes_transferred < 50 ? (size_t)bytes_transferred : 50;
    for (size_t i = 0; i < shown; i++) {
        unsigned char byte = (unsigned char)local_buffer[i];
        if (byte >= 32 && byte <= 126) {
            putchar(byte);
        } else {
            printf("\\x%02x", byte);
        }
    }
    printf("\n");

    free(local_buffer);
    return 0;
}

/* Print the command-line synopsis for this example. */
static void print_usage(const char *prog) {
    printf("使用方法: %s <目标进程PID> <内存地址> <读取大小>\n", prog);
    printf("示例: %s 1234 0x601040 100\n", prog);
}

/**
 * Parse <pid> <address> <size> from the command line and dump that memory
 * range of the target process.
 *
 * @return 0 on success, 1 on a usage error or a failed read.
 */
int main(int argc, char *argv[]) {
    if (argc != 4) {
        print_usage(argv[0]);
        return 1;
    }

    char *end = NULL;

    // strtol/strtoul with end-pointer checks reject garbage that atoi()
    // would silently turn into 0.
    errno = 0;
    long pid_value = strtol(argv[1], &end, 10);
    if (errno != 0 || end == argv[1] || *end != '\0' ||
        pid_value <= 0 || (long)(pid_t)pid_value != pid_value) {
        print_usage(argv[0]);
        return 1;
    }
    pid_t target_pid = (pid_t)pid_value;

    // Base 0 accepts both decimal and 0x-prefixed hexadecimal input.
    errno = 0;
    unsigned long addr = strtoul(argv[2], &end, 0);
    if (errno != 0 || end == argv[2] || *end != '\0') {
        print_usage(argv[0]);
        return 1;
    }

    errno = 0;
    unsigned long size_value = strtoul(argv[3], &end, 0);
    if (errno != 0 || end == argv[3] || *end != '\0') {
        print_usage(argv[0]);
        return 1;
    }
    size_t size = (size_t)size_value;

    printf("尝试读取进程 %d 的内存地址 0x%lx，大小 %zu 字节\n",
           target_pid, addr, size);

    // Map the helper's -1 to a conventional exit status.
    return read_target_process_memory(target_pid, (void*)addr, size) == 0 ? 0 : 1;
}

示例2：批量内存操作示例

#define _GNU_SOURCE
#include <sys/uio.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>

#define MAX_REGIONS 10

/**
 * One remote memory region to fetch in a batch read.
 */
typedef struct {
    void *remote_addr;    // Address inside the target process
    size_t size;          // Number of bytes to read
    char *data;           // Local buffer filled by batch_read_memory_regions()
} memory_region_t;

/* Free the first `count` region buffers and reset their pointers to NULL. */
static void release_region_buffers(memory_region_t *regions, int count) {
    for (int k = 0; k < count; k++) {
        free(regions[k].data);
        regions[k].data = NULL;
    }
}

/**
 * Read several remote regions with a single process_vm_readv() call and print
 * a preview of each (first 20 bytes, non-printable bytes shown as '.').
 *
 * On success every regions[i].data points to a buffer the caller must free().
 * On failure all buffers allocated here are released and the corresponding
 * data pointers are reset to NULL, so the caller holds no dangling pointers.
 *
 * @param target_pid  Process to read from.
 * @param regions     Array of `count` regions; remote_addr and size are inputs.
 * @param count       Number of regions, at most MAX_REGIONS.
 * @return 0 on success, -1 if count is too large, an allocation fails or the
 *         system call fails.
 */
int batch_read_memory_regions(pid_t target_pid, memory_region_t *regions, int count) {
    struct iovec local_iov[MAX_REGIONS];
    struct iovec remote_iov[MAX_REGIONS];
    ssize_t total_bytes;
    int i;

    if (count > MAX_REGIONS) {
        printf("区域数量超过最大限制 %d\n", MAX_REGIONS);
        return -1;
    }

    for (i = 0; i < count; i++) {
        // calloc: bytes that a partial transfer never fills stay zero, so the
        // preview below cannot read uninitialised heap memory. At least one
        // byte is requested so a zero-size region is not an "allocation failure".
        regions[i].data = calloc(regions[i].size > 0 ? regions[i].size : 1, 1);
        if (!regions[i].data) {
            printf("为区域 %d 分配内存失败\n", i);
            release_region_buffers(regions, i);
            return -1;
        }

        // Local side: where the bytes land in this process.
        local_iov[i].iov_base = regions[i].data;
        local_iov[i].iov_len = regions[i].size;

        // Remote side: where the bytes come from in the target process.
        remote_iov[i].iov_base = regions[i].remote_addr;
        remote_iov[i].iov_len = regions[i].size;
    }

    total_bytes = process_vm_readv(target_pid,
                                  local_iov, count,
                                  remote_iov, count,
                                  0);

    if (total_bytes == -1) {
        printf("批量读取失败: %s\n", strerror(errno));
        release_region_buffers(regions, count);
        return -1;
    }

    printf("批量读取成功，总共传输 %zd 字节\n", total_bytes);

    for (i = 0; i < count; i++) {
        printf("区域 %d (地址 0x%lx, 大小 %zu): ",
               i, (unsigned long)regions[i].remote_addr, regions[i].size);
        for (size_t j = 0; j < regions[i].size && j < 20; j++) {
            if (regions[i].data[j] >= 32 && regions[i].data[j] <= 126) {
                putchar(regions[i].data[j]);
            } else {
                printf(".");
            }
        }
        printf("\n");
    }

    return 0;
}

int main(int argc, char *argv&#91;]) {
    if (argc != 2) {
        printf("使用方法: %s <目标进程PID>\n", argv&#91;0]);
        return 1;
    }
    
    pid_t target_pid = atoi(argv&#91;1]);
    memory_region_t regions&#91;3];
    
    // 设置要读取的内存区域（实际使用时需要根据目标进程调整地址）
    regions&#91;0].remote_addr = (void*)0x601000;  // 示例地址1
    regions&#91;0].size = 32;
    regions&#91;1].remote_addr = (void*)0x601020;  // 示例地址2
    regions&#91;1].size = 16;
    regions&#91;2].remote_addr = (void*)0x601030;  // 示例地址3
    regions&#91;2].size = 8;
    
    printf("批量读取进程 %d 的多个内存区域\n", target_pid);
    
    if (batch_read_memory_regions(target_pid, regions, 3) == 0) {
        printf("批量读取操作完成\n");
        // 释放内存
        for (int i = 0; i < 3; i++) {
            free(regions&#91;i].data);
        }
    } else {
        printf("批量读取操作失败\n");
    }
    
    return 0;
}

示例3：内存写入和修改示例

#define _GNU_SOURCE
#include <sys/uio.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <signal.h>

/**
 * Copy `size` bytes from `data` in this process to `remote_addr` in process
 * `target_pid` using process_vm_writev().
 *
 * @return 0 only when all `size` bytes were written; -1 if the system call
 *         fails or transfers fewer bytes than requested.
 */
int write_to_target_memory(pid_t target_pid, void *remote_addr,
                          const void *data, size_t size) {
    struct iovec local_iov[1];
    struct iovec remote_iov[1];
    ssize_t bytes_written;

    // iov_base is not const-qualified, hence the cast; the source buffer is
    // only read by a write operation.
    local_iov[0].iov_base = (void*)data;
    local_iov[0].iov_len = size;

    // Destination range inside the target process.
    remote_iov[0].iov_base = remote_addr;
    remote_iov[0].iov_len = size;

    bytes_written = process_vm_writev(target_pid,
                                     local_iov, 1,
                                     remote_iov, 1,
                                     0);

    if (bytes_written == -1) {
        printf("process_vm_writev 失败: %s\n", strerror(errno));
        return -1;
    }

    // A short write leaves the target only partly updated; report it as a
    // failure instead of letting the caller announce success.
    if ((size_t)bytes_written != size) {
        printf("process_vm_writev 只写入了 %zd/%zu 字节\n", bytes_written, size);
        return -1;
    }

    printf("成功写入 %zd 字节到进程 %d 的地址 0x%lx\n",
           bytes_written, target_pid, (unsigned long)remote_addr);

    return 0;
}

/**
 * Deliver `signal` to process `target_pid` via kill() and report the outcome
 * on stdout.
 *
 * @return 0 if kill() succeeded, -1 otherwise (the errno text is printed).
 */
int send_signal_to_process(pid_t target_pid, int signal) {
    int rc = kill(target_pid, signal);

    if (rc != -1) {
        printf("成功发送信号 %d 到进程 %d\n", signal, target_pid);
        return 0;
    }

    printf("发送信号失败: %s\n", strerror(errno));
    return -1;
}

/**
 * Overwrite a string and an integer at hard-coded example addresses in the
 * target process, then send SIGUSR1 so the target prints its data again.
 * The addresses are placeholders and must match the real target.
 *
 * @return 0 if every step succeeded, 1 on a usage error or any failed step.
 */
int main(int argc, char *argv[]) {
    if (argc != 2) {
        printf("使用方法: %s <目标进程PID>\n", argv[0]);
        return 1;
    }

    // The PID must be strictly positive: kill() treats 0 and negative values
    // as "signal a whole process group", which atoi() on bad input would
    // trigger by accident.
    char *end = NULL;
    errno = 0;
    long pid_value = strtol(argv[1], &end, 10);
    if (errno != 0 || end == argv[1] || *end != '\0' ||
        pid_value <= 0 || (long)(pid_t)pid_value != pid_value) {
        printf("使用方法: %s <目标进程PID>\n", argv[0]);
        return 1;
    }
    pid_t target_pid = (pid_t)pid_value;

    char new_message[] = "Hello from process_vm_writev!";
    int new_value = 999999;
    int failed = 0;

    printf("准备修改进程 %d 的内存\n", target_pid);

    // String payload, including its terminating NUL.
    if (write_to_target_memory(target_pid, (void*)0x601060,
                              new_message, strlen(new_message) + 1) == 0) {
        printf("字符串写入成功\n");
    } else {
        failed = 1;
    }

    // Integer payload.
    if (write_to_target_memory(target_pid, (void*)0x601040,
                              &new_value, sizeof(new_value)) == 0) {
        printf("整数写入成功\n");
    } else {
        failed = 1;
    }

    // Ask the target to print its (possibly modified) data.
    if (send_signal_to_process(target_pid, SIGUSR1) != 0) {
        failed = 1;
    }

    return failed;
}

示例4：完整的测试目标进程

#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <signal.h>
#include <string.h>

// The addresses of these variables are printed at start-up so that other
// processes can read and modify them.
int shared_counter = 42;
char shared_message[256] = "Original message from target process";
double shared_double = 3.14159;

// Flags written by the signal handler and consumed by the main loop.
// sig_atomic_t is the type that may safely be written from a handler.
volatile sig_atomic_t running = 1;
static volatile sig_atomic_t dump_requested = 0;

/**
 * Signal handler: only records which signal arrived. All printing happens in
 * main(), because printf() is not async-signal-safe.
 */
void signal_handler(int sig) {
    if (sig == SIGUSR1) {
        dump_requested = 1;
    } else if (sig == SIGINT) {
        running = 0;
    }
}

/**
 * Target process for the process_vm_readv/writev examples: prints its PID and
 * the addresses of the shared variables, then waits. SIGUSR1 makes it print
 * the current values; SIGINT (Ctrl+C) makes it exit.
 */
int main() {
    printf("=== 目标测试进程 ===\n");
    printf("进程PID: %d\n", getpid());
    printf("变量地址信息:\n");
    printf("  shared_counter 地址: %p\n", (void*)&shared_counter);
    printf("  shared_message 地址: %p\n", (void*)shared_message);
    printf("  shared_double  地址: %p\n", (void*)&shared_double);
    printf("\n");
    printf("初始数据:\n");
    printf("  Counter: %d\n", shared_counter);
    printf("  Message: %s\n", shared_message);
    printf("  Double:  %.5f\n", shared_double);
    printf("\n");
    printf("按 Ctrl+C 退出，或等待其他进程发送 SIGUSR1 信号\n");
    // Make the PID and addresses visible even when stdout is a pipe or file.
    fflush(stdout);

    signal(SIGUSR1, signal_handler);
    signal(SIGINT, signal_handler);

    // sleep() returns early when a signal is handled, so requests are
    // normally served right away.
    while (running) {
        sleep(1);

        if (dump_requested) {
            dump_requested = 0;
            printf("收到SIGUSR1信号，显示当前数据:\n");
            printf("  Counter: %d\n", shared_counter);
            printf("  Message: %s\n", shared_message);
            printf("  Double:  %.5f\n", shared_double);
            fflush(stdout);
        }
    }

    printf("收到终止信号\n");
    printf("目标进程退出\n");
    return 0;
}

使用注意事项

权限要求：

1. 调用进程和目标进程必须具有相同的用户ID
2. 或者调用进程必须具有 CAP_SYS_PTRACE 权限
3. 目标进程必须正在运行（不能是僵尸进程）

地址获取：

1. 需要知道目标进程的确切内存地址
2. 可以通过调试器、符号表或/proc/[pid]/maps文件获取
3. 地址必须在目标进程的有效地址空间内

错误处理：

1. 始终检查返回值
2. 处理部分传输的情况
3. 确保本地缓冲区足够大

安全考虑：

1. 这些函数可能被恶意程序用于内存篡改
2. 在生产环境中应谨慎使用
3. 系统管理员可以通过安全策略限制使用

性能特点：

1. 比传统的IPC机制更高效
2. 避免了内核缓冲区复制
3. 适合大量数据传输场景

2025-08-16

Linux系统编程

setrlimit系统调用及示例

我们来深入学习 setrlimit 系统调用

1. 函数介绍

在 Linux 系统中，为了保证系统的稳定性和公平性，防止某个程序因为 bug 或恶意行为而耗尽系统资源（如内存、CPU 时间、打开的文件数量等），内核提供了一种资源限制 (Resource Limits) 机制。

setrlimit (Set Resource Limit) 系统调用的作用就是设置调用进程（及其未来创建的子进程）对某一类系统资源的使用上限。

你可以把它想象成你给一个程序分配一个“资源使用预算”或“配额”。比如，你可以告诉内核：“这个程序最多只能使用 100MB 的内存”、“最多只能打开 10 个文件”、“最多只能运行 10 秒钟”等等。当程序试图超出这个限制时，内核会根据资源类型采取不同措施，通常是拒绝其请求（例如 malloc 失败）或发送一个信号（例如 SIGXCPU）来终止它。

简单来说，setrlimit 就是让你用程序来给另一个程序（或自己）“上规矩”，限制它能用多少系统资源。

2. 函数原型

#include <sys/resource.h> // 包含系统调用声明和常量

int setrlimit(int resource, const struct rlimit *rlim);

3. 功能

为调用进程设置指定资源 resource 的软限制 (soft limit) 和硬限制 (hard limit)。

4. 参数

resource:

int 类型。

指定要设置限制的资源类型。常见的资源类型定义在 <sys/resource.h> 中，例如：

RLIMIT_AS: 进程虚拟地址空间的最大总大小（字节）。限制进程能分配的总内存。
RLIMIT_CORE: 程序崩溃时创建的核心转储文件 (core dump) 的最大字节数。设置为 0 可以禁用 core dump。
RLIMIT_CPU: 进程可以使用的 CPU 时间（秒）。达到软限制会收到 SIGXCPU 信号，达到硬限制会被 SIGKILL 终止。
RLIMIT_DATA: 进程数据段的最大字节大小（通过 brk/sbrk 分配的内存）。
RLIMIT_FSIZE: 进程可以创建的文件的最大字节大小。超出限制时写操作会失败，并可能收到 SIGXFSZ 信号。
RLIMIT_NOFILE: 进程可以同时打开的文件描述符（File Descriptor）的最大数量。
RLIMIT_NPROC: 调用用户 ID (Real User ID) 可以拥有的最大进程/线程数量。
RLIMIT_STACK: 进程栈的最大字节大小。
RLIMIT_MEMLOCK: 可以使用 mlock 锁定在内存中的最大字节数。
RLIMIT_RSS: 进程在物理内存中驻留的最大字节数（Resident Set Size）。(在 Linux 上可能不强制执行)。
RLIMIT_NICE: nice 值的上限（影响进程调度优先级）。
… 还有其他一些资源类型。

rlim:

const struct rlimit * 类型。

一个指向 rlimit 结构体的指针，该结构体定义了资源的限制。rlimit 结构体定义如下：

struct rlimit {
    rlim_t rlim_cur; // Soft limit (软限制)
    rlim_t rlim_max; // Hard limit (硬限制)
};

rlim_cur (软限制):

这是内核实际执行强制限制的值。
进程可以随时将其修改为小于或等于当前硬限制 (rlim_max) 的任何值。
超过软限制通常会导致内核发送一个信号（如 SIGXCPU）来警告进程。

rlim_max (硬限制):

这是软限制可以被设置的上限。
普通用户只能降低硬限制，不能提高它。
只有特权用户 (root) 才能提高硬限制。
进程可以在任何时候将硬限制降低到等于或低于当前硬限制的值。

5. 返回值

成功: 返回 0。
失败: 返回 -1，并设置全局变量 errno 来指示具体的错误原因。

6. 错误码 (errno)

EFAULT: rlim 指向了调用进程无法访问的内存地址。
EINVAL: resource 参数无效，或者指定的限制值无效（例如，负数，或者对于某些资源类型不合适）。
EPERM: 调用进程没有权限设置指定的限制。最常见的原因是普通用户试图提高硬限制 (rlim_max)。

7. 相似函数或关联函数

getrlimit: 用于获取当前进程对某类资源的限制设置。
prlimit: 一个更现代的系统调用，可以同时设置和获取任意进程的资源限制（需要 CAP_SYS_RESOURCE 能力）。
ulimit: 命令行工具（在 shell 中），用于设置当前 shell 及其子进程的资源限制。它在底层调用 setrlimit 和 getrlimit。
struct rlimit: 定义限制值的数据结构。

8. 示例代码

下面的示例演示了如何使用 setrlimit 来设置几种常见的资源限制。

#define _GNU_SOURCE // 启用 GNU 扩展
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/resource.h> // 包含 setrlimit, getrlimit, struct rlimit
#include <string.h>
#include <errno.h>
#include <signal.h>
#include <sys/time.h> // 包含 timeval, 用于 CPU 时间限制
#include <fcntl.h>   // 包含 open

// Handler for the signals the kernel sends when a resource limit is hit.
// SIGXCPU ends the program with a failure status; SIGXFSZ is reported and the
// program keeps running; any other signal is only logged.
// NOTE(review): printf() and exit() are not async-signal-safe. This is kept
// as-is for the demo; confirm before reusing the pattern in real code.
void signal_handler(int sig) {
    printf("\nCaught signal %d\n", sig);

    switch (sig) {
    case SIGXCPU:
        printf("CPU time limit (soft) reached. Exiting gracefully.\n");
        exit(EXIT_FAILURE);
    case SIGXFSZ:
        printf("File size limit reached. Write operation failed.\n");
        // Returning here lets the interrupted code continue.
        break;
    default:
        break;
    }
}

// Print a single limit value, showing RLIM_INFINITY as "unlimited".
// rlim_t is unsigned, so the value is printed as unsigned long long to avoid
// truncation or a negative number for very large limits.
static void print_limit_value(rlim_t value) {
    if (value == RLIM_INFINITY) {
        printf("unlimited");
    } else {
        printf("%llu", (unsigned long long)value);
    }
}

// Print the current soft and hard limit of `resource` for this process,
// labelled with `resource_name`. If getrlimit() fails, the error is reported
// through perror() and nothing else is printed.
void print_resource_limit(const char* resource_name, int resource) {
    struct rlimit rl;

    if (getrlimit(resource, &rl) != 0) {
        perror("getrlimit");
        return;
    }

    printf("%-15s: Soft = ", resource_name);
    print_limit_value(rl.rlim_cur);
    printf(", Hard = ");
    print_limit_value(rl.rlim_max);
    printf("\n");
}

// Demo driver: lowers several resource limits of the current process with
// setrlimit() and then deliberately runs into them.
//
//   1.   print a set of initial limits
//   2-6. set RLIMIT_CPU, RLIMIT_NOFILE, RLIMIT_FSIZE, RLIMIT_AS, RLIMIT_CORE
//   7.   print the limits again
//   8.   exercise RLIMIT_FSIZE, then RLIMIT_AS, then RLIMIT_CPU
//
// Every setrlimit()/sigaction() failure is reported with perror() and the
// demo simply continues with the next step.
//
// The final step is an endless busy loop. When the RLIMIT_CPU step succeeded,
// signal_handler() exits the process on SIGXCPU, so the trailing return is
// not expected to be reached.
int main(void) {
    struct rlimit rl;
    struct sigaction sa;

    // Build the sigaction template once, up front. Both the SIGXCPU and the
    // SIGXFSZ registration reuse it, so it has to be fully initialised even
    // when the RLIMIT_CPU step below fails.
    memset(&sa, 0, sizeof(sa));
    sa.sa_handler = signal_handler;
    sigemptyset(&sa.sa_mask);

    printf("--- Demonstrating setrlimit ---\n");
    printf("PID: %d\n", getpid());

    // 1. Show a selection of the initial resource limits.
    printf("\n--- Initial Resource Limits ---\n");
    print_resource_limit("CPU Time", RLIMIT_CPU);
    print_resource_limit("File Size", RLIMIT_FSIZE);
    print_resource_limit("Data Segment", RLIMIT_DATA);
    print_resource_limit("Stack Size", RLIMIT_STACK);
    print_resource_limit("Virtual Memory", RLIMIT_AS);
    print_resource_limit("Open Files", RLIMIT_NOFILE);
    print_resource_limit("Max Processes", RLIMIT_NPROC);
    print_resource_limit("Core File Size", RLIMIT_CORE);

    // 2. CPU time limit.
    printf("\n--- Setting CPU Time Limit ---\n");
    rl.rlim_cur = 5;  // soft limit: 5 seconds
    rl.rlim_max = 10; // hard limit: 10 seconds
    if (setrlimit(RLIMIT_CPU, &rl) == 0) {
        printf("Set CPU time limit: Soft = %lds, Hard = %lds\n", (long)rl.rlim_cur, (long)rl.rlim_max);
        // Catch SIGXCPU so the soft limit is reported before the process ends.
        if (sigaction(SIGXCPU, &sa, NULL) == -1) {
            perror("sigaction SIGXCPU");
        }
    } else {
        perror("setrlimit RLIMIT_CPU");
    }

    // 3. Maximum number of open file descriptors.
    printf("\n--- Setting Open File Descriptor Limit ---\n");
    rl.rlim_cur = 10; // soft limit: at most 10 descriptors
    rl.rlim_max = 20; // hard limit: at most 20 descriptors
    if (setrlimit(RLIMIT_NOFILE, &rl) == 0) {
        printf("Set open file limit: Soft = %ld, Hard = %ld\n", (long)rl.rlim_cur, (long)rl.rlim_max);
    } else {
        perror("setrlimit RLIMIT_NOFILE");
    }

    // 4. Maximum file size.
    printf("\n--- Setting File Size Limit ---\n");
    rl.rlim_cur = 1024 * 1024;     // soft limit: 1 MB
    rl.rlim_max = 2 * 1024 * 1024; // hard limit: 2 MB
    if (setrlimit(RLIMIT_FSIZE, &rl) == 0) {
        printf("Set file size limit: Soft = %ld bytes, Hard = %ld bytes\n", (long)rl.rlim_cur, (long)rl.rlim_max);
        // Catch SIGXFSZ so that write() fails with an error instead of the
        // default action taking the process down.
        if (sigaction(SIGXFSZ, &sa, NULL) == -1) {
            perror("sigaction SIGXFSZ");
        }
    } else {
        perror("setrlimit RLIMIT_FSIZE");
    }

    // 5. Virtual memory (address space) limit.
    printf("\n--- Setting Virtual Memory Limit ---\n");
    rl.rlim_cur = 50 * 1024 * 1024;  // soft limit: 50 MB
    rl.rlim_max = 100 * 1024 * 1024; // hard limit: 100 MB
    if (setrlimit(RLIMIT_AS, &rl) == 0) {
        printf("Set virtual memory limit: Soft = %ld bytes (%.2f MB), Hard = %ld bytes (%.2f MB)\n",
               (long)rl.rlim_cur, (double)rl.rlim_cur / (1024*1024),
               (long)rl.rlim_max, (double)rl.rlim_max / (1024*1024));
    } else {
        perror("setrlimit RLIMIT_AS");
    }

    // 6. Disable core dumps by setting the core file size to 0.
    printf("\n--- Disabling Core Dump ---\n");
    rl.rlim_cur = 0;
    rl.rlim_max = 0;
    if (setrlimit(RLIMIT_CORE, &rl) == 0) {
        printf("Disabled core dump generation.\n");
    } else {
        perror("setrlimit RLIMIT_CORE");
    }

    // 7. Verify the limits that are now in effect.
    printf("\n--- Resource Limits After setrlimit ---\n");
    print_resource_limit("CPU Time", RLIMIT_CPU);
    print_resource_limit("File Size", RLIMIT_FSIZE);
    print_resource_limit("Virtual Memory", RLIMIT_AS);
    print_resource_limit("Open Files", RLIMIT_NOFILE);
    print_resource_limit("Core File Size", RLIMIT_CORE);

    // 8. Demonstrate the effect of the limits.

    // --- RLIMIT_FSIZE ---
    printf("\n--- Testing RLIMIT_FSIZE (File Size Limit) ---\n");
    const char* test_filename = "test_limited_file.txt";
    int fd = open(test_filename, O_CREAT | O_WRONLY | O_TRUNC, 0644);
    if (fd == -1) {
        perror("open test file");
    } else {
        char data[1024];
        memset(data, 'A', sizeof(data));
        ssize_t written;
        long total_written = 0;
        // Try to write 2 MB, i.e. more than the 1 MB soft limit.
        while (total_written < 2 * 1024 * 1024) {
            written = write(fd, data, sizeof(data));
            if (written == -1) {
                perror("write");
                printf("Write failed after writing approximately %ld bytes. File size limit likely reached.\n", total_written);
                break;
            }
            total_written += written;
        }
        close(fd);
        printf("Finished writing (or failed) to file.\n");
        // Remove the scratch file again.
        unlink(test_filename);
    }

    // --- RLIMIT_AS ---
    printf("\n--- Testing RLIMIT_AS (Virtual Memory Limit) ---\n");
    printf("Attempting to allocate large chunks of memory until limit is hit...\n");
    size_t chunk_size = 10 * 1024 * 1024; // 10 MB
    long allocated_mb = 0;
    char *ptr;
    // The chunks are intentionally never freed: the loop only ends once
    // malloc() fails.
    while (1) {
        ptr = malloc(chunk_size);
        if (ptr == NULL) {
            printf("malloc failed after allocating approximately %ld MB. Memory limit likely reached.\n", allocated_mb);
            break;
        }
        // Touch the memory to ensure it's actually allocated
        memset(ptr, 0, chunk_size);
        allocated_mb += chunk_size / (1024 * 1024);
        printf("Allocated %ld MB so far...\n", allocated_mb);
        // Small delay so the progress is easy to follow.
        sleep(1);
    }

    // --- RLIMIT_CPU (last, because it ends the program) ---
    printf("\n--- Testing RLIMIT_CPU (CPU Time Limit) ---\n");
    printf("Entering infinite loop. Should be killed by SIGKILL after 10 seconds (hard limit).\n");
    printf("You might see 'CPU time limit (soft) reached' message first (after 5s), then termination.\n");
    while(1) {
        // Busy loop: burns CPU time until the limit triggers.
    }

    // Normally not reached, because the CPU limit ends the program first.
    printf("Program finished normally (unexpected).\n");
    return 0;
}

9. 编译和运行

# 假设代码保存在 setrlimit_example.c 中
gcc -o setrlimit_example setrlimit_example.c

# 运行程序
./setrlimit_example

10. 预期输出 (片段)

--- Demonstrating setrlimit ---
PID: 12345

--- Initial Resource Limits ---
CPU Time       : Soft = unlimited, Hard = unlimited
File Size      : Soft = unlimited, Hard = unlimited
Data Segment   : Soft = unlimited, Hard = unlimited
Stack Size     : Soft = 8388608, Hard = unlimited
Virtual Memory : Soft = unlimited, Hard = unlimited
Open Files     : Soft = 1024, Hard = 1048576
Max Processes  : Soft = 62545, Hard = 62545
Core File Size : Soft = 0, Hard = unlimited

--- Setting CPU Time Limit ---
Set CPU time limit: Soft = 5s, Hard = 10s

--- Setting Open File Descriptor Limit ---
Set open file limit: Soft = 10, Hard = 20

--- Setting File Size Limit ---
Set file size limit: Soft = 1048576 bytes, Hard = 2097152 bytes

--- Setting Virtual Memory Limit ---
Set virtual memory limit: Soft = 52428800 bytes (50.00 MB), Hard = 104857600 bytes (100.00 MB)

--- Disabling Core Dump ---
Disabled core dump generation.

--- Resource Limits After setrlimit ---
CPU Time       : Soft = 5, Hard = 10
File Size      : Soft = 1048576, Hard = 2097152
Virtual Memory : Soft = 52428800, Hard = 104857600
Open Files     : Soft = 10, Hard = 20
Core File Size : Soft = 0, Hard = 0

--- Testing RLIMIT_FSIZE (File Size Limit) ---
Finished writing (or failed) to file.

--- Testing RLIMIT_AS (Virtual Memory Limit) ---
Attempting to allocate large chunks of memory until limit is hit...
Allocated 10 MB so far...
Allocated 20 MB so far...
...
Allocated 50 MB so far...
malloc failed after allocating approximately 50 MB. Memory limit likely reached.

--- Testing RLIMIT_CPU (CPU Time Limit) ---
Entering infinite loop. Should be killed by SIGKILL after 10 seconds (hard limit).
You might see 'CPU time limit (soft) reached' message first (after 5s), then termination.

Caught signal 24
CPU time limit (soft) reached. Exiting gracefully.
[程序被终止]

11. 总结

setrlimit 是一个非常有用的系统调用，用于管理和控制进程对系统资源的消耗。它对于编写健壮、安全的系统程序和服务至关重要，可以防止资源耗尽导致的系统不稳定。理解软限制和硬限制的区别，以及不同资源类型的行为，是掌握 Linux 进程管理的基础。

2025-08-15

Linux命令

Linux 系统中安装中文man手册

轻松掌握Linux系统中中文man手册的安装方法，快速获取帮助文档。适合所有Linux用户学习参考，立即访问了解更多！

在 Linux 系统中获取中文man手册的最合适方法如下：

一、主流发行版安装方法

1. Debian/Ubuntu 及衍生系统

# 更新软件源
sudo apt update
# 安装中文 man 手册包
sudo apt install manpages-zh

安装后，中文手册默认位于 /usr/share/man/zh_CN/。

2. RHEL/CentOS 及衍生系统

# 安装 EPEL 源（CentOS 7 及更早）
sudo yum install epel-release
# 安装中文 man 手册包
sudo yum install man-pages-zh-CN

安装后，中文手册路径为 /usr/share/man/zh_CN/。

3. Arch Linux 及衍生系统

# 直接安装预编译包
sudo pacman -S man-pages-zh_cn

安装路径为 /usr/share/man/zh_CN/。

二、配置中文手册别名

安装完成后，建议设置命令别名以区分中英文手册：

# 编辑 ~/.bashrc 或 ~/.zshrc
echo "alias cman='man -M /usr/share/man/zh_CN'" >> ~/.bashrc
# 生效配置
source ~/.bashrc

之后通过 cman ls 即可查看 ls 的中文手册页。

三、注意事项

语言环境设置：确保系统语言环境为中文（如 zh_CN.UTF-8），否则可能显示乱码。可执行 export LANG=zh_CN.UTF-8，并将此行加入 ~/.bashrc 使其永久生效。

依赖问题

Arch Linux 需提前安装 automake 和 opencc 等依赖。
CentOS 若安装失败，尝试更新系统并安装 langpacks-zh_CN。

乱码处理：若出现乱码，可安装中文字体（如 fonts-wqy-microhei）：sudo apt install fonts-wqy-microhei # Debian/Ubuntu

四、手动编译安装（备用方案）

若官方源无中文手册包，可手动编译：

# 下载源码（以 manpages-zh 1.5.1 为例）
wget https://src.fedoraproject.org/repo/pkgs/manpages-zh-CN/manpages-zh-1.5.1.tar.gz
tar -zxvf manpages-zh-1.5.1.tar.gz
cd manpages-zh-1.5.1
# 编译安装
./configure --prefix=/usr/local/zhman
make && sudo make install

安装后需设置 MANPATH 环境变量。

五、验证安装

# 查看中文手册路径
man -w | grep zh_CN
# 测试中文手册
cman ls  # 应显示中文帮助

总结

推荐方法：通过包管理器直接安装（如 manpages-zh），简单高效。
兼容性：部分命令可能无中文翻译，需结合英文手册使用。
扩展性：通过别名 cman 可无缝切换中英文手册，提升使用效率。

如果执行 man -w | grep zh_CN 时报错或没有输出，通常是因为系统的区域设置（Locale）未正确配置，导致 man 命令无法正确识别中文手册路径。以下是详细的解决步骤：

问题原因

man -w 命令用于查询 man 手册的搜索路径，但它依赖系统的区域设置（LANG 和 LC_* 环境变量）来定位多语言手册（如中文）。如果区域设置未正确配置为中文（如 zh_CN.UTF-8），man 会因无法识别语言环境而报错。

解决步骤

1. 临时设置正确的区域设置

首先临时设置 LANG 和 LC_ALL 为中文 UTF-8 编码，验证是否能解决问题：

# 设置环境变量（临时生效）
export LANG=zh_CN.UTF-8
export LC_ALL=zh_CN.UTF-8

# 再次执行 man -w 查看路径
man -w | grep zh_CN

如果输出类似 /usr/share/man/zh_CN/man1 的路径，说明区域设置已生效，问题解决。

2. 永久设置区域设置（推荐）

临时设置会在终端关闭后失效；如需永久生效，需要修改系统区域配置。

方法 1：通过 dpkg-reconfigure（Debian/Ubuntu）

# 安装语言包（如果未安装）
sudo apt install language-pack-zh-hans language-pack-zh-hans-base

# 重新配置区域设置（选择 zh_CN.UTF-8）
sudo dpkg-reconfigure locales

在弹出的界面中，用空格选中 zh_CN.UTF-8，回车确认。

方法 2：手动修改配置文件（通用）

编辑 /etc/locale.conf（CentOS/RHEL）或 /etc/default/locale（Debian/Ubuntu），添加以下内容：

# 对于 CentOS/RHEL
echo 'LANG="zh_CN.UTF-8"' | sudo tee /etc/locale.conf
echo 'LC_ALL="zh_CN.UTF-8"' | sudo tee -a /etc/locale.conf

# 对于 Debian/Ubuntu
echo 'LANG="zh_CN.UTF-8"' | sudo tee /etc/default/locale
echo 'LC_ALL="zh_CN.UTF-8"' | sudo tee -a /etc/default/locale

保存后重启终端或执行 source /etc/locale.conf（或 source /etc/default/locale）生效。

3. 验证区域设置是否生效

执行以下命令检查当前区域设置：

locale

输出应包含 LANG=zh_CN.UTF-8 和 LC_ALL=zh_CN.UTF-8（或类似中文编码）。

4. 确认中文手册已正确安装

如果区域设置正确但仍无法找到中文手册，检查 manpages-zh 是否已安装（以 Debian/Ubuntu 为例）：

# 检查是否安装
dpkg -l | grep manpages-zh

# 未安装则重新安装
sudo apt reinstall manpages-zh

5. 手动添加中文手册路径（备用）

如果 man 仍无法自动识别中文路径，可手动编辑 man 的配置文件，添加中文手册路径：

# 编辑 man 配置文件
sudo vim /etc/manpath.config

# 在文件末尾添加以下内容（根据实际安装路径调整）
MANPATH_MAP /usr/local/bin  /usr/share/man/zh_CN
MANPATH_MAP /usr/bin        /usr/share/man/zh_CN
MANPATH_MAP /bin            /usr/share/man/zh_CN

总结

核心问题是系统区域设置未正确配置为中文（zh_CN.UTF-8）。通过临时或永久设置 LANG 和 LC_ALL 环境变量，并确保 manpages-zh 包已安装，即可解决 man -w 报错问题，正常使用中文手册。

系统级中文 Man 手册问题解决方案（整合版）

针对用户遇到的 LC_ALL 设置失败但 man ls 显示中文的问题，结合历史操作和系统特性，整理以下跨发行版通用解决方案，覆盖区域设置、手册安装、路径映射、缓存生成等核心环节。

一、问题根源分析

用户现象：LC_ALL=zh_CN.UTF-8 设置失败（报错 No such file or directory），但 LANG=zh_CN.UTF-8 man ls 能显示中文。

核心原因：

1. 区域设置未完全支持：系统未正确生成或安装 zh_CN.UTF-8 区域配置，导致 LC_ALL（最高优先级）无法生效。
2. 语言环境回退机制：LANG（次优先级）作为默认语言环境，系统可能通过其他方式（如 manpages-zh 包的默认映射）找到了中文手册。
3. 手册路径未完全映射：部分命令（如 bash）的中文手册未被 MANPATH_MAP 正确关联，导致仅部分命令显示中文。

二、分步解决方案（按发行版区分）

（一）Debian/Ubuntu 系统

1. 安装/修复中文语言包

确保系统支持 zh_CN.UTF-8 区域设置：

# 更新软件源
sudo apt update

# 安装简体中文语言包（含 UTF-8 支持）
sudo apt install -y language-pack-zh-hans language-pack-zh-hans-base

# 安装 man 手册的中文翻译（覆盖所有核心命令）
sudo apt install -y manpages-zh

2. 强制生成区域设置

即使 LC_ALL 设置失败，仍需确保 zh_CN.UTF-8 被系统识别：

# 生成 zh_CN.UTF-8 区域配置（关键！）
sudo locale-gen zh_CN.UTF-8

# 编辑区域配置文件（覆盖默认设置）
sudo vim /etc/default/locale

修改为以下内容（强制中文优先）：

LANG="zh_CN.UTF-8"
LC_ALL="zh_CN.UTF-8"  # 即使报错，仍保留此配置（系统会自动回退到 LANG）
LANGUAGE="zh_CN:zh"   # 语言优先级：中文 > 英文

3. 配置 Man 手册路径映射

编辑 /etc/manpath.config（Debian/Ubuntu 上 man-db 的配置文件），确保所有核心命令路径映射到中文手册：

sudo vim /etc/manpath.config

在 MANPATH_MAP 部分添加（放在现有条目最前面，确保优先级）：

# 覆盖所有常用二进制目录（包括 bash、ls 等核心命令）
MANPATH_MAP	/bin			/usr/share/man/zh_CN
MANPATH_MAP	/usr/bin		/usr/share/man/zh_CN
MANPATH_MAP	/sbin			/usr/share/man/zh_CN
MANPATH_MAP	/usr/sbin		/usr/share/man/zh_CN
MANPATH_MAP	/usr/local/bin	/usr/share/man/zh_CN
MANPATH_MAP	/usr/local/sbin	/usr/share/man/zh_CN

4. 重新生成 Man 数据库缓存

修改配置后，强制刷新 man 的手册索引：

sudo mandb -c # 清除旧缓存并重新生成

5. 验证效果

# 不设置 LANG 直接查看（应自动使用中文）
man ls
man bash

# 显式指定中文环境（确保生效）
LANG=zh_CN.UTF-8 man man  # 应显示中文

（二）RHEL/CentOS 系统

1. 安装中文语言包与 Man 手册

# 启用 EPEL 源（若未启用）
sudo yum install -y epel-release

# 安装简体中文语言包
sudo yum install -y glibc-langpack-zh

# 安装 Man 手册的中文翻译
sudo yum install -y man-pages-zh-CN

2. 配置区域设置

编辑 /etc/locale.conf，设置中文环境：

sudo vim /etc/locale.conf

添加以下内容：

LANG="zh_CN.UTF-8"
LC_ALL="zh_CN.UTF-8"  # 保留配置（系统会自动回退到 LANG）

3. 手动生成区域设置（可选）

若 zh_CN.UTF-8 仍未被识别，手动生成：

# 查看当前系统支持的区域设置（locale）
locale -a

# 若缺失 zh_CN.UTF-8，先手动生成（需 root）
sudo localedef -c -f UTF-8 -i zh_CN zh_CN.UTF-8

# 再将其设为系统默认区域设置
sudo localectl set-locale LANG=zh_CN.UTF-8

4. 配置 Man 手册路径映射

编辑 /etc/man_db.conf（路径可能与 Debian 不同，通常为 /etc/man.config 或 /etc/man_db.conf），添加：

MANPATH_MAP	/bin			/usr/share/man/zh_CN
MANPATH_MAP	/usr/bin		/usr/share/man/zh_CN
MANPATH_MAP	/sbin			/usr/share/man/zh_CN
MANPATH_MAP	/usr/sbin		/usr/share/man/zh_CN

5. 刷新 Man 缓存

sudo mandb -c

6. 验证效果

man ls
man bash

三、通用优化技巧

1. 临时强制中文手册（无需修改环境变量）

LANG=zh_CN.UTF-8 man <命令> # 临时指定中文环境

2. 检查中文手册是否存在

若 man <命令> 仍显示英文，直接检查手册文件是否存在：

# 示例：检查 bash 的中文手册（man1 目录）
ls -l /usr/share/man/zh_CN/man1/bash.1.gz

# 若不存在，重新安装 manpages-zh 包

3. 解决 LC_ALL 设置失败问题

LC_ALL 报错 No such file or directory 通常是因为系统未生成该区域配置。即使无法修复，通过 LANG 变量仍可实现中文显示（系统会自动回退到 LANG）。

四、最终验证

完成以上步骤后，执行以下命令确认所有核心命令的中文手册均可显示：

man ls      # 应显示中文
man bash    # 应显示中文
man man     # 应显示中文

总结

通过安装中文语言包、强制生成区域配置、完善 MANPATH_MAP 映射并刷新缓存，即可解决大部分中文 Man 手册显示问题。即使 LC_ALL 设置失败，通过 LANG 变量和正确的路径映射，仍可实现中文手册的显示。

https://www.calcguide.tech/2025/08/15/linux系统中安装中文man-手册

Linux 开发者终极资源导航：全球镜像站 + 核心开发手册（2025 国际中文版）