2025-08-02

process_vm_readv/process_vm_writev 接口详解

基础接口

process_vm_readv(2)

NAME
       process_vm_readv, process_vm_writev - 在进程间传输数据

SYNOPSIS
       #define _GNU_SOURCE
       #include <sys/uio.h>
       
       ssize_t process_vm_readv(pid_t pid,
                               const struct iovec *local_iov,
                               unsigned long liovcnt,
                               const struct iovec *remote_iov,
                               unsigned long riovcnt,
                               unsigned long flags);
       
       ssize_t process_vm_writev(pid_t pid,
                                const struct iovec *local_iov,
                                unsigned long liovcnt,
                                const struct iovec *remote_iov,
                                unsigned long riovcnt,
                                unsigned long flags);

DESCRIPTION
       这些系统调用允许直接在调用进程和指定进程(pid)的内存之间传输数据，
       无需通过内核缓冲区复制。

       process_vm_readv() 从远程进程读取数据到本地进程
       process_vm_writev() 从本地进程写入数据到远程进程

       参数说明:
       - pid: 目标进程ID
       - local_iov: 本地内存区域描述符数组
       - liovcnt: 本地iovec数组元素个数
       - remote_iov: 远程内存区域描述符数组
       - riovcnt: 远程iovec数组元素个数
       - flags: 保留字段，必须为0

RETURN VALUE
       成功时返回传输的字节数，失败时返回-1并设置errno

ERRORS
       EACCES     没有权限访问目标进程内存
       EFAULT     指定的地址范围无效
       EINVAL     参数无效
       ENOMEM     内存不足
       EPERM      没有权限操作目标进程
       ESRCH      目标进程不存在

VERSIONS
       Linux 3.2+ 支持这些系统调用

CONFORMING TO
       这些是Linux特有的系统调用

NOTES
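       需要相同用户ID或CAP_SYS_PTRACE权限（内核按 ptrace 的
       PTRACE_MODE_ATTACH_REALCREDS 规则进行检查；若系统启用了 Yama 且
       /proc/sys/kernel/yama/ptrace_scope 不为 0，访问非子进程会被拒绝并返回 EPERM）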
       目标进程必须正在运行
       不会触发目标进程的信号处理程序

数据结构

iovec 结构体

struct iovec {
    void  *iov_base;    /* Starting address of the memory region */
    size_t iov_len;     /* Length of the region in bytes */
};

使用示例

示例1：基本内存读取

#define _GNU_SOURCE
#include <sys/uio.h>
#include <sys/types.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>

// Stand-ins for the data we want to read from the target process
int target_global_var = 42;
char target_string[] = "Hello from target process!";

/*
 * Read target_global_var and target_string out of the process target_pid.
 *
 * The remote addresses are taken from this program's own globals, so the
 * call is only meaningful when the target has those globals at the same
 * addresses (assumption: e.g. a fork()ed child of this program -- confirm
 * for your setup; otherwise obtain the addresses some other way).
 *
 * Returns 0 when both values were read completely, -1 on a syscall error or
 * on a short (partial) transfer.
 */
int read_remote_memory(pid_t target_pid) {
    struct iovec local_iov[2];
    struct iovec remote_iov[2];
    ssize_t result;
    int local_int = 0;
    char local_buffer[100] = {0};  // zero-filled: stays NUL-terminated
    const size_t expected = sizeof(target_global_var) + sizeof(target_string);

    // Local destination buffers; the last byte of local_buffer is reserved
    // for the terminator so printing it with %s is always safe.
    local_iov[0].iov_base = &local_int;
    local_iov[0].iov_len = sizeof(local_int);
    local_iov[1].iov_base = local_buffer;
    local_iov[1].iov_len = sizeof(local_buffer) - 1;

    // Remote source regions (sizeof of the array includes the trailing NUL)
    remote_iov[0].iov_base = &target_global_var;
    remote_iov[0].iov_len = sizeof(target_global_var);
    remote_iov[1].iov_base = target_string;
    remote_iov[1].iov_len = sizeof(target_string);

    result = process_vm_readv(target_pid,
                              local_iov, 2,
                              remote_iov, 2,
                              0);

    if (result == -1) {
        perror("process_vm_readv");
        return -1;
    }

    // The call may transfer fewer bytes than requested; the printed values
    // would then be incomplete, so treat a short read as a failure.
    if ((size_t)result != expected) {
        fprintf(stderr, "process_vm_readv: short read (%zd of %zu bytes)\n",
                result, expected);
        return -1;
    }

    printf("Read %zd bytes\n", result);
    printf("Remote int value: %d\n", local_int);
    printf("Remote string: %s\n", local_buffer);

    return 0;
}

示例2：内存写入操作

#define _GNU_SOURCE
#include <sys/uio.h>
#include <sys/types.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>

// Stand-ins for the writable data inside the target process
int target_writable_var = 100;
char target_writable_buffer[256] = "Original content";

/*
 * Overwrite target_writable_var and target_writable_buffer in the process
 * target_pid with new values.
 *
 * The remote addresses are taken from this program's own globals, so the
 * call is only meaningful when the target has those globals at the same
 * addresses (assumption: e.g. a fork()ed child of this program -- confirm
 * for your setup).
 *
 * Returns 0 when everything was written, -1 on a syscall error or on a
 * short (partial) transfer.
 */
int write_remote_memory(pid_t target_pid) {
    struct iovec local_iov[2];
    struct iovec remote_iov[2];
    ssize_t result;
    int new_value = 999;
    char new_string[] = "Modified by process_vm_writev!";
    // sizeof(new_string) includes the NUL terminator, so the remote buffer
    // ends up holding a proper C string.
    const size_t expected = sizeof(new_value) + sizeof(new_string);

    // Local data sources
    local_iov[0].iov_base = &new_value;
    local_iov[0].iov_len = sizeof(new_value);
    local_iov[1].iov_base = new_string;
    local_iov[1].iov_len = sizeof(new_string);

    // Remote destinations; new_string is smaller than the 256-byte buffer
    remote_iov[0].iov_base = &target_writable_var;
    remote_iov[0].iov_len = sizeof(target_writable_var);
    remote_iov[1].iov_base = target_writable_buffer;
    remote_iov[1].iov_len = sizeof(new_string);

    result = process_vm_writev(target_pid,
                               local_iov, 2,
                               remote_iov, 2,
                               0);

    if (result == -1) {
        perror("process_vm_writev");
        return -1;
    }

    // A short write leaves the target only partially updated
    if ((size_t)result != expected) {
        fprintf(stderr, "process_vm_writev: short write (%zd of %zu bytes)\n",
                result, expected);
        return -1;
    }

    printf("Wrote %zd bytes to remote process\n", result);
    return 0;
}

示例3：完整的工作示例

目标进程代码 (target.c)

#define _GNU_SOURCE
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <signal.h>

// Data the accessor process reads and overwrites from outside
volatile int shared_int = 12345;
char shared_string[256] = "This is shared data from target process";

// Flags shared between the signal handler and main(); sig_atomic_t is the
// only object type a handler may portably write to.
volatile sig_atomic_t running = 1;
volatile sig_atomic_t received_signal = 0;

/*
 * SIGUSR1 handler: remember which signal arrived and ask the main loop to
 * stop. It only stores to flags because printf() is not async-signal-safe;
 * the message is printed by main() once the loop has ended.
 */
void signal_handler(int sig) {
    received_signal = sig;
    running = 0;
}

/*
 * Print the PID and the addresses of the shared variables, then print their
 * current values every two seconds until SIGUSR1 arrives.
 */
int main(void) {
    printf("Target process PID: %d\n", getpid());
    // %p requires a void * argument
    printf("Shared int address: %p\n", (void *)&shared_int);
    printf("Shared string address: %p\n", (void *)shared_string);
    printf("Shared int value: %d\n", shared_int);
    printf("Shared string value: %s\n", shared_string);

    // Install the signal handler
    signal(SIGUSR1, signal_handler);

    printf("Target process running... Send SIGUSR1 to stop\n");

    while (running) {
        printf("shared_int = %d, shared_string = %s\n",
               shared_int, shared_string);
        sleep(2);  // returns early when a signal is delivered
    }

    if (received_signal != 0) {
        printf("Received signal %d\n", (int)received_signal);
    }

    printf("Target process exiting...\n");
    printf("Final values - shared_int = %d, shared_string = %s\n",
           shared_int, shared_string);

    return 0;
}

访问进程代码 (accessor.c)

#define _GNU_SOURCE
#include <sys/uio.h>
#include <sys/types.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <signal.h>

/*
 * Parse a remote address written in any strtoul() base-0 notation
 * (e.g. 0x601040). Returns 0 and stores the value in *out on success,
 * -1 on malformed input.
 */
static int parse_address(const char *text, unsigned long *out) {
    char *end = NULL;

    if (text[0] == '-') {  // strtoul would silently wrap negative input
        return -1;
    }
    errno = 0;
    unsigned long value = strtoul(text, &end, 0);
    if (errno != 0 || end == text || *end != '\0') {
        return -1;
    }
    *out = value;
    return 0;
}

/*
 * Usage: accessor <target_pid> [<int_addr> <string_addr>]
 *
 * Reads an int and a 256-byte string from the target process, overwrites
 * both, reads them back to verify, then sends SIGUSR1 to the target.
 * The two optional arguments are the addresses printed by the target
 * program; without them the original hard-coded example addresses are used.
 *
 * Exit status: 0 on success, 1 on bad arguments or a failed read/write.
 */
int main(int argc, char *argv[]) {
    if (argc != 2 && argc != 4) {
        fprintf(stderr, "Usage: %s <target_pid> [<int_addr> <string_addr>]\n", argv[0]);
        exit(1);
    }

    char *end = NULL;
    errno = 0;
    long pid_value = strtol(argv[1], &end, 10);
    if (errno != 0 || end == argv[1] || *end != '\0' || pid_value <= 0) {
        fprintf(stderr, "Invalid target PID: %s\n", argv[1]);
        exit(1);
    }
    pid_t target_pid = (pid_t)pid_value;

    // Example defaults; they must match the addresses the target prints
    unsigned long int_addr = 0x601040;
    unsigned long string_addr = 0x601060;
    if (argc == 4 &&
        (parse_address(argv[2], &int_addr) != 0 ||
         parse_address(argv[3], &string_addr) != 0)) {
        fprintf(stderr, "Invalid address argument\n");
        exit(1);
    }

    ssize_t result;
    int local_int = 0;
    char local_string[256] = {0};
    int new_int = 99999;
    char new_string[] = "Modified by accessor process!";

    // Separate iovec sets for reading and writing, so the verification read
    // below cannot accidentally land in the write source buffers.
    struct iovec read_local[2] = {
        { .iov_base = &local_int,   .iov_len = sizeof(local_int) },
        { .iov_base = local_string, .iov_len = sizeof(local_string) },
    };
    struct iovec read_remote[2] = {
        { .iov_base = (void *)int_addr,    .iov_len = sizeof(int) },
        { .iov_base = (void *)string_addr, .iov_len = sizeof(local_string) },
    };
    struct iovec write_local[2] = {
        { .iov_base = &new_int,   .iov_len = sizeof(new_int) },
        { .iov_base = new_string, .iov_len = sizeof(new_string) },
    };
    struct iovec write_remote[2] = {
        { .iov_base = (void *)int_addr,    .iov_len = sizeof(new_int) },
        { .iov_base = (void *)string_addr, .iov_len = sizeof(new_string) },
    };

    printf("Accessing process PID: %d\n", getpid());
    printf("Target PID: %d\n", target_pid);

    // Read the remote process memory
    printf("\n--- Reading remote memory ---\n");
    result = process_vm_readv(target_pid,
                              read_local, 2,
                              read_remote, 2,
                              0);

    if (result == -1) {
        perror("process_vm_readv");
        printf("Note: You need to adjust memory addresses based on target process\n");
        return 1;
    }
    local_string[sizeof(local_string) - 1] = '\0';  // never print unterminated data

    printf("Read %zd bytes\n", result);
    printf("Remote int value: %d\n", local_int);
    printf("Remote string: %s\n", local_string);

    // Modify the remote process memory
    printf("\n--- Writing to remote memory ---\n");
    result = process_vm_writev(target_pid,
                               write_local, 2,
                               write_remote, 2,
                               0);

    if (result == -1) {
        perror("process_vm_writev");
        return 1;
    }

    printf("Wrote %zd bytes to remote process\n", result);
    if ((size_t)result != sizeof(new_int) + sizeof(new_string)) {
        fprintf(stderr, "Warning: short write, target only partially updated\n");
    }

    // Read again into the read buffers to verify the modification
    printf("\n--- Verifying changes ---\n");
    result = process_vm_readv(target_pid,
                              read_local, 2,
                              read_remote, 2,
                              0);

    if (result != -1) {
        local_string[sizeof(local_string) - 1] = '\0';
        printf("Remote int value after write: %d\n", local_int);
        printf("Remote string after write: %s\n", local_string);
    } else {
        perror("process_vm_readv");
    }

    // Tell the target process to stop
    printf("\n--- Sending signal to target process ---\n");
    if (kill(target_pid, SIGUSR1) == -1) {
        perror("kill");
    } else {
        printf("Signal sent successfully\n");
    }

    return 0;
}

示例4：实用工具 - 进程内存检查器

#define _GNU_SOURCE
#include <sys/uio.h>
#include <sys/types.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <fcntl.h>

/*
 * Simple memory dump tool: read `size` bytes at address `addr` of process
 * `pid` and print them as a hex dump followed by an ASCII rendering.
 *
 * Only the bytes actually transferred are printed, so a partial read shows
 * up as a byte count smaller than `size`.
 *
 * Returns 0 on success, -1 if allocation or the read fails.
 */
int dump_remote_memory(pid_t pid, unsigned long addr, size_t size) {
    unsigned char *buffer;  // raw bytes: unsigned avoids sign-extension surprises
    struct iovec local_iov[1];
    struct iovec remote_iov[1];
    ssize_t result;
    size_t i;

    // malloc(0) is allowed to return NULL, which would be misreported as an
    // allocation failure; always request at least one byte.
    buffer = malloc(size > 0 ? size : 1);
    if (!buffer) {
        perror("malloc");
        return -1;
    }

    local_iov[0].iov_base = buffer;
    local_iov[0].iov_len = size;

    remote_iov[0].iov_base = (void *)addr;
    remote_iov[0].iov_len = size;

    result = process_vm_readv(pid,
                              local_iov, 1,
                              remote_iov, 1,
                              0);

    if (result == -1) {
        perror("process_vm_readv");
        free(buffer);
        return -1;
    }

    // Hex view, 16 bytes per row, each row prefixed with its address
    printf("Memory dump at 0x%lx (%zd bytes):\n", addr, result);
    for (i = 0; i < (size_t)result; i++) {
        if (i % 16 == 0) {
            printf("\n%08lx: ", addr + i);
        }
        printf("%02x ", buffer[i]);
    }
    printf("\n");

    // ASCII view: printable characters as-is, everything else as '.'
    printf("\nASCII: ");
    for (i = 0; i < (size_t)result; i++) {
        if (i % 16 == 0 && i > 0) {
            printf("\n       ");
        }
        printf("%c", (buffer[i] >= 32 && buffer[i] <= 126) ? buffer[i] : '.');
    }
    printf("\n");

    free(buffer);
    return 0;
}

/*
 * Usage: <prog> <pid> <address> <size>
 *
 * Validates the three arguments and dumps the requested memory range of
 * the target process. Address and size accept decimal, octal (0...) and
 * hexadecimal (0x...) notation.
 *
 * Exit status: 0 on success, 1 on bad arguments or a failed dump.
 */
int main(int argc, char *argv[]) {
    if (argc != 4) {
        fprintf(stderr, "Usage: %s <pid> <address> <size>\n", argv[0]);
        fprintf(stderr, "Example: %s 1234 0x601040 64\n", argv[0]);
        exit(1);
    }

    char *end = NULL;

    errno = 0;
    long pid_value = strtol(argv[1], &end, 10);
    if (errno != 0 || end == argv[1] || *end != '\0' || pid_value <= 0) {
        fprintf(stderr, "Invalid pid: %s\n", argv[1]);
        exit(1);
    }
    pid_t target_pid = (pid_t)pid_value;

    // strtoul silently wraps negative input, so reject a leading '-' first
    errno = 0;
    unsigned long address = strtoul(argv[2], &end, 0);
    if (argv[2][0] == '-' || errno != 0 || end == argv[2] || *end != '\0') {
        fprintf(stderr, "Invalid address: %s\n", argv[2]);
        exit(1);
    }

    errno = 0;
    unsigned long size_value = strtoul(argv[3], &end, 0);
    if (argv[3][0] == '-' || errno != 0 || end == argv[3] || *end != '\0') {
        fprintf(stderr, "Invalid size: %s\n", argv[3]);
        exit(1);
    }
    size_t size = (size_t)size_value;

    printf("Dumping %zu bytes from process %d at address 0x%lx\n",
           size, target_pid, address);

    // Map the helper's -1 onto a conventional exit status
    return dump_remote_memory(target_pid, address, size) == 0 ? 0 : 1;
}

示例5：批量内存操作

#define _GNU_SOURCE
#include <sys/uio.h>
#include <sys/types.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>

#define MAX_IOV 1024

/* One transfer request: `size` bytes between remote_addr and local_buffer. */
typedef struct {
    void *remote_addr;   /* address inside the target process */
    size_t size;         /* number of bytes to transfer */
    void *local_buffer;  /* buffer inside the calling process */
} memory_region_t;

/*
 * Shared implementation of the batch read/write entry points.
 *
 * Builds one local and one remote iovec per region and issues a single
 * process_vm_readv() (is_write == 0) or process_vm_writev() (is_write != 0).
 *
 * Returns 0 when every requested byte was transferred, -1 on invalid
 * arguments, on a syscall error, or on a short (partial) transfer.
 */
static int batch_transfer(pid_t pid, const memory_region_t *regions,
                          int count, int is_write) {
    struct iovec local_iov[MAX_IOV];
    struct iovec remote_iov[MAX_IOV];
    const char *syscall_name = is_write ? "process_vm_writev" : "process_vm_readv";
    size_t expected = 0;
    ssize_t result;

    // A negative count would turn into a huge unsigned value at the syscall
    if (count < 0 || (count > 0 && regions == NULL)) {
        fprintf(stderr, "Invalid region list\n");
        return -1;
    }
    if (count > MAX_IOV) {
        fprintf(stderr, "Too many regions\n");
        return -1;
    }

    // Fill both iovec arrays, pairing each local buffer with its remote range
    for (int i = 0; i < count; i++) {
        local_iov[i].iov_base = regions[i].local_buffer;
        local_iov[i].iov_len = regions[i].size;
        remote_iov[i].iov_base = regions[i].remote_addr;
        remote_iov[i].iov_len = regions[i].size;
        expected += regions[i].size;
    }

    if (is_write) {
        result = process_vm_writev(pid,
                                   local_iov, (unsigned long)count,
                                   remote_iov, (unsigned long)count,
                                   0);
    } else {
        result = process_vm_readv(pid,
                                  local_iov, (unsigned long)count,
                                  remote_iov, (unsigned long)count,
                                  0);
    }

    if (result == -1) {
        perror(syscall_name);
        return -1;
    }

    printf("Batch %s completed: %zd bytes total\n",
           is_write ? "write" : "read", result);

    // The caller only gets a status back, so a partial transfer must not
    // be reported as success.
    if ((size_t)result != expected) {
        fprintf(stderr, "%s: short transfer (%zd of %zu bytes)\n",
                syscall_name, result, expected);
        return -1;
    }
    return 0;
}

/*
 * Read several memory regions of process `pid` in one call.
 * regions[i].remote_addr is copied into regions[i].local_buffer.
 * Returns 0 on complete success, -1 otherwise.
 */
int batch_read_memory(pid_t pid, memory_region_t *regions, int count) {
    return batch_transfer(pid, regions, count, 0);
}

/*
 * Write several memory regions of process `pid` in one call.
 * regions[i].local_buffer is copied to regions[i].remote_addr.
 * Returns 0 on complete success, -1 otherwise.
 */
int batch_write_memory(pid_t pid, memory_region_t *regions, int count) {
    return batch_transfer(pid, regions, count, 1);
}

/* Placeholder entry point: prints a short notice describing this example. */
int main() {
    // One write of the concatenated banner; the text is identical to
    // printing the three lines one by one.
    fputs("Batch memory operations example\n"
          "This example shows how to perform batch operations\n"
          "You need to provide actual PID and memory addresses\n",
          stdout);
    return 0;
}

使用限制和注意事项

权限要求

相同用户: 调用进程和目标进程必须有相同的有效用户ID

ptrace权限: 或者调用进程需要 CAP_SYS_PTRACE 权能

进程状态: 目标进程必须正在运行（不是僵尸进程）

安全限制

内存保护: 不能访问目标进程的受保护内存区域

地址有效性: 必须确保远程地址在目标进程的有效地址空间内

对齐要求: 某些体系结构可能有内存对齐要求

技术限制

最大IOV数量: 系统可能限制iovec数组的最大长度

性能考虑: 大量小的传输可能不如批量传输高效

原子性: 单次调用内的多个传输不是原子的

错误处理

部分传输: 可能只传输部分数据，需要检查返回值

地址错误: 若第一个 iovec 元素就指向无效地址，调用失败并返回 EFAULT；若在已传输部分数据之后才遇到无效的远程地址，则返回已成功传输的字节数（部分传输，以 iovec 元素为粒度，不会拆分单个元素）

进程终止: 目标进程在操作过程中终止会导致错误

最佳实践

地址获取: 使用调试信息或符号表获取准确的内存地址

缓冲区管理: 确保本地缓冲区足够大且生命周期正确

错误检查: 始终检查返回值并处理错误情况

权限验证: 在执行操作前验证权限和进程状态

这些系统调用提供了强大的进程间内存访问能力，但使用时需要谨慎处理安全和权限问题。

2025-08-02

C语言开发

YARA 的C语言完整集成与开发方法

以下是 YARA 的 C 语言完整集成与开发方法，包括：

YARA C API 的使用
规则编译、文件扫描、回调处理
完整的 C 示例代码
编译与链接方法
高级用法（自定义外部变量、模块支持等）

🧰 一、前置条件

1. 安装 YARA 开发库

Ubuntu/Debian

1 2	sudo apt-get install libyara-dev yara

CentOS/RHEL

sudo yum install yara-devel
# 或使用 dnf（新版本）
sudo dnf install yara-devel

macOS

1 2	brew install yara

确保 yara 命令可用，并且头文件（yara.h）和库文件（libyara.so / libyara.a）已安装。

📦 二、YARA C API 核心概念

YARA C API 主要包含以下组件：

| 组件 | 说明 |
| --- | --- |
| YR_COMPILER | 用于编译 YARA 规则 |
| YR_RULES | 编译后的规则集合 |
| yr_rules_scan_*(…) | 扫描文件/内存/文件描述符 |
| YR_CALLBACK_FUNC | 匹配回调函数（接收匹配结果） |

🧪 三、完整 C 示例代码：扫描文件并输出匹配结果

文件：yara_scan.c

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <yara.h>

/*
 * Scan callback: prints every matching rule together with its tags and the
 * strings that matched.
 *
 * context      - scan context, needed to iterate over the matches
 * message      - CALLBACK_MSG_* code describing the event
 * message_data - for rule messages, the YR_RULE the event refers to
 * user_data    - value passed to yr_rules_scan_*; unused here
 *
 * Always returns CALLBACK_CONTINUE so the scan runs to completion.
 */
int callback_function(YR_SCAN_CONTEXT* context, int message, void* message_data, void* user_data)
{
    YR_RULE* rule;
    YR_STRING* string;
    YR_MATCH* match;
    const char* tag;
    int tag_count = 0;

    (void) user_data;

    switch (message)
    {
        case CALLBACK_MSG_RULE_MATCHING:
            rule = (YR_RULE*) message_data;
            printf("[+] 匹配规则: %s", rule->identifier);

            // rule->tags is a sequence of NUL-terminated strings, not an
            // array of pointers, so it must be walked with the library macro.
            yr_rule_tags_foreach(rule, tag)
            {
                printf("%s%s", tag_count == 0 ? " (标签: " : ", ", tag);
                tag_count++;
            }
            if (tag_count > 0)
            {
                printf(")");
            }
            printf("\n");

            // Print every match of every string defined in the rule
            yr_rule_strings_foreach(rule, string)
            {
                yr_string_matches_foreach(context, string, match)
                {
                    // Casts make the arguments agree with %.*s (int, char*)
                    // and %llx (unsigned long long).
                    printf("    ├─ 字符串: %s = \"%.*s\" @ 0x%llx\n",
                           string->identifier,
                           (int) match->data_length,
                           (const char*) match->data,
                           (unsigned long long) (match->base + match->offset));
                }
            }
            break;

        case CALLBACK_MSG_RULE_NOT_MATCHING:
            // Rules that did not match are intentionally not reported
            break;

        case CALLBACK_MSG_SCAN_FINISHED:
            // Scan finished; nothing to do
            break;

        default:
            break;
    }

    return CALLBACK_CONTINUE;  // keep scanning
}

/*
 * Usage: yara_scan <rules.yar> <target file>
 *
 * Compiles the rule file, scans the target file and reports matches through
 * callback_function.
 *
 * Exit status: 0 when the scan completed, 1 on any failure (bad arguments,
 * unreadable rule file, rule compilation errors, scan error).
 */
int main(int argc, char** argv)
{
    if (argc != 3)
    {
        fprintf(stderr, "用法: %s <规则文件.yar> <目标文件>\n", argv[0]);
        return 1;
    }

    const char* rules_file = argv[1];
    const char* target_file = argv[2];

    YR_COMPILER* compiler = NULL;
    YR_RULES* rules = NULL;
    FILE* rule_fh = NULL;
    int result;

    // Initialise libyara; nothing else may be called if this fails
    result = yr_initialize();
    if (result != ERROR_SUCCESS)
    {
        fprintf(stderr, "无法初始化 YARA: %d\n", result);
        return 1;
    }

    // Create the compiler
    result = yr_compiler_create(&compiler);
    if (result != ERROR_SUCCESS)
    {
        fprintf(stderr, "无法创建编译器: %d\n", result);
        goto cleanup;
    }

    // Open the rule file
    rule_fh = fopen(rules_file, "r");
    if (!rule_fh)
    {
        perror("无法打开规则文件");
        result = -1;
        goto cleanup;
    }

    // Compile the rules; the return value is the number of errors found.
    // `result` must be set explicitly, otherwise the program would exit
    // with status 0 even though compilation failed.
    if (yr_compiler_add_file(compiler, rule_fh, NULL, rules_file) > 0)
    {
        fprintf(stderr, "规则编译失败！\n");
        result = -1;
        goto cleanup;
    }

    // Fetch the compiled rules
    result = yr_compiler_get_rules(compiler, &rules);
    if (result != ERROR_SUCCESS)
    {
        fprintf(stderr, "无法获取规则: %d\n", result);
        goto cleanup;
    }

    // Scan the target file
    printf("🔍 开始扫描文件: %s\n", target_file);
    result = yr_rules_scan_file(rules, target_file, 0, callback_function, NULL, 0);

    if (result != ERROR_SUCCESS)
    {
        fprintf(stderr, "扫描失败: %d\n", result);
    }

cleanup:
    // Release everything that was acquired, in reverse order
    if (rule_fh) fclose(rule_fh);
    if (rules) yr_rules_destroy(rules);
    if (compiler) yr_compiler_destroy(compiler);
    yr_finalize();

    return result == ERROR_SUCCESS ? 0 : 1;
}

🔧 四、编写测试 YARA 规则文件

文件：test.yar

// Tags are declared after the rule name, separated by spaces;
// YARA has no "tags:" section.
rule HelloWorldRule : test demo
{
    strings:
        $greeting = "Hello, World!" ascii
        $number = { 48 65 6C 6C 6F }  // "Hello" in hex

    condition:
        all of them
}

🛠️ 五、编译 C 程序

方法 1：使用 gcc 直接编译

1 2	gcc -o yara_scan yara_scan.c -lyara

如果提示找不到 -lyara，请确认 libyara.so 在 /usr/lib 或 /usr/local/lib。

方法 2：指定路径（如自定义安装）

gcc -o yara_scan yara_scan.c \
    -I/usr/local/include \
    -L/usr/local/lib \
    -lyara

▶️ 六、运行测试

# 创建测试文件
echo "Hello, World!" > test.txt

# 编译并运行
gcc -o yara_scan yara_scan.c -lyara
./yara_scan test.yar test.txt

预期输出：

🔍 开始扫描文件: test.txt
[+] 匹配规则: HelloWorldRule (标签: test, demo)
    ├─ 字符串: $greeting = "Hello, World!" @ 0x0
    ├─ 字符串: $number = "Hello" @ 0x0

⚙️ 七、高级功能集成

1. 使用外部变量（External Variables）

修改规则：

// "text" and "keyword" are external string variables supplied by the host
// program; "contains" is an infix operator, not a function.
rule HasKeyword
{
    condition:
        text contains keyword
}

在 C 中设置外部变量：

YR_EXTERNAL_VARIABLE ext_vars&#91;] = {
    { .identifier = "keyword", .type = YE_STRING, .value = {.ss = "malware"} },
    { .identifier = "text",    .type = YE_STRING, .value = {.ss = "this is a malware sample"} },
    { .identifier = NULL }  // 结束标记
};

// 扫描时传入
yr_rules_scan_mem(..., callback, NULL, 0, ext_vars);

2. 扫描内存缓冲区

char buffer&#91;] = "Hello, World!";
size_t buffer_len = strlen(buffer);

yr_rules_scan_mem(rules, buffer, buffer_len, 0, callback_function, NULL, 0);

3. 启用内置模块（如 hash, pe, elf）

// 启用 PE 模块
yr_rules_load(rules, "compiled_rules.bin");  // 可选：保存编译后的规则
yr_initialize();

// 启用模块（必须在扫描前）
yr_enable_features(YARA_FEATURE_EXTERNAL_VARIABLES | YARA_FEATURE_MODULES);

// 或使用宏控制
#define YR_ENABLE_CRYPTO

注意：使用 pe, hash 等模块需在编译 YARA 时启用对应功能。

📁 八、保存/加载编译后的规则（提高性能）

// Save the compiled rules to a file
// NOTE(review): return codes are not checked anywhere in this snippet
yr_rules_save(rules, "compiled.yarc");

// Load previously compiled rules
YR_RULES* loaded_rules;
yr_rules_load("compiled.yarc", &loaded_rules);

// Scan directly with the loaded rules
yr_rules_scan_file(loaded_rules, target_file, 0, callback, NULL, 0);

// Destroy the loaded rules
yr_rules_destroy(loaded_rules);

适用于规则不变、频繁扫描的场景（如安全网关、EDR）。

🧩 九、常见错误与解决

| 错误 | 原因 | 解决 |
| --- | --- | --- |
| undefined reference to 'yr_initialize' | 未链接 -lyara | 添加 -lyara |
| cannot open shared object libyara.so | 动态库未找到 | sudo ldconfig 或设置 LD_LIBRARY_PATH |
| 编译规则失败 | 语法错误 | 使用 yara test.yar /bin/ls 测试规则 |
| 内存扫描崩溃 | 缓冲区为空或未初始化 | 检查指针和长度 |

📚 十、官方文档与资源

YARA C API 文档：https://yara.readthedocs.io/en/stable/capi.html
头文件参考：/usr/include/yara.h
示例代码：https://github.com/VirusTotal/yara/tree/master/tests
YARA 构建说明：https://github.com/VirusTotal/yara#building-from-sources

✅ 总结

通过 C 语言集成 YARA，你可以：

将 YARA 深度嵌入到安全产品中（如杀毒引擎、EDR、防火墙）
实现高性能、低延迟的实时扫描
与自定义解析器、沙箱、驱动联动
支持跨平台（Linux、Windows、macOS）

2025-08-02

Linux命令

多编码的测试文本文件

生成一系列多语言、多编码的测试文本文件，文件扩展名为 .txt，用于测试 libmagic、chardet、cchardet 等工具对不同语种和编码的识别能力。

这些文件将覆盖：

✅ 主要语言：中文、日文、韩文、阿拉伯文、俄文、西欧文、UTF-16 等
✅ 常见编码：UTF-8、GBK、Big5、Shift_JIS、EUC-KR、ISO-8859-1、UTF-16LE/BE
✅ 文件命名规范：<语言>_<编码>.txt

🛠️ 第一步：创建工作目录

mkdir -p ~/encoding-test
cd ~/encoding-test

🧪 第二步：生成各文种编码测试文件（.txt）

1. UTF-8（通用 Unicode）

cat > zh_utf8.txt << 'EOF'
中文测试：卫星状态正常
时间：2025-04-05T12:00:00Z
消息：系统在线，载荷激活
EOF

cat > ja_utf8.txt << 'EOF'
日本語テスト：衛星ステータス正常
時刻：2025-04-05T12:00:00Z
メッセージ：システム起動中
EOF

cat > ko_utf8.txt << 'EOF'
한국어 테스트: 위성 상태 정상
시간: 2025-04-05T12:00:00Z
메시지: 시스템 온라인
EOF

cat > ar_utf8.txt << 'EOF'
اختبار عربي: الحالة طبيعية
الوقت: 2025-04-05T12:00:00Z
الرسالة: النظام يعمل
EOF

cat > ru_utf8.txt << 'EOF'
Тест на русском: Состояние нормальное
Время: 2025-04-05T12:00:00Z
Сообщение: Система работает
EOF

cat > en_utf8.txt << 'EOF'
English Test: Status Nominal
Time: 2025-04-05T12:00:00Z
Message: System Online
EOF

cat > fr_utf8.txt << 'EOF'
Test français : État normal
Heure : 2025-04-05T12:00:00Z
Message : Système en ligne
EOF

2. GBK（简体中文）

# 从 UTF-8 转为 GBK
iconv -f UTF-8 -t GBK zh_utf8.txt -o zh_gbk.txt

3. Big5（繁体中文）

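# 注意：Big5 不包含简体字，直接转换 zh_utf8.txt 会报 “cannot convert” 错误，
# 因此先准备一份繁体文本再转换
cat > zh_tw_utf8.txt << 'EOF'
中文測試：衛星狀態正常
時間：2025-04-05T12:00:00Z
訊息：系統在線，載荷啟動
EOF
iconv -f UTF-8 -t BIG5 zh_tw_utf8.txt -o zh_big5.txt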

4. Shift_JIS（日文）

1	iconv -f UTF-8 -t SHIFT_JIS ja_utf8.txt -o ja_shift_jis.txt

5. EUC-KR（韩文）

1	iconv -f UTF-8 -t EUC-KR ko_utf8.txt -o ko_euc_kr.txt

6. ISO-8859-1（西欧，如法语、德语）

1	iconv -f UTF-8 -t ISO-8859-1 fr_utf8.txt -o fr_latin1.txt

7. UTF-16LE（小端 Unicode）

echo "UTF-16LE Test: 多语言混合" | iconv -f UTF-8 -t UTF-16LE -o mixed_utf16le.txt

8. UTF-16BE（大端 Unicode）

echo "UTF-16BE Test: Satellite Message" | iconv -f UTF-8 -t UTF-16BE -o en_utf16be.txt

9. ASCII（纯英文，无扩展字符）

cat > en_ascii.txt << 'EOF'
Satellite Telemetry Log
Status: Nominal
Time: 2025-04-05T12:00:00Z
EOF

10. UTF-8 with BOM（带签名）

1
2
3

# 手动生成带 BOM 的 UTF-8
printf '\xEF\xBB\xBF' > zh_utf8_bom.txt
cat zh_utf8.txt >> zh_utf8_bom.txt

✅ 第三步：验证文件生成成功

1	ls -l *.txt

你应该看到类似：

-rw-r--r-- 1 user user  123 Apr  5 12:00 zh_utf8.txt
-rw-r--r-- 1 user user  123 Apr  5 12:00 zh_gbk.txt
-rw-r--r-- 1 user user  123 Apr  5 12:00 zh_big5.txt
-rw-r--r-- 1 user user  123 Apr  5 12:00 ja_shift_jis.txt
-rw-r--r-- 1 user user  123 Apr  5 12:00 ko_euc_kr.txt
-rw-r--r-- 1 user user  123 Apr  5 12:00 fr_latin1.txt
-rw-r--r-- 1 user user  123 Apr  5 12:00 mixed_utf16le.txt
...

🔍 第四步：使用 file 命令识别类型

1	file *.txt

✅ 预期输出示例：

en_ascii.txt:        ASCII text
en_utf8.txt:         UTF-8 Unicode text
zh_gbk.txt:          ISO-8859 text
zh_big5.txt:         ISO-8859 text
ja_shift_jis.txt:    ISO-8859 text
ko_euc_kr.txt:       ISO-8859 text
fr_latin1.txt:       ISO-8859 text
mixed_utf16le.txt:   Little-endian UTF-16 Unicode text
en_utf16be.txt:      Big-endian UTF-16 Unicode text
zh_utf8_bom.txt:     UTF-8 Unicode (with BOM) text

⚠️ 注意：zh_gbk.txt 被识别为 ISO-8859 text —— 这正是我们之前说的：libmagic 无法区分 GBK 和 Latin-1

🧪 第五步：使用 cchardet 精确识别编码

1	pip3 install cchardet

创建 detect_all.py：

#!/usr/bin/env python3
"""Print the encoding cchardet detects for every *.txt file in the current directory."""
import cchardet
import glob

for filepath in sorted(glob.glob("*.txt")):
    with open(filepath, 'rb') as f:
        raw = f.read()
    result = cchardet.detect(raw)
    # cchardet reports None for both fields when it cannot decide (for
    # example on an empty file); formatting None with a width or precision
    # would raise TypeError, so substitute printable placeholders.
    encoding = result['encoding'] or "unknown"
    confidence = result['confidence'] or 0.0
    print(f"{filepath:20} → {encoding:10} (置信度: {confidence:.2f})")

运行：

1	python3 detect_all.py

✅ 预期输出（示例）：

en_ascii.txt         → ascii       (置信度: 1.00)
en_utf16be.txt       → UTF-16BE    (置信度: 1.00)
en_utf8.txt          → UTF-8       (置信度: 1.00)
fr_latin1.txt        → ISO-8859-1  (置信度: 1.00)
ja_shift_jis.txt     → SHIFT_JIS   (置信度: 0.99)
ko_euc_kr.txt        → EUC-KR      (置信度: 0.99)
zh_big5.txt          → Big5        (置信度: 0.99)
zh_gbk.txt           → GB2312      (置信度: 0.99)
zh_utf8_bom.txt      → UTF-8       (置信度: 1.00)
zh_utf8.txt          → UTF-8       (置信度: 1.00)

✅ 成功识别 GBK 为 GB2312（GB2312 是 GBK 的子集，可接受）

✅ 总结：各文种编码测试文件清单

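| 语言 | 编码 | 文件名 |
| --- | --- | --- |
| 中文（简体） | UTF-8 | zh_utf8.txt |
| 中文（简体） | GBK | zh_gbk.txt |
| 中文（繁体） | Big5 | zh_big5.txt |
| 日文 | Shift_JIS | ja_shift_jis.txt |
| 韩文 | EUC-KR | ko_euc_kr.txt |
| 俄文 | UTF-8 | ru_utf8.txt |
| 阿拉伯文 | UTF-8 | ar_utf8.txt |
| 法文 | ISO-8859-1 | fr_latin1.txt |
| 英文 | ASCII | en_ascii.txt |
| 多语言混合 | UTF-16LE | mixed_utf16le.txt |
| 英文 | UTF-16BE | en_utf16be.txt |
| 中文 | UTF-8 with BOM | zh_utf8_bom.txt |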

🛰️ 在卫星通信中的应用

你可以用这些文件：

测试地面站软件的编码自动识别能力
训练 chardet 模型（可选）
构建“多语言遥测解析引擎”
自动化处理来自不同国家卫星的数据

2025-08-02

C语言开发

安装使用libmagic库

如果你遇到的错误：

detect.c:5:10: fatal error: magic.h: No such file or directory
    5 | #include <magic.h>
      |          ^~~~~~~~~
compilation terminated.

说明编译器找不到 magic.h 头文件，这是 libmagic 开发包的一部分。虽然你可能已经安装了 file 命令，但没有安装其开发头文件（devel/dev 包）。

✅ 解决方案：安装 libmagic 的开发包

🔧 Ubuntu / Debian 系统

你需要安装 libmagic-dev：

1
2
3

sudo apt update
sudo apt install -y libmagic-dev

✅ 这个包会提供：

/usr/include/magic.h ← 编译时需要

/usr/lib/x86_64-linux-gnu/libmagic.so ← 链接时需要

安装完成后，重新编译：

1 2	gcc -o detect detect.c -lmagic

✅ 应该可以成功编译。

🔧 CentOS / RHEL / Rocky Linux / AlmaLinux

sudo yum install -y file-devel
# 或者在较新版本中：
sudo dnf install -y file-devel

然后编译：

1 2	gcc -o detect detect.c -lmagic

🔧 Fedora

1 2	sudo dnf install -y file-devel

🔧 openSUSE

1 2	sudo zypper install -y libmagic-devel

🔍 验证头文件是否已安装

运行以下命令，检查 magic.h 是否存在：

1 2	ls /usr/include/magic.h

✅ 正常输出：

1 2	/usr/include/magic.h

如果没有输出，说明 libmagic-dev 还没装好。

📦 验证库文件是否存在

find /usr/lib* -name "libmagic.*" | grep -v pkgconfig

你应该看到类似：

1
2
3

/usr/lib/x86_64-linux-gnu/libmagic.so
/usr/lib/x86_64-linux-gnu/libmagic.a

🛠️ 重新编译（确认成功）

// detect.c - 使用 libmagic 识别卫星数据类型
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <magic.h>  // libmagic 头文件

// Report the last libmagic error for this handle, release it, then exit(1)
void die(magic_t magic) {
    const char *reason = magic_error(magic);

    fprintf(stderr, "Error: %s\n", reason);
    magic_close(magic);
    exit(1);
}

// 分析单个文件
void analyze_file(const char *filename) {
    magic_t magic;

    // 1. 创建 magic handle
    magic = magic_open(MAGIC_MIME_TYPE | MAGIC_MIME_ENCODING);
    if (!magic) {
        fprintf(stderr, "Failed to initialize libmagic\n");
        exit(1);
    }

    // 2. 加载 magic 数据库
    if (magic_load(magic, NULL) != 0) {  // NULL 表示使用默认数据库
        die(magic);
    }

    // 3. 获取 MIME 类型和编码
    const char *mime = magic_file(magic, filename);
    const char *encoding = NULL;

    // 分离 MIME 和编码（magic_file 返回 "type; charset=xxx"）
    char *semicolon = strchr(mime, ';');
    char mime_type&#91;64] = {0};
    char charset&#91;32] = {0};

    if (semicolon) {
        strncpy(mime_type, mime, semicolon - mime);
        sscanf(semicolon, "; charset=%s", charset);
    } else {
        strcpy(mime_type, mime);
        strcpy(charset, "unknown");
    }

    // 4. 打印结果
    printf("📄 %s:\n", filename);
    printf("   MIME 类型: %s\n", mime_type);
    printf("   字符编码: %s\n", charset);

    // 5. 卫星场景智能判断
    if (strcmp(mime_type, "text/plain") == 0) {
        printf("   🛰️ 判定: 明文遥测/日志文件 → 可直接解析\n");
    } else if (strstr(mime_type, "image/")) {
        printf("   🛰️ 判定: 遥感图像 → 保存为可视化数据\n");
    } else if (strcmp(mime_type, "application/gzip") == 0 ||
               strcmp(mime_type, "application/zip") == 0) {
        printf("   🛰️ 判定: 压缩数据 → 需解压后进一步分析\n");
    } else if (strcmp(mime_type, "application/x-executable") == 0) {
        printf("   🛰️ 判定: 固件更新包 → 验证签名后升级\n");
    } else if (strcmp(mime_type, "application/octet-stream") == 0) {
        printf("   🛰️ 判定: 二进制流 → 可能为加密或自定义协议\n");
    } else {
        printf("   🛰️ 判定: 未知类型 → 需人工分析\n");
    }
    printf("\n");

    // 6. 释放资源
    magic_close(magic);
}

/*
 * Usage: detect <file1> [file2] ...
 *
 * Prints a banner, then analyses every file named on the command line.
 * Exits with status 1 when no file is given, otherwise returns 0.
 */
int main(int argc, char *argv[]) {
    if (argc < 2) {
        fprintf(stderr, "用法: %s <文件1> [文件2] ...\n", argv[0]);
        exit(1);
    }

    printf("🔍 启动 libmagic C 接口 - 卫星数据类型识别\n");
    printf("------------------------------------------------\n\n");

    for (int i = 1; i < argc; i++) {
        analyze_file(argv[i]);
    }

    return 0;
}

1 2	gcc -o detect detect.c -lmagic

✅ 成功后运行：

1 2	./detect telemetry.txt

你应该看到输出：

📄 telemetry.txt:
   MIME 类型: text/plain
   字符编码: us-ascii
   🛰️ 判定: 明文遥测/日志文件 → 可直接解析

❌ 常见错误排查

| 错误 | 原因 | 解决 |
| --- | --- | --- |
| magic.h: No such file or directory | 缺少 libmagic-dev | 安装 libmagic-dev |
| undefined reference to 'magic_open' | 忘了 -lmagic | 编译时加 -lmagic |
| error while loading shared libraries: libmagic.so.1 | 运行时库缺失 | 安装 libmagic1（Ubuntu）或 file（其他） |

如果是 64 位系统但提示库找不到，可尝试软链接（极少见）：

1 2	sudo ln -s /usr/lib/x86_64-linux-gnu/libmagic.so /usr/lib/libmagic.so

✅ 总结

你遇到的问题是典型的 “缺少开发头文件” 错误。

只需一步修复（Ubuntu/Debian）：

1 2	sudo apt install -y libmagic-dev

然后重新编译：

1 2	gcc -o detect detect.c -lmagic

✅ 问题解决。

如果你现在运行 ./detect * 能正确识别 PNG、GZIP、ELF 等文件类型，说明你的 C 语言调用 libmagic 环境已完全就绪

2025-08-02

Linux命令

识别和分类恶意软件样本的工具YARA

https://calcguide.tech/2025/08/06/yara-a-tool-for-identifying-and-classifying-malware-samples/

YARA 是一个用于识别和分类恶意软件样本的工具，广泛应用于恶意软件分析、威胁情报、入侵检测等领域。它通过编写规则（YARA Rules）来匹配文件中的特定字符串、十六进制模式、正则表达式等特征。

一、YARA 的基本使用方法

1. 安装 YARA

Linux（Ubuntu/Debian）

1 2	sudo apt-get install yara

macOS

1 2	brew install yara

Python 安装（推荐用于集成）

1 2	pip install yara-python

注意：yara-python 是 YARA 的 Python 绑定，允许你在 Python 脚本中使用 YARA。

friend link：YARA-规则匹配神器-简单使用篇 - 知乎

Yara规则安装和使用学习-RedLine编写Yara检测-先知社区

github：https://github.com/VirusTotal/yara

2. 编写 YARA 规则（.yar 文件）

创建一个简单的 YARA 规则文件，例如 example.yar：

rule HelloWorld
{
    meta:
        author = "YourName"
        description = "Detects the string 'Hello, World!'"

    strings:
        $hello = "Hello, World!" ascii

    condition:
        $hello
}

3. 使用命令行运行 YARA

1 2	yara example.yar target_file.txt

如果 target_file.txt 中包含 Hello, World!，则会输出：

1 2	HelloWorld target_file.txt

二、YARA 集成到 Python 脚本（示例 Demo）

示例：使用 yara-python 扫描文件

import yara

# Compile the rule file once, then scan the target with it
compiled = yara.compile(filepath='example.yar')
hits = compiled.match('target_file.txt')

# Report the outcome: either the list of matching rules or a "no match" notice
if not hits:
    print("未匹配到任何规则")
else:
    print("匹配到规则：")
    for hit in hits:
        print(hit)

示例：从字符串加载规则（无需文件）

import yara

# Rule text kept inline, so no .yar file is needed on disk
rule_source = '''
rule HelloWorld
{
    strings:
        $hello = "Hello, World!" ascii
    condition:
        $hello
}
'''

# Compile from the string and scan the file in one chained expression
print(yara.compile(source=rule_source).match('target_file.txt'))

示例：扫描目录中的所有文件

import yara
import os

def scan_directory(directory, rules):
    """Scan every file below ``directory`` with ``rules``.

    ``rules`` is any object with a ``match(path)`` method, such as the
    result of ``yara.compile``. Files with at least one match are printed
    with a ``[+]`` prefix. A file that raises during scanning is reported
    with a ``[-]`` prefix and skipped, so one bad file does not abort the
    walk.
    """
    for root, _dirs, files in os.walk(directory):
        for file in files:
            filepath = os.path.join(root, file)
            try:
                matches = rules.match(filepath)
                if matches:
                    print(f"[+] 匹配: {filepath} -> {matches}")
            except Exception as e:
                # Deliberate best-effort: report the failure and carry on
                print(f"[-] 错误扫描 {filepath}: {e}")

# 加载规则
rules = yara.compile(filepath='example.yar')

# 扫描目录
scan_directory('/path/to/scan', rules)

三、高级 YARA 规则示例

检测 PE 文件中的特定导入函数（Windows 恶意软件常见）

import "pe"

// Flags PE files that import any of the APIs commonly used for remote
// process injection. Import checks use pe.imports(dll, function); string
// identifiers must be unique and no strings section is needed here.
rule SuspiciousPE
{
    meta:
        description = "检测包含可疑 API 调用的 PE 文件"

    condition:
        pe.is_pe and
        (
            pe.imports("kernel32.dll", "VirtualAllocEx") or
            pe.imports("kernel32.dll", "WriteProcessMemory") or
            pe.imports("kernel32.dll", "CreateRemoteThread")
        )
}

注意：使用 pe 模块需要目标文件是有效的 PE 文件。

四、YARA 与 SIEM/SOC 集成思路

定时扫描文件系统：使用 Python 脚本定期扫描上传目录或临时目录。

与文件上传服务集成：在 Web 应用中，用户上传文件后自动调用 YARA 扫描。

结合 ELK/Splunk：将扫描结果发送到日志系统进行告警。

沙箱联动：在动态分析沙箱中运行样本后，使用 YARA 提取特征。

五、实用技巧

递归扫描目录：使用 -r 参数，例如 yara -r example.yar /path/to/files
忽略大小写：使用 nocase 修饰符，例如 $a = "virus" nocase（注意使用英文直引号）
正则表达式支持：$re = /https?:\/\/[a-zA-Z0-9.\/]*/（正则内部的 / 需要写成 \/）
检测文件头：$mz = { 4D 5A } 配合 condition: $mz at 0，或直接在条件中写 uint16(0) == 0x5A4D（PE 文件头 "MZ"）

六、常见问题

编译错误：检查语法，YARA 对缩进不敏感，但对标点、引号和花括号敏感（注意不要混入中文标点或弯引号）。
性能问题：避免过于宽泛的规则，使用 ascii, wide, nocase 精确控制。
权限问题：扫描系统文件可能需要管理员权限。

七、资源推荐

YARA 官方文档：https://yara.readthedocs.io/

YARA Rules 仓库：https://github.com/Yara-Rules/rules

在线规则测试：https://yara-web.vercel.app/

总结

YARA 是一个强大灵活的模式匹配工具，适合用于：

恶意软件检测
威胁狩猎（Threat Hunting）
自动化分析流水线
安全产品集成（EDR、AV、沙箱）

通过 yara-python，你可以轻松将其集成到你的安全工具或平台中。

如需更复杂的集成（如多线程扫描、规则热加载、Web API 封装），可进一步封装为 REST 服务（使用 Flask/FastAPI）。

2025-08-02

C语言开发

静态链接 YARA 库方法

如何静态链接 YARA 库，实现免依赖的独立可执行文件部署。适用于嵌入式设备、安全产品发布，或避免在目标系统上安装 YARA 共享库的场景。

✅ 目标

编译 YARA 为静态库（.a）
使用静态库编译你的 C 程序
生成完全静态链接的可执行文件（不依赖 libyara.so）
实现“拷贝即运行”的免依赖部署

🧰 一、环境准备（以 Linux 为例）

推荐在干净的构建环境中操作（如 Ubuntu 20.04/22.04）：

1
2
3

sudo apt-get update
sudo apt-get install build-essential autoconf automake libtool pkg-config

🔧 二、从源码编译 YARA（静态库模式）

1. 下载 YARA 源码

git clone https://github.com/VirusTotal/yara.git
cd yara
git checkout v4.3.2  # 推荐稳定版本（或最新 v4.x）

✅ 注意：静态编译需关闭动态库生成，开启静态库。

2. 配置并编译（仅静态库）

./bootstrap.sh          # 第一次需要生成 configure 脚本
./configure \
    --enable-static \
    --disable-shared \
    --disable-magic \
    --without-crypto \
    --prefix=/usr/local

参数说明：

| 参数 | 说明 |
| --- | --- |
| --enable-static | 生成 .a 静态库 |
| --disable-shared | 禁止生成 .so 动态库 |
| --disable-magic | 禁用 libmagic（避免额外依赖） |
| --without-crypto | 禁用 OpenSSL（hash.md5 等模块） |
| --prefix | 安装路径 |

⚠️ 如果你需要 hash 模块（如 hash.md5），需安装 OpenSSL 并启用：

1
2
3

sudo apt-get install libssl-dev
./configure ... --with-crypto

3. 编译并安装

1
2
3

make -j$(nproc)
sudo make install

安装后你会看到：

静态库：/usr/local/lib/libyara.a
头文件：/usr/local/include/yara.h, /usr/local/include/yara/*

📦 三、编写测试程序（复用之前的示例）

保存为 yara_static.c：

#include <stdio.h>
#include <yara.h>

/*
 * Scan callback passed to yr_rules_scan_file().
 *
 * Prints the identifier of the rule carried in `data` whenever the scanner
 * reports CALLBACK_MSG_RULE_MATCHING; every other message is ignored.
 * `context` and `user_data` are not used.
 *
 * Always returns CALLBACK_CONTINUE.
 */
int callback(YR_SCAN_CONTEXT* context, int msg, void* data, void* user_data)
{
    YR_RULE* matched;

    if (msg != CALLBACK_MSG_RULE_MATCHING)
    {
        return CALLBACK_CONTINUE;
    }

    /* For this message the code treats `data` as a YR_RULE pointer. */
    matched = (YR_RULE*)data;
    printf("✅ 匹配规则: %s\n", matched->identifier);

    return CALLBACK_CONTINUE;
}

/*
 * Compile the rules in test.yar and scan this program's own source file
 * (yara_static.c), reporting matches through callback().
 *
 * Returns 0 when the scan ran to completion, 1 on any failure.
 *
 * Every YARA call is checked (the YARA C API reports success as
 * ERROR_SUCCESS, which is 0), and all exit paths after initialization go
 * through the single cleanup label so the rule file, compiler, rules and
 * library state are released exactly once.
 */
int main(void)
{
    YR_COMPILER* compiler = NULL;
    YR_RULES* rules = NULL;
    FILE* fh = NULL;
    int exit_code = 1;

    if (yr_initialize() != 0) {
        fprintf(stderr, "YARA 初始化失败\n");
        return 1;
    }

    if (yr_compiler_create(&compiler) != 0) {
        fprintf(stderr, "创建编译器失败\n");
        goto cleanup;
    }

    fh = fopen("test.yar", "r");
    if (!fh) {
        perror("规则文件");
        goto cleanup;
    }

    /* yr_compiler_add_file() returns the number of errors it found, so the
       compiler's internal fields do not need to be inspected. */
    if (yr_compiler_add_file(compiler, fh, NULL, NULL) > 0) {
        printf("规则编译失败\n");
        goto cleanup;
    }

    /* Without this check `rules` could be handed to the scanner unset. */
    if (yr_compiler_get_rules(compiler, &rules) != 0) {
        fprintf(stderr, "获取编译后的规则失败\n");
        goto cleanup;
    }

    /* Scan our own source text; flags 0, no user data, no timeout. */
    if (yr_rules_scan_file(rules, "yara_static.c", 0, callback, NULL, 0) != 0) {
        fprintf(stderr, "扫描失败\n");
        goto cleanup;
    }

    exit_code = 0;

cleanup:
    if (fh) {
        fclose(fh);
    }
    if (rules) {
        yr_rules_destroy(rules);
    }
    if (compiler) {
        yr_compiler_destroy(compiler);
    }
    yr_finalize();

    return exit_code;
}

🔗 四、静态编译你的程序

gcc -o yara_static yara_static.c \
    -I/usr/local/include \
    /usr/local/lib/libyara.a \
    -lpthread -lm -lz

关键点说明：

库 | 为什么需要
libyara.a | YARA 静态库（主逻辑）
-lpthread | YARA 使用线程
-lm | 数学函数（某些模块使用）
-lz | zlib，用于处理压缩或内存操作

✅ 此时 libyara 已被静态链接进 yara_static（不依赖 libyara.so）。若要得到完全静态链接的可执行文件（不依赖任何外部 .so，包括 libc），需在上述命令末尾追加 -static。

🔍 五、验证是否静态链接成功

1
2
3

# 检查是否依赖动态库
ldd yara_static

✅ 正确输出应为：

1 2	not a dynamic executable

或：

1 2	statically linked

❌ 如果显示 libyara.so，说明仍动态链接了，需检查编译命令。

🧪 六、测试运行

创建规则文件 test.yar：

rule FoundCFile
{
    strings:
        $main = "main()" ascii
    condition:
        $main
}

运行：

1 2	./yara_static

输出：

1 2	✅ 匹配规则: FoundCFile

🚀 七、跨平台静态编译（可选：生成 Windows 版）

你可以使用交叉编译生成 Windows 静态可执行文件（.exe）：

安装交叉编译器（Ubuntu）

1 2	sudo apt-get install gcc-mingw-w64

重新编译 YARA（Windows 静态库）

# 在 yara 源码目录
make clean

CC=x86_64-w64-mingw32-gcc \
CFLAGS="-static" \
./configure \
    --host=x86_64-w64-mingw32 \
    --enable-static \
    --disable-shared \
    --disable-magic \
    --without-crypto \
    --prefix=/tmp/yara-win64

make && make install

编译 Windows 可执行文件

x86_64-w64-mingw32-gcc -o yara.exe yara_static.c \
    -I/tmp/yara-win64/include \
    /tmp/yara-win64/lib/libyara.a \
    -lpthread -lm -lz -static

生成的 yara.exe 可在 Windows 上直接运行，无需安装任何依赖！

📦 八、部署建议

场景 | 建议
安全网关/EDR | 静态链接，嵌入扫描引擎
嵌入式设备 | 使用 musl-gcc 编译更小体积
分析工具发布 | 打包为单文件工具（如 yara-scan.exe）
容器内运行 | 无需安装 libyara，减少镜像体积

📏 九、生成文件大小优化（可选）

静态链接后体积较大（~2-5MB），可通过以下方式减小：

# 1. 编译时优化
gcc -Os -s -DNDEBUG ...  # 启用优化、去符号、关闭调试

# 2. 去除符号表
strip --strip-all yara_static

# 3. 使用 upx 压缩（可选）
upx --best --compress-exports=0 yara_static

可将 3MB → 1MB 左右。

✅ 总结：静态链接 YARA 的完整流程

# 1. 编译 YARA 静态库
./bootstrap.sh
./configure --enable-static --disable-shared --without-crypto
make && sudo make install

# 2. 编译你的程序（静态链接）
gcc -o myscanner myscan.c /usr/local/lib/libyara.a -lpthread -lm -lz -static

# 3. 验证
ldd myscanner  # 应显示 not a dynamic executable

# 4. 部署到任意 Linux 主机，无需安装 YARA

📎 附件：一键构建脚本（build_static_yara.sh）

#!/bin/bash
git clone https://github.com/VirusTotal/yara.git
cd yara
git checkout v4.3.2

./bootstrap.sh
./configure --enable-static --disable-shared --disable-magic --without-crypto --prefix=/usr/local
make -j$(nproc)
sudo make install

echo "✅ YARA 静态库已安装"
echo "现在你可以使用 /usr/local/lib/libyara.a 进行静态编译"

静态链接 YARA 库方法, 静态链接 YARA 实现独立可执行文件, YARA 库静态链接教程, 如何静态链接 YARA 库, YARA 静态编译方法, 嵌入式设备 YARA 部署方案, 安全产品 YARA 依赖优化, 避免依赖 YARA 库的解决方案, YARA 库免依赖部署指南, 静态链接 YARA 用于安全产品

2025-07-26

Linux系统编程

Linux C 编程核心函数详解

1. `fork` - 进程管理

函数介绍及归类

类别： 系统调用 (System Call)
头文件： #include <unistd.h>
作用： 创建一个当前进程的副本（子进程）。这是 Linux 下创建新进程的主要方式。

函数原型

#include <unistd.h>
pid_t fork(void);

功能

fork() 调用一次，返回两次。它会创建一个与调用进程（父进程）几乎完全相同的子进程。子进程会从 fork() 返回后的下一条指令开始执行。

参数

无参数。

返回值

在父进程中： 返回新创建的子进程的进程 ID (PID)，这是一个正整数。
在子进程中： 返回 0。
失败时： 在父进程中返回 -1，并设置全局变量 errno 来指示错误原因（如系统资源不足）。

相似函数或关联函数

vfork(): 类似于 fork()，但在子进程调用 exec 或 _exit 之前，父进程会被阻塞。通常用于紧接着调用 exec 的场景，效率可能稍高。
wait(), waitpid(): 父进程用来等待子进程结束。
getpid(): 获取当前进程的 PID。
getppid(): 获取当前进程的父进程 PID。

示例代码 (C89)

#include <stdio.h>     /* printf, perror */
#include <unistd.h>    /* fork, getpid, getppid */
#include <sys/types.h> /* pid_t */
#include <sys/wait.h>  /* wait */

int main() {
    pid_t pid;
    int status;

    /* 调用 fork 创建子进程 */
    pid = fork();

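    /* 注：原文此后的代码及下一节标题在提取时丢失，以下按上下文补全 */
    if (pid < 0) {
        /* fork 失败 */
        perror("fork failed");
        return 1;
    } else if (pid == 0) {
        /* 子进程 */
        printf("Child: PID = %d, parent PID = %d\n", (int)getpid(), (int)getppid());
        return 0;
    }

    /* 父进程：等待子进程结束 */
    printf("Parent: PID = %d, child PID = %d\n", (int)getpid(), (int)pid);
    if (wait(&status) == -1) {
        perror("wait failed");
        return 1;
    }
    printf("Parent: child has finished.\n");

    return 0;
}

2. `vfork` - 进程管理

函数介绍及归类

类别： 系统调用 (System Call)
头文件： #include <unistd.h>
作用： 创建一个新进程，但与 fork 不同，它在子进程调用 exec 或 _exit 之前，共享父进程的地址空间，并且父进程会被阻塞。

函数原型

#include <unistd.h>
pid_t vfork(void);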

功能

vfork() 也调用一次，返回两次。它创建一个新进程，但这个新进程（子进程）在调用 exec 系列函数或 _exit 之前，与父进程共享内存（包括栈、数据段等）。在此期间，父进程是被挂起的。这使得 vfork 在紧接着调用 exec 时非常高效，因为它避免了复制父进程的页表。

参数

无参数。

返回值

在父进程中： 返回新创建的子进程的 PID。
在子进程中： 返回 0。
失败时： 在父进程中返回 -1，并设置 errno。

相似函数或关联函数

fork(): 更通用，子进程独立于父进程。
exec() 系列函数 (execl, execv, execle, execve, execlp, execvp): 用新程序替换当前进程的镜像。

示例代码 (C89)

#include <stdio.h>     /* printf, perror */
#include <unistd.h>    /* vfork, getpid, getppid, _exit */
#include <sys/types.h> /* pid_t */

int main() {
    pid_t pid;
    int shared_var = 10; /* 父子进程共享的变量 */

    printf("Before vfork, shared_var = %d\n", shared_var);

    /* 调用 vfork 创建子进程 */
    pid = vfork();

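    /* 注：原文此后的代码及下一节标题在提取时丢失，以下按上下文补全 */
    if (pid < 0) {
        perror("vfork failed");
        return 1;
    } else if (pid == 0) {
        /* 子进程：与父进程共享地址空间，此处的修改对父进程可见 */
        /* 注意：POSIX 规定 vfork 子进程中除 exec/_exit 之外的操作行为未定义，此处仅作演示 */
        shared_var = 20;
        _exit(0); /* 必须使用 _exit，不能 return 或调用 exit */
    }

    /* 父进程：子进程调用 _exit 之后才会继续执行 */
    printf("After vfork, shared_var = %d\n", shared_var);

    return 0;
}

3. `system` - 进程管理

函数介绍及归类

类别： 标准库函数 (Standard Library Function)
头文件： #include <stdlib.h>
作用： 执行一个 shell 命令字符串。它内部通常通过调用 fork、exec 和 wait 来实现。

函数原型

#include <stdlib.h>
int system(const char *command);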

功能

system() 函数将 command 字符串传递给系统的命令处理器（通常是 shell，如 /bin/sh）来执行。它是一个方便的函数，可以让你在 C 程序中执行外部命令。

参数

command: 指向以空字符结尾的字符串，该字符串包含了要执行的命令及其参数。

返回值

成功： 返回命令执行后的退出状态。这个状态可以通过 WIFEXITED, WEXITSTATUS 等宏来解析（类似 wait）。
command 为 NULL： 如果系统有命令处理器可用，则返回非零值；否则返回 0。这可以用来检查 system 是否可用。
失败或错误： 返回 -1，并设置 errno（例如，无法创建子进程）。
命令本身执行失败： 返回由命令处理器报告的状态（通常是命令的退出码）。

相似函数或关联函数

fork(), exec() 系列, wait(): system 内部就是用这些函数实现的，提供了更底层和灵活的控制。
popen(), pclose(): 可以执行命令并获取其输出（通过管道）。

示例代码 (C89)

#include <stdio.h>  /* printf, perror */
#include <stdlib.h> /* system */

/*
 * Demonstrates system(): first probes for a command processor, then runs
 * three commands and prints the raw value system() returned for each.
 *
 * Returns 1 when no command processor is available or when the first
 * command could not be launched; returns 0 otherwise.
 */
int main() {
    int rc;

    /* system(NULL) only reports whether a command processor exists. */
    if (!system(NULL)) {
        printf("Command processor is not available.\n");
        return 1;
    }
    printf("Command processor is available.\n");

    /* Step 1: a trivial command; failing to launch it aborts the demo. */
    printf("\n--- Executing 'echo Hello from system call' ---\n");
    rc = system("echo Hello from system call");
    if (rc == -1) {
        perror("system call failed");
        return 1;
    }
    /* The raw status is printed as-is; it is not decoded with the wait macros. */
    printf("Command 'echo ...' finished with status %d\n", rc);

    /* Step 2: a directory listing; a launch failure is reported but not fatal. */
    printf("\n--- Executing 'ls -l' ---\n");
    rc = system("ls -l");
    if (rc != -1) {
        printf("Command 'ls -l' finished with status %d\n", rc);
    } else {
        perror("system call failed");
    }

    /* Step 3: a command that does not exist; the status comes from the shell. */
    printf("\n--- Executing 'nonexistentcommand' ---\n");
    rc = system("nonexistentcommand");
    if (rc != -1) {
        printf("Command 'nonexistentcommand' finished with status %d\n", rc);
    } else {
        perror("system call failed (this might be due to fork/exec failure)");
    }

    printf("\nMain program ending.\n");
    return 0;
}

4. `strlen` - 字符串处理

函数介绍及归类

类别： 标准库函数 (Standard Library Function)
头文件： #include <string.h>
作用： 计算 C 风格字符串（以 \0 结尾）的长度。

函数原型

#include <string.h>
size_t strlen(const char *str);

功能

strlen() 函数遍历字符串 str，从第一个字符开始计数，直到遇到终止符 \0（不包括 \0），然后返回计数的字符数。

参数

str: 指向以 \0 结尾的字符串。

返回值

返回字符串 str 中字符的个数（不包括结尾的 \0 字符）。返回类型 size_t 是一个无符号整数类型。

相似函数或关联函数

sizeof: 这是一个运算符，不是函数。对于字符数组，sizeof 返回整个数组的大小（包括 \0），而 strlen 返回实际内容的长度。
strnlen(): (POSIX.1-2008，不属于 ISO C 标准；C11 Annex K 提供的是 strnlen_s) 类似 strlen，但会限制搜索长度，防止读取超出指定范围的内存。

示例代码 (C89)

#include <stdio.h>  /* printf */
#include <string.h> /* strlen */

/*
 * Demonstrates strlen() next to sizeof on three strings and prints a
 * warning about calling strlen() on an array with no terminator.
 * Always returns 0.
 */
int main() {
    char hello[] = "Hello";      /* the compiler appends the '\0' */
    char world[20] = "World";    /* 20-byte array holding 6 bytes of content, '\0' included */
    const char *sentence = "This is a longer string.";
    char unterminated[] = {'N', 'o', ' ', 'N', 'u', 'l', 'l'}; /* deliberately has no '\0' */

    unsigned int hello_len, world_len, sentence_len;

    hello_len = (unsigned int)strlen(hello);
    world_len = (unsigned int)strlen(world);
    sentence_len = (unsigned int)strlen(sentence);

    /* For the arrays, sizeof counts the whole array, terminator included. */
    printf("String 1: \"%s\"\n", hello);
    printf("Length of str1 (strlen): %u\n", hello_len);
    printf("Size of str1 array (sizeof): %u\n", (unsigned int)sizeof(hello));

    printf("\nString 2: \"%s\"\n", world);
    printf("Length of str2 (strlen): %u\n", world_len);
    printf("Size of str2 array (sizeof): %u\n", (unsigned int)sizeof(world));

    printf("\nString 3: \"%s\"\n", sentence);
    printf("Length of str3 (strlen): %u\n", sentence_len);

    /* strlen() is intentionally never called on the unterminated array:
       that would be undefined behavior. */
    printf("\n--- Important Note ---\n");
    printf("String 4 is not null-terminated. Calling strlen on it leads to undefined behavior.\n");
    printf("It might crash or run forever. Uncommenting the next lines is dangerous.\n");

    return 0;
}

5. `strchr` - 字符串处理

函数介绍及归类

类别： 标准库函数 (Standard Library Function)
头文件： #include <string.h>
作用： 在字符串中查找第一次出现的指定字符。

函数原型

#include <string.h>
char *strchr(const char *str, int c);

功能

strchr() 函数在字符串 str 中从左到右搜索字符 c（转换为 char 类型进行比较）。搜索包括终止符 \0。

参数

str: 指向要搜索的以 \0 结尾的字符串。
c: 要搜索的字符，以 int 类型传递，但在比较时会转换为 char。

返回值

找到： 返回指向字符串 str 中第一次出现字符 c 的指针。
未找到： 返回 NULL。

相似函数或关联函数

strrchr(): 查找字符在字符串中最后一次出现的位置。
strstr(): 查找一个子字符串在另一个字符串中第一次出现的位置。
strpbrk(): 查找字符串中第一个匹配指定字符集中任意字符的位置。

示例代码 (C89)

#include <stdio.h>  /* printf */
#include <string.h> /* strchr */

/*
 * Demonstrates strchr(): finds the first and second 's' in a sample
 * string, then looks up the terminating '\0' and a character that is
 * absent, printing what each search returned.
 * Always returns 0.
 */
int main() {
    const char *haystack = "This is a sample string.";
    char needle = 's';
    char *hit;
    char *next_hit;

    printf("String to search: \"%s\"\n", haystack);
    printf("Character to find: '%c'\n", needle);

    /* First occurrence of the needle. */
    hit = strchr(haystack, (int)needle);

    if (hit == NULL) {
        printf("Character '%c' not found in the string.\n", needle);
    } else {
        /* Pointer difference gives the zero-based offset of the match. */
        printf("First occurrence of '%c' found at position: %ld\n",
               needle, (long)(hit - haystack));
        printf("Substring from first '%c': \"%s\"\n", needle, hit);

        /* Resume one past the first match to find the next one. */
        printf("\n--- Searching for next occurrence ---\n");
        next_hit = strchr(hit + 1, (int)needle);
        if (next_hit == NULL) {
            printf("No more occurrences of '%c' found.\n", needle);
        } else {
            printf("Second occurrence of '%c' found at position: %ld\n",
                   needle, (long)(next_hit - haystack));
            printf("Substring from second '%c': \"%s\"\n", needle, next_hit);
        }
    }

    /* Search for the string terminator itself. */
    printf("\n--- Searching for null terminator ---\n");
    hit = strchr(haystack, '\0');
    if (hit != NULL) {
        printf("Null terminator '\\0' found at position: %ld\n", (long)(hit - haystack));
        printf("Pointer points to: '%c' (ASCII value %d)\n", *hit, (int)*hit);
    }

    /* 'z' does not occur in the sample string. */
    printf("\n--- Searching for a character that doesn't exist ---\n");
    hit = strchr(haystack, 'z');
    if (hit == NULL) {
        printf("Character 'z' not found, as expected. strchr returned NULL.\n");
    } else {
        printf("Character 'z' found? This is unexpected.\n");
    }

    return 0;
}

2025-07-26

Linux系统编程

Linux 3.0 内核系统调用

首先需要明确一点：系统调用的具体列表和编号会随着内核版本演进而变化，增加新的调用或废弃旧的调用。虽然核心功能（如文件 I/O、进程管理）相对稳定，但细节上会有差异。请注意，最准确的信息始终来自于查阅对应内核版本的源代码或权威文档。

Linux 3.0 内核系统调用 (基于 x86_64 架构)

Linux 3.0 是一个相对成熟的内核版本。其系统调用接口已经非常丰富和稳定。

系统调用分类与接口

进程控制 (Process Control)
- fork (57): 创建一个子进程。
- vfork (58): 创建子进程，但在子进程调用 exec 或 _exit 前阻塞父进程。
- clone (56): 创建子进程或线程，比 fork/vfork 更灵活，允许共享内存空间等。
- execve (59): 用新程序替换当前进程镜像。
- exit (60): 终止调用进程。
- exit_group (231): 终止线程组中的所有线程。
- wait4 (61): 等待子进程状态变化。
- waitid (247): 等待子进程状态变化（提供比 wait4 更丰富的信息）。
- kill (62): 发送信号给进程。
- tkill (200): 发送信号给指定线程 (已废弃，推荐使用 tgkill)。
- tgkill (234): 发送信号给指定进程内的指定线程。
- getpid (39): 获取调用进程的进程 ID (PID)。
- getppid (110): 获取调用进程的父进程 ID (PPID)。
- getuid (102): 获取真实用户 ID。
- geteuid (107): 获取有效用户 ID。
- getgid (104): 获取真实组 ID。
- getegid (108): 获取有效组 ID。
- setuid (105): 设置用户 ID。
- setgid (106): 设置组 ID。
- getgroups (115): 获取附加组 ID 列表。
- setgroups (116): 设置附加组 ID 列表。
- setreuid (113): 设置真实和有效用户 ID。
- setregid (114): 设置真实和有效组 ID。
- setresuid (117): 设置真实、有效和保存的用户 ID。
- setresgid (119): 设置真实、有效和保存的组 ID。
- getresuid (118): 获取真实、有效和保存的用户 ID。
- getresgid (120): 获取真实、有效和保存的组 ID。
- setsid (112): 创建新的会话。
- getsid (124): 获取会话 ID。
- setpgid (109): 设置进程组 ID。
- getpgid (121): 获取进程组 ID。
- getpgrp (111): 获取当前进程的进程组 ID。
- prctl (157): 操作进程属性（如设置进程名、安全模块等）。
- arch_prctl (158): 特定于架构的进程控制（x86_64 上用于设置 FS/GS 段基址）。
- personality (135): 设置进程执行域（personality）。
- getpriority (140): 获取进程/进程组的调度优先级。
- setpriority (141): 设置进程/进程组的调度优先级。
- sched_setscheduler (144): 设置进程的调度策略和参数。
- sched_getscheduler (145): 获取进程的调度策略。
- sched_yield (24): 主动让出 CPU。
- sched_get_priority_max (146): 获取指定调度策略的最大优先级。
- sched_get_priority_min (147): 获取指定调度策略的最小优先级。
- sched_rr_get_interval (148): 获取 SCHED_RR 策略的时间片。
- nanosleep (35): 高精度睡眠。
- getitimer (36): 获取间隔计时器值。
- setitimer (38): 设置间隔计时器值。
文件 I/O (File Input/Output)
- open (2): 打开或创建文件。
- openat (257): 类似 open，但允许指定相对路径的基准目录描述符。
- creat (85): 创建新文件（等同于 open 带 O_CREAT|O_WRONLY|O_TRUNC 标志）。
- close (3): 关闭打开的文件描述符。
- read (0): 从文件描述符读取数据。
- write (1): 向文件描述符写入数据。
- pread64 (17): 从文件指定偏移量读取数据（原子操作）。
- pwrite64 (18): 向文件指定偏移量写入数据（原子操作）。
- readv (19): 从文件描述符读取数据到多个缓冲区（分散读）。
- writev (20): 从多个缓冲区写入数据到文件描述符（集中写）。
- lseek (8): 设置文件偏移量。
- fcntl (72): 对打开的文件描述符进行各种控制操作（如复制描述符、设置标志）。
- dup (32): 复制文件描述符。
- dup2 (33): 复制文件描述符，并允许指定新的描述符号。
- dup3 (292): 类似 dup2，但允许设置 O_CLOEXEC 标志。
- select (23): I/O 多路复用，监视多个文件描述符。
- poll (7): I/O 多路复用，监视多个文件描述符。
- epoll_create (213): 创建 epoll 实例。
- epoll_create1 (291): 创建 epoll 实例，允许设置标志。
- epoll_ctl (233): 控制 epoll 实例（添加/修改/删除监视的文件描述符）。
- epoll_wait (232): 等待 epoll 实例上的事件。
- pipe (22): 创建管道。
- pipe2 (293): 创建管道，允许设置标志（如 O_CLOEXEC, O_NONBLOCK）。
文件系统控制 (File System Control)
- stat (4): 获取文件状态信息。
- lstat (6): 获取文件状态信息（不跟随符号链接）。
- fstat (5): 获取打开文件描述符对应的文件状态信息。
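- newstat (即 stat，编号 4): x86_64 上 stat 的内核实现函数名为 sys_newstat，没有独立的系统调用号（106 是 i386 上 stat 的编号）。
- newlstat (即 lstat，编号 6): lstat 的内核实现函数名为 sys_newlstat（107 是 i386 上 lstat 的编号）。
- newfstat (即 fstat，编号 5): fstat 的内核实现函数名为 sys_newfstat（108 是 i386 上 fstat 的编号）。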
- statfs (137): 获取文件系统统计信息。
- fstatfs (138): 获取打开文件描述符所在文件系统的统计信息。
- access (21): 检查调用进程是否可以访问文件（按实际用户ID和组ID）。
- chmod (90): 改变文件权限。
- fchmod (91): 改变打开文件描述符对应的文件权限。
- chown (92): 改变文件所有者和组。
- fchown (93): 改变打开文件描述符对应的文件所有者和组。
- lchown (94): 改变符号链接本身的所有者和组。
- truncate (76): 将文件截断或扩展到指定长度。
- ftruncate (77): 将打开文件描述符对应的文件截断或扩展到指定长度。
- utime (132): 改变文件的访问时间和修改时间。
- utimes (235): 改变文件的访问时间和修改时间（使用 timeval 结构）。
- link (86): 创建硬链接。
- linkat (265): 创建硬链接，允许指定相对路径基准。
- symlink (88): 创建符号链接。
- symlinkat (266): 创建符号链接，允许指定相对路径基准。
- readlink (89): 读取符号链接的内容。
- readlinkat (267): 读取符号链接的内容，允许指定相对路径基准。
- unlink (87): 删除目录项（通常用于删除文件）。
- unlinkat (263): 删除目录项，允许指定相对路径基准和标志。
- rename (82): 重命名文件或目录。
- renameat (264): 重命名文件或目录，允许指定相对路径基准。
- mkdir (83): 创建目录。
- mkdirat (258): 创建目录，允许指定相对路径基准。
- rmdir (84): 删除空目录。
- chdir (80): 改变当前工作目录。
- fchdir (81): 通过文件描述符改变当前工作目录。
- getcwd (79): 获取当前工作目录路径。
- umask (95): 设置或获取文件模式创建掩码。
- mknod (133): 创建特殊文件（设备文件、FIFO）。
- mknodat (259): 创建特殊文件，允许指定相对路径基准。
- getdents (78): 读取目录内容（旧接口）。
- getdents64 (217): 读取目录内容（新接口，支持 64 位 inode）。
内存管理 (Memory Management)
- brk (12): 改变数据段大小。
- sbrk (12, 库函数封装): 改变数据段大小。
- mmap (9): 将文件或设备映射到内存，或分配匿名内存。
- munmap (11): 解除内存映射。
- mremap (25): 重新映射虚拟内存地址。
- msync (26): 将映射区域的修改同步到文件。
- mprotect (10): 设置内存页的保护属性。
- mincore (27): 确定内存页是否在物理内存中。
- madvise (28): 给内核提供关于内存访问模式的建议。
- shmget (29): 分配 System V 共享内存段。
- shmat (30): 连接 System V 共享内存段。
- shmdt (67): 断开 System V 共享内存段连接。
- shmctl (31): 控制 System V 共享内存段。
信号处理 (Signal Handling)
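- signal (库函数；x86_64 上没有 signal 系统调用，glibc 基于 rt_sigaction 实现): 设置信号处理函数（不推荐直接使用，推荐 sigaction）。
- sigaction (rt_sigaction, 13): 检查或修改信号的处理动作。
- sigprocmask (rt_sigprocmask, 14): 检查或修改信号掩码。
- sigpending (rt_sigpending, 127): 检查挂起的信号。
- sigsuspend (rt_sigsuspend, 130): 等待信号。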
- sigaltstack (131): 设置或获取信号栈信息。
时间管理 (Time Management)
- time (201): 获取当前时间（秒）。
- gettimeofday (96): 获取当前时间（秒和微秒）。
- settimeofday (164): 设置系统时间。
- clock_gettime (228): 获取指定时钟的时间。
- clock_settime (227): 设置指定时钟的时间。
- clock_getres (229): 获取指定时钟的精度。
套接字 (Sockets)
- socket (41): 创建套接字。
- bind (49): 将套接字绑定到地址。
- connect (42): 建立到另一个套接字的连接。
- listen (50): 监听套接字上的连接。
- accept (43): 接受一个套接字连接。
- getsockname (51): 获取套接字本地地址。
- getpeername (52): 获取套接字对端地址。
- socketpair (53): 创建一对已连接的套接字。
- send (库函数；x86_64 上没有独立的系统调用，由 sendto 实现): 通过套接字发送消息。
- recv (库函数；x86_64 上没有独立的系统调用，由 recvfrom 实现): 通过套接字接收消息。
- sendto (44): 通过套接字发送数据报。
- recvfrom (45): 通过套接字接收数据报。
- shutdown (48): 关闭套接字的全部或部分连接。
- setsockopt (54): 设置套接字选项。
- getsockopt (55): 获取套接字选项。
- sendmsg (46): 通过套接字发送消息（支持辅助数据）。
- recvmsg (47): 通过套接字接收消息（支持辅助数据）。
用户和组管理 (User and Group Management)
- （已在进程控制部分列出：getuid, geteuid, getgid, getegid, setuid, setgid, getgroups, setgroups, setreuid, setregid, setresuid, setresgid, getresuid, getresgid）
系统信息和控制 (System Information and Control)
- uname (63): 获取系统名称、版本等信息。
- sysinfo (99): 获取系统统计信息。
- times (100): 获取进程时间。
- getrusage (98): 获取资源使用信息。
- syslog (103): 读写内核日志缓冲区。
- iopl (172): 设置 I/O 权限级别（需要特权）。
- ioperm (173): 设置端口 I/O 权限位图（需要特权）。
其他 (Miscellaneous)
- ioctl (16): 设备特定的 I/O 操作。
- fcntl (72): 文件描述符控制（已在文件 I/O 部分列出）。
- mount (165): 挂载文件系统。
- umount2 (166): 卸载文件系统。
- pivot_root (155): 改变根文件系统。
- swapon (167): 启用交换空间。
- swapoff (168): 禁用交换空间。
- reboot (169): 重启或关闭系统（需要特权）。
- init_module (175): 加载内核模块（需要特权）。
- delete_module (176): 卸载内核模块（需要特权）。
- kexec_load (246): 加载新的内核以供 kexec 使用。
- acct (163): 启用或禁用进程记账。
- capget (125): 获取线程的能力。
- capset (126): 设置线程的能力。
- ptrace (101): 进程跟踪。
- sysfs (139): 获取关于系统文件系统的信息。
- ustat (136): 获取文件系统统计信息（已废弃）。

Linux 5.x 内核系统调用 (基于 x86_64 架构)

Linux 5.x 是一个较新的内核系列，它在保持向后兼容的同时，引入了许多新特性和系统调用。

系统调用分类与接口 (与 3.0 相比的主要变化)

核心分类基本一致，但在具体调用上有所增减和演进：

新增的系统调用 (New Syscalls):
- io_uring_setup (425): 设置 io_uring 异步 I/O 接口。
- io_uring_enter (426): 启动/提交 io_uring 操作。
- io_uring_register (427): 注册文件/缓冲区等供 io_uring 使用。
- openat2 (437): openat 的扩展版本，提供更多控制选项。
- pidfd_send_signal (424): 通过 PID 文件描述符发送信号，更安全。
- pidfd_open (434): 为进程 ID 打开一个文件描述符。
- clone3 (435): clone 的扩展版本，提供更丰富的参数。
- close_range (436): 关闭一个范围内的文件描述符。
- fsconfig (431): 配置和管理文件系统挂载参数。
- fsmount (432): 创建挂载实例。
- fsopen (430): 打开文件系统。
- fspick (433): 选择文件系统挂载点。
- move_mount (429): 移动挂载点。
- open_tree (428): 打开目录树以进行挂载操作。
- landlock_create_ruleset (444+): Landlock LSM 安全模块相关。
- landlock_add_rule (445+): Landlock LSM 安全模块相关。
- landlock_restrict_self (446+): Landlock LSM 安全模块相关。
- memfd_secret (447+): 创建一个内存文件描述符，其内容对内核其他部分保密。
- process_mrelease (448+): 释放与进程相关的内存。
- futex_waitv (449+): futex 的扩展，支持等待多个 futex。
- set_mempolicy_home_node (450+): 设置内存策略的首选 NUMA 节点。
演进和改进 (Evolution & Improvements):
- statx (332): 一个新的、更强大和灵活的获取文件状态信息的系统调用，旨在替代 stat/lstat/fstat 系列。它提供了更丰富的元数据和更好的性能。
- copy_file_range (326): 在两个文件描述符之间高效地复制数据，内核层面优化。
- preadv2 (327), pwritev2 (328): preadv/pwritev 的增强版，支持额外的标志（如 RWF_NOWAIT, RWF_HIPRI）。
- pkey_mprotect (329): 与内存保护密钥（Memory Protection Keys）一起使用，提供比 mprotect 更细粒度的保护。
- pkey_alloc (330), pkey_free (331): 分配和释放内存保护密钥。
- statfs/fstatfs 行为改进: 对某些文件系统的支持和信息返回可能更完善。
- mount/umount 相关: 旧的 mount 系统调用仍然存在，但新的文件系统 API (fsopen, fsconfig, fsmount 等) 提供了更现代、更安全的挂载方式。
- seccomp 增强: 与安全相关的系统调用可能有更新，用于构建更严格的沙箱。
废弃或不推荐 (Deprecated/Obsolete):
- 一些旧的、功能重叠或有安全问题的调用可能被标记为废弃，鼓励使用新接口。例如，某些特定架构的旧调用可能不再推荐。
- sysfs (139): 在某些场景下可能被新的接口替代或使用减少。
核心功能保持稳定 (Core Functionality Remains Stable):
- read, write, open, close, fork, execve, mmap, socket, bind, connect 等基础且核心的系统调用在 5.x 中依然存在，保证了向后兼容性。它们的编号和基本语义通常不变。

总结

Linux 3.0: 代表了内核成熟期的一个稳定版本，包含了当时绝大多数常用和必要的系统调用。其分类清晰，是学习经典 Linux 系统编程的良好起点。
Linux 5.x: 在 3.0 的基础上，增加了许多现代化特性，特别是在异步 I/O (io_uring)、安全 (Landlock, memfd_secret)、内存管理 (pkey_*)、文件系统操作 (新挂载 API) 和进程控制 (clone3, pidfd_*) 方面。这些新调用旨在提高性能、安全性、灵活性和易用性。

学习建议:

从基础开始: 先掌握 3.0 中列出的核心系统调用，理解它们的工作原理和使用场景。
查阅手册: 始终使用 man 2 syscall_name 来获取最准确的文档。
实践编码: 通过编写小程序来实践这些调用。
关注演进: 学习 5.x 新增的调用，特别是那些能显著提升性能或安全性的功能（如 io_uring, statx）。
检查兼容性: 如果你的程序需要在不同内核版本上运行，务必检查所使用的系统调用的可用性。

希望这份详细的分类和介绍能帮助你更好地理解 Linux 系统编程！

2025-07-20

C语言开发

C语言标准/C++标准

C语言高级编程技巧与最佳实践-完整版

https://users.ece.cmu.edu/~eno/coding/CppCodingStandard.html
https://users.ece.cmu.edu/~eno/coding/CCodingStandard.html

2025-07-13

C语言开发

c库缓冲方式对性能的影响及代码优化方法

C库缓冲方式对性能的影响及代码优化方法，详解fwrite缓存策略提升效率。fwrite的缓存方式对性能有显著影响，这主要涉及到C标准库的I/O缓冲机制。合理配置缓冲可以大幅提升I/O效率，特别是在处理大量数据时。

一、C标准库的三种缓冲模式

全缓冲（Fully Buffered）

缓冲区满时才进行实际I/O操作（如写入磁盘）。
默认用于普通文件（如磁盘文件），缓冲区大小通常为4KB或8KB（取决于系统）。
性能最佳，减少实际I/O次数，但数据可能延迟写入。

行缓冲（Line Buffered）

遇到换行符\n或缓冲区满时刷新。
默认用于终端设备（如stdout、stdin），缓冲区较小（如1KB）。
平衡交互性和性能，适合实时输出。

无缓冲（Unbuffered）

每次调用fwrite立即执行实际I/O。
默认用于标准错误输出（stderr），确保错误信息实时显示。
性能最差，但保证数据实时性。

二、缓冲对性能的影响

1. 性能差异示例

假设需要写入1MB数据，分1000次每次写入1KB：

无缓冲：触发1000次实际I/O，性能最差。
行缓冲：若数据包含换行符，可能触发多次I/O，性能中等。
全缓冲：仅触发1次或少数几次I/O（取决于缓冲区大小），性能最佳。

2. 三种方式的性能测试对比

#include <stdio.h>
#include <time.h>

#define SIZE 1000000  /* total payload per run, in bytes (about 1 MB) */

enum { CHUNK = 100 };  /* bytes handed to each fwrite() call */

/*
 * Write SIZE zero bytes to `path` in CHUNK-sized fwrite() calls and return
 * the CPU time the write loop consumed, in seconds.
 *
 * unbuffered: non-zero switches the stream to _IONBF before writing;
 *             zero leaves the stream's default buffering untouched.
 *
 * Returns -1.0 (after reporting via perror) if the file cannot be opened,
 * the buffering mode cannot be set, a write comes up short, or the close
 * fails.
 */
static double timed_write(const char *path, int unbuffered) {
    /* static: zero-initialized, and keeps 1 MB off the stack */
    static char payload[SIZE];
    FILE *fp;
    clock_t start, end;
    size_t offset;
    int ok = 1;

    fp = fopen(path, "wb");
    if (fp == NULL) {
        perror(path);
        return -1.0;
    }

    /* setvbuf must be called before any other operation on the stream */
    if (unbuffered && setvbuf(fp, NULL, _IONBF, 0) != 0) {
        perror("setvbuf");
        fclose(fp);
        return -1.0;
    }

    /* Many small writes: a single large fwrite would be passed to the OS
       in one go in either mode, so it would not show any difference. */
    start = clock();
    for (offset = 0; offset < (size_t)SIZE; offset += CHUNK) {
        size_t remaining = (size_t)SIZE - offset;
        size_t want = remaining < CHUNK ? remaining : CHUNK;

        if (fwrite(payload + offset, 1, want, fp) != want) {
            perror("fwrite");
            ok = 0;
            break;
        }
    }
    end = clock();

    /* fclose flushes whatever is still buffered, so its result matters */
    if (fclose(fp) != 0) {
        perror("fclose");
        ok = 0;
    }

    return ok ? ((double)(end - start)) / CLOCKS_PER_SEC : -1.0;
}

/*
 * Times the same chunked write with stdio buffering disabled and with the
 * stream's default buffering, printing the CPU seconds for each.
 * Returns 0 on success, 1 if either run failed.
 */
int main(void) {
    double cpu_time_used;

    /* Test 1: unbuffered mode */
    cpu_time_used = timed_write("unbuffered.bin", 1);
    if (cpu_time_used < 0.0) {
        return 1;
    }
    printf("无缓冲耗时: %f 秒\n", cpu_time_used);

    /* Test 2: default buffering (no setvbuf call) */
    cpu_time_used = timed_write("buffered.bin", 0);
    if (cpu_time_used < 0.0) {
        return 1;
    }
    printf("全缓冲耗时: %f 秒\n", cpu_time_used);

    return 0;
}

典型结果：无缓冲模式耗时可能是全缓冲的10-100倍。

三、如何控制缓冲行为

1. 使用setvbuf函数

// 函数原型
int setvbuf(FILE *stream, char *buf, int mode, size_t size);

// 示例：
FILE *fp = fopen("data.bin", "wb");

// 1. 全缓冲（自定义缓冲区）
char my_buffer[8192];  // 8KB缓冲区
setvbuf(fp, my_buffer, _IOFBF, sizeof(my_buffer));

// 2. 行缓冲
setvbuf(fp, NULL, _IOLBF, 0);  // 系统分配默认大小的行缓冲

// 3. 无缓冲
setvbuf(fp, NULL, _IONBF, 0);

2. 使用setbuf简化设置

// 全缓冲（自定义缓冲区）
char buf[BUFSIZ];  // setbuf 要求缓冲区至少为 BUFSIZ 字节
setbuf(fp, buf);

// 无缓冲
setbuf(fp, NULL);  // 等价于 setvbuf(fp, NULL, _IONBF, 0)

四、性能优化建议

优先使用全缓冲对非交互式文件操作（如日志、数据存储），默认全缓冲已足够高效。

调整缓冲区大小

对大文件写入，可增大缓冲区（如64KB或1MB），减少I/O次数（缓冲区必须在流关闭之前一直有效，建议使用 static 或堆内存）：

static char large_buf[1048576];  // 1MB缓冲区
setvbuf(fp, large_buf, _IOFBF, sizeof(large_buf));
但过大的缓冲区会占用过多内存，需权衡。

批量写入避免频繁调用fwrite写入少量数据，尽量累积到缓冲区大小再写入：

// 低效：
for (int i = 0; i < 1000; i++) {
    fwrite(&data[i], sizeof(data[i]), 1, fp);  // 1000次fwrite调用（无缓冲时即1000次I/O）
}

// 高效：
fwrite(data, sizeof(data[0]), 1000, fp);  // 1次fwrite调用

适时刷新缓冲区若需确保数据及时写入（如崩溃恢复场景），可手动刷新：

fflush(fp); // 将用户态缓冲区的数据提交给内核；若要确保真正落盘，还需调用 fsync(fileno(fp))

五、特殊场景注意事项

实时日志记录若需确保日志实时写入（如系统崩溃时不丢失数据），可使用：

1
2
3

setvbuf(log_file, NULL, _IONBF, 0);  // 无缓冲
// 或定期手动刷新
fflush(log_file);

网络I/O对套接字文件流（如通过 fdopen 得到的流），默认缓冲模式取决于具体实现，建议显式设置全缓冲以提升性能：

1	setvbuf(socket_file, NULL, _IOFBF, 8192); // 8KB全缓冲

内存映射文件（mmap）对超大数据量（GB级），可考虑使用mmap替代fwrite，直接映射内存到文件，避免缓冲区拷贝开销。

六、总结

性能排序：全缓冲 >> 行缓冲 > 无缓冲。
关键原则：减少实际I/O次数，批量处理数据。
适用场景：
全缓冲：非实时数据（如批量处理、大文件读写）。
行缓冲：交互式终端输出。
无缓冲：实时日志、错误信息。

合理配置缓冲是提升I/O密集型应用性能的关键手段之一。

process_vm_readv(2)

iovec 结构体

目标进程代码 (target.c)

访问进程代码 (accessor.c)

权限要求

安全限制

技术限制

错误处理

最佳实践

1. 安装 YARA 开发库

Ubuntu/Debian

CentOS/RHEL

macOS

文件：yara_scan.c

文件：test.yar

方法 1：使用 gcc 直接编译

方法 2：指定路径（如自定义安装）

预期输出：

1. 使用外部变量（External Variables）

2. 扫描内存缓冲区

3. 启用内置模块（如 hash, pe, elf）

1. UTF-8（通用 Unicode）

2. GBK（简体中文）

3. Big5（繁体中文）

4. Shift_JIS（日文）

5. EUC-KR（韩文）

6. ISO-8859-1（西欧，如法语、德语）

7. UTF-16LE（小端 Unicode）

8. UTF-16BE（大端 Unicode）

9. ASCII（纯英文，无扩展字符）

10. UTF-8 with BOM（带签名）

✅ 预期输出示例：

✅ 预期输出（示例）：

🔧 Ubuntu / Debian 系统

🔧 CentOS / RHEL / Rocky Linux / AlmaLinux

🔧 Fedora

🔧 openSUSE

只需一步修复（Ubuntu/Debian）：

1. 安装 YARA

Linux（Ubuntu/Debian）

macOS

Python 安装（推荐用于集成）

2. 编写 YARA 规则（.yar 文件）

3. 使用命令行运行 YARA

示例：使用 yara-python 扫描文件

示例：从字符串加载规则（无需文件）

示例：扫描目录中的所有文件

检测 PE 文件中的特定导入函数（Windows 恶意软件常见）

1. 下载 YARA 源码

2. 配置并编译（仅静态库）

参数说明：

3. 编译并安装

关键点说明：

安装交叉编译器（Ubuntu）

重新编译 YARA（Windows 静态库）

编译 Windows 可执行文件

Linux C 编程核心函数详解

1. fork - 进程管理

函数介绍及归类

函数原型

功能

参数

返回值

相似函数或关联函数

示例代码 (C89)

功能

参数

返回值

相似函数或关联函数

示例代码 (C89)

功能

参数

返回值

相似函数或关联函数

示例代码 (C89)

4. strlen - 字符串处理

函数介绍及归类

函数原型

功能

参数

1. `fork` - 进程管理

4. `strlen` - 字符串处理

5. `strchr` - 字符串处理