AWK 高级应用手册
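深入学习 AWK 的高级用法,包括网络编程、套接字操作及自动化脚本开发,适合进阶 Linux 用户。注意:文中的 `|&` 双向管道、`/inet/` 网络特殊文件、三参数 `match()`、`systime()`/`strftime()` 等均为 GNU awk(gawk)扩展,在 mawk 或 BusyBox awk 下无法运行,请使用 gawk 执行这些示例。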
data-ad-format="fluid"
data-ad-layout-key="-7k+ex-4a-9w+4a">
一、网络编程与套接字
1. TCP 客户端通信
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
| # Simple TCP client (gawk only: uses the /inet special files and the |& operator)
awk 'BEGIN {
    # Special file layout: /inet/tcp/<local-port>/<remote-host>/<remote-port>.
    # Local port 0 lets the system pick one; the peer is 127.0.0.1:8080.
    endpoint = "/inet/tcp/0/127.0.0.1/8080"

    # Send one line to the server.
    print "Hello Server" |& endpoint

    # Read one line of reply; getline yields a value > 0 on success.
    got_reply = (endpoint |& getline reply)
    if (got_reply > 0)
        print "Server response:", reply

    close(endpoint)
}'
|
2. TCP 服务器
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
| # Simple TCP server listening on port 8080 (gawk only)
awk 'BEGIN {
    # /inet/tcp/<local-port>/0/0 : local port 8080, any remote host and port.
    listener = "/inet/tcp/8080/0/0"
    print "Server listening on port 8080..."

    for (;;) {
        # Read one line from the peer into $0; stop on EOF or error.
        if ((listener |& getline) <= 0)
            break

        print "Client connected"
        print "Received:", $0

        # Reply to the client, then drop the connection.
        print "Echo: " $0 |& listener
        # NOTE(review): the next getline on the same name presumably reopens
        # the socket and waits for the next client - confirm against gawk docs.
        close(listener)
    }
}'
|
3. HTTP 客户端示例
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22
| # Simple HTTP GET request (gawk only: uses the /inet special files and |&)
gawk 'BEGIN {
    host = "www.example.com"
    port = 80
    socket = "/inet/tcp/0/" host "/" port

    # Build the request; HTTP header lines end in CRLF and the header block
    # ends with an empty line.
    request = "GET / HTTP/1.1\r\n"
    request = request "Host: " host "\r\n"
    request = request "Connection: close\r\n\r\n"

    # Use printf, not print: print would append ORS ("\n") after the
    # terminating blank line and send a stray byte to the server.
    printf "%s", request |& socket

    # Read the response line by line until the server closes the connection.
    while ((socket |& getline line) > 0) {
        sub(/\r$/, "", line)   # drop the CR of the CRLF line ending
        print line
    }

    close(socket)
}'
|
二、进程间通信
1. 管道通信
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
| # Exchange data with an external command over a two-way pipe (gawk only)
awk 'BEGIN {
    sorter = "sort -n"

    # Feed the values to sort, one per line.
    count = split("30 10 20", values, " ")
    for (k = 1; k <= count; k++)
        print values[k] |& sorter

    # Close only the write end so sort sees EOF and starts producing output.
    close(sorter, "to")

    # Collect the sorted lines.
    while ((sorter |& getline) > 0)
        print "Sorted:", $0

    close(sorter)
}'
|
2. 协同进程
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
| # Create a coprocess (gawk only: uses the |& operator)
gawk 'BEGIN {
    coproc = "tr a-z A-Z"   # coprocess that upper-cases its input

    # Send data to the coprocess.
    print "hello world" |& coproc

    # Close the write end BEFORE reading. tr block-buffers its output when it
    # writes to a pipe, so nothing comes back until it sees EOF on stdin;
    # reading without this close deadlocks both processes.
    close(coproc, "to")

    # Read the converted line.
    if ((coproc |& getline result) > 0) {
        print "Result:", result
    }

    close(coproc)
}'
|
三、动态数据结构
1. 动态数组管理
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44
| # Dynamic array "class": named, zero-based, growable arrays
#
# awk has no eval() and system() returns an exit status, not a value, so
# named arrays are stored in two global associative arrays instead:
#   DA_ITEMS[name, i]  element i of the array called name
#   DA_LEN[name]       number of elements in that array
awk '
BEGIN {
    # Create the array.
    init_array("myarray")

    # Append elements.
    array_push("myarray", "first")
    array_push("myarray", "second")
    array_push("myarray", "third")

    # Fetch one element.
    print "Element at index 1:", array_get("myarray", 1)

    # Number of elements.
    print "Array length:", array_length("myarray")

    # Walk the array in index order.
    for (i = 0; i < array_length("myarray"); i++) {
        print "Index", i, ":", array_get("myarray", i)
    }
}

# Create (or reset) the array called name; it is empty afterwards.
function init_array(name) {
    DA_LEN[name] = 0
}

# Append value to the end of the array called name.
function array_push(name, value) {
    DA_ITEMS[name, DA_LEN[name] + 0] = value
    DA_LEN[name]++
}

# Return element idx (zero-based) of the array called name, or "" when idx is
# out of range. The parameter cannot be called "index": that is a built-in
# function name in awk.
function array_get(name, idx) {
    if (idx < 0 || idx >= array_length(name))
        return ""
    return DA_ITEMS[name, idx]
}

# Return the number of elements in the array called name (0 if unknown).
function array_length(name) {
    return (name in DA_LEN) ? DA_LEN[name] : 0
}'
|
2. 栈数据结构
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39
| # Stack implementation: named LIFO stacks
#
# State lives in two global associative arrays (awk has no eval()):
#   STACK_ITEMS[name, i]  item at position i of the stack called name
#   STACK_TOP[name]       index of the top item, -1 when the stack is empty
awk '
BEGIN {
    stack_init("mystack")

    stack_push("mystack", "item1")
    stack_push("mystack", "item2")
    stack_push("mystack", "item3")

    # Items come back in reverse order of insertion.
    while (!stack_empty("mystack")) {
        print "Popped:", stack_pop("mystack")
    }
}

# Create (or reset) the stack called name.
function stack_init(name) {
    STACK_TOP[name] = -1
}

# Push item onto the stack called name, creating the stack on first use.
function stack_push(name, item) {
    if (!(name in STACK_TOP))
        stack_init(name)
    STACK_ITEMS[name, ++STACK_TOP[name]] = item
}

# Remove and return the top item, or "" when the stack is empty.
# The extra parameter is a local variable, so nothing leaks into globals.
function stack_pop(name,    item) {
    if (stack_empty(name))
        return ""
    item = STACK_ITEMS[name, STACK_TOP[name]]
    delete STACK_ITEMS[name, STACK_TOP[name]]
    STACK_TOP[name]--
    return item
}

# Return 1 when the stack called name is empty or was never initialised.
function stack_empty(name) {
    return (!(name in STACK_TOP) || STACK_TOP[name] < 0)
}'
|
四、文件系统操作
1. 目录遍历
1 2 3 4 5 6 7 8 9 10 11 12 13 14
| # Recursively list the regular files under a directory
awk 'BEGIN {
    traverse_directory(".")
}

# Print every regular file found under dir by find(1).
# cmd and file are declared as locals so that nested or repeated calls do
# not overwrite each other through global variables.
function traverse_directory(dir,    cmd, file) {
    # Quote dir so names containing spaces or shell metacharacters are
    # passed to find as a single argument.
    cmd = "find " shell_quote(dir) " -type f"
    while ((cmd | getline file) > 0) {
        print "Found file:", file
        # process the file here...
    }
    close(cmd)
}

# Wrap s in single quotes for /bin/sh; \047 is the single-quote character,
# which cannot be written literally inside this single-quoted awk program.
function shell_quote(s,    n, parts, i, out) {
    n = split(s, parts, "\047")
    out = parts[1]
    for (i = 2; i <= n; i++)
        out = out "\047\\\047\047" parts[i]
    return "\047" out "\047"
}'
|
2. 文件监控
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
| # Watch a file and report whenever its size changes
awk 'BEGIN {
    file = "monitor.txt"
    last_size = get_file_size(file)

    # Poll forever, once per second.
    for (;;) {
        current_size = get_file_size(file)
        if (current_size != last_size) {
            print "File changed! New size:", current_size
            last_size = current_size
        }
        system("sleep 1")
    }
}

# Return the size reported by "stat -c %s" for filename, or 0 when the
# command yields no output (stat errors are discarded via 2>/dev/null).
function get_file_size(filename) {
    cmd = "stat -c %s " filename " 2>/dev/null"
    ok = ((cmd | getline size) > 0)
    close(cmd)
    return ok ? size : 0
}'
|
五、数据库接口
1. SQLite 集成
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28
| # Talk to SQLite through the sqlite3 command-line shell
awk 'BEGIN {
    db_file = "data.db"

    # Create the table.
    run_sql(db_file, "CREATE TABLE IF NOT EXISTS users(id INTEGER PRIMARY KEY, name TEXT, age INTEGER);")

    # Insert rows.
    insert_user(db_file, "Alice", 25)
    insert_user(db_file, "Bob", 30)

    # Query rows.
    query_users(db_file)
}

# Insert one user. name becomes a properly escaped SQL string literal and
# age is forced to an integer, so neither can alter the statement.
function insert_user(db, name, age) {
    run_sql(db, "INSERT INTO users(name, age) VALUES (" sql_quote(name) ", " int(age) ");")
}

# Print every row of the users table, one per line, prefixed with "User:".
function query_users(db,    cmd, row) {
    cmd = "sqlite3 " shell_quote(db) " \"SELECT * FROM users\""
    while ((cmd | getline row) > 0) {
        print "User:", row
    }
    close(cmd)
}

# Run one SQL statement by writing it to sqlite3 on stdin. Feeding the SQL
# through a pipe keeps it away from the shell, so quotes inside the
# statement are never reinterpreted. Returns the value of close().
function run_sql(db, sql,    cmd) {
    cmd = "sqlite3 " shell_quote(db)
    print sql | cmd
    return close(cmd)
}

# Return s as an SQL string literal: wrapped in single quotes, with embedded
# single quotes doubled. \047 is the single-quote character.
function sql_quote(s) {
    gsub("\047", "\047\047", s)
    return "\047" s "\047"
}

# Wrap s in single quotes for /bin/sh.
function shell_quote(s,    n, parts, i, out) {
    n = split(s, parts, "\047")
    out = parts[1]
    for (i = 2; i <= n; i++)
        out = out "\047\\\047\047" parts[i]
    return "\047" out "\047"
}'
|
2. CSV 到数据库转换
1 2 3 4 5 6 7 8
| # Import a CSV file into SQLite
#
# All rows are streamed to ONE sqlite3 process inside a single transaction,
# instead of spawning a shell and a sqlite3 process per row. Field values are
# emitted as escaped SQL string literals and never pass through the shell,
# so quotes or shell metacharacters in the data cannot break or inject
# commands.
# Limitation: -F"," splits on every comma, so quoted CSV fields that contain
# commas are not supported.
awk -F',' '
# Return s as an SQL string literal: wrapped in single quotes, with embedded
# single quotes doubled. \047 is the single-quote character.
function sql_quote(s) {
    gsub("\047", "\047\047", s)
    return "\047" s "\047"
}

BEGIN {
    db = "sqlite3 data.db"
    print "BEGIN;" | db
}

# Skip the header line; insert every following row.
NR > 1 {
    print "INSERT INTO records(col1, col2, col3) VALUES (" \
          sql_quote($1) ", " sql_quote($2) ", " sql_quote($3) ");" | db
}

END {
    print "COMMIT;" | db
    close(db)
}
' data.csv
|
六、并发与并行处理
1. 多进程处理
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26
| # Process several files in parallel
awk 'BEGIN {
    nfiles = 3
    files[1] = "file1.txt"
    files[2] = "file2.txt"
    files[3] = "file3.txt"

    # Build ONE shell command that starts every job in the background and
    # then waits for them. Each system() call runs in its own shell, so a
    # separate system("wait") has no children to wait for and returns at
    # once, before the result files are complete.
    script = ""
    for (i = 1; i <= nfiles; i++) {
        script = script "wc -l " files[i] " > result_" i ".txt & "
    }
    system(script "wait")

    # Collect the results in a fixed order ("for (i in files)" visits the
    # indices in an unspecified order).
    for (i = 1; i <= nfiles; i++) {
        result = "result_" i ".txt"
        if ((getline line < result) > 0) {
            # wc prints "<count> <filename>"; keep the count.
            split(line, fields, " ")
            print "File", files[i], ":", fields[1], "lines"
        }
        close(result)
        system("rm -f " result)
    }
}'
|
2. 线程池模拟
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29
| # Simple task queue: run tasks in parallel, at most max_workers at a time
awk 'BEGIN {
    max_workers = 3
    current_workers = 0

    # Task list (commands are resolved by the shell, e.g. through PATH).
    ntasks = 5
    tasks[1] = "task1.sh"
    tasks[2] = "task2.sh"
    tasks[3] = "task3.sh"
    tasks[4] = "task4.sh"
    tasks[5] = "task5.sh"

    # Tasks run in batches. Each batch is ONE shell command of the form
    #   t1 & t2 & t3 & wait
    # because background jobs and "wait" must share a shell: a separate
    # system("wait") has no children and returns immediately. Counting
    # workers in awk without ever learning that one finished (as a busy-wait
    # loop would) blocks forever once max_workers tasks have been started.
    batch = ""
    for (i = 1; i <= ntasks; i++) {
        batch = batch tasks[i] " & "
        current_workers++
        print "Started task:", tasks[i], "(Workers:", current_workers, ")"

        # Batch is full, or this was the last task: run it and wait.
        if (current_workers == max_workers || i == ntasks) {
            system(batch "wait")
            batch = ""
            current_workers = 0
        }
    }

    print "All tasks completed"
}'
|
七、高级正则表达式
1. 复杂模式匹配
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27
| # Multi-line pattern matching
awk '
BEGIN {
    RS = ""   # paragraph mode: records are separated by blank lines
}

{
    # Only records that contain both words are reported.
    if ($0 !~ /ERROR/ || $0 !~ /critical/)
        next

    printf "%s\n%s\n%s\n", "Critical error found:", $0, "---"
}
' system.log
# Parse simple <tag ...>text</tag> pairs (gawk only: three-argument match())
gawk '
BEGIN {
    # awk regular expressions have no backreferences, so "\\1" cannot mean
    # "the same tag name again". The closing tag name is captured as a third
    # group and compared with the opening one in code.
    pattern = "<([a-zA-Z][a-zA-Z0-9]*)[^>]*>([^<]*)</([a-zA-Z][a-zA-Z0-9]*)>"
}

{
    # Work on a copy so $0 and the fields stay intact.
    rest = $0
    while (match(rest, pattern, arr)) {
        # Report only properly paired tags.
        if (arr[1] == arr[3]) {
            tag = arr[1]
            content = arr[2]
            print "Tag:", tag, "Content:", content
        }
        # Continue scanning after the current match.
        rest = substr(rest, RSTART + RLENGTH)
    }
}
' html_file.txt
|
2. 正则捕获组
1 2 3 4 5 6 7 8 9 10
| # Regex capture groups with GNU awk (three-argument match())
awk '
# Lines containing a YYYY-MM-DD date: the three groups land in ymd[1..3].
match($0, /([0-9]{4})-([0-9]{2})-([0-9]{2})/, ymd) {
    year  = ymd[1]
    month = ymd[2]
    day   = ymd[3]
    print "Year:", year, "Month:", month, "Day:", day
}' dates.txt
|
八、性能分析与优化
1. 性能监控
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
| # Profiling script (gawk only)
#
# The profile file name is an OPTIONAL argument, so it must be attached to
# the option: --profile=profile.out (or -pprofile.out). Written as
# "-p profile.out", gawk would take "profile.out" as the program text.
gawk --profile=profile.out '
{
    # Actual processing logic.
    sum += $1
    count++
}

END {
    if (count > 0) {
        print "Average:", sum/count
    }
}
' large_data.txt
# Inspect profile.out afterwards: it holds the program listing annotated
# with execution counts.
|
2. 内存优化
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
| # Processing very large files
#
# awk reads its input one record at a time, so memory use stays flat as long
# as the script does not accumulate records in arrays. No special setting is
# needed for that. The original "BINMODE = 3" has been dropped: BINMODE
# selects binary I/O mode on non-POSIX systems and has nothing to do with
# buffer sizes.
awk '
{
    # Progress report every 10000 records, on stderr so it does not mix
    # with the real output.
    if (NR % 10000 == 0) {
        print "Processed", NR, "lines" > "/dev/stderr"
    }

    # Actual processing logic goes here.
    # ...
}

END {
    print "Total processed:", NR, "lines"
}' huge_file.txt
|
九、安全编程实践
1. 输入验证
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21
| # Defensive input handling
awk '
# Return str with the characters ; & | $ and backquote removed.
function safe_input(str) {
    gsub(/[;&|$`]/, "", str)
    return str
}

# Return 1 when str consists only of decimal digits, 0 otherwise.
function validate_number(str) {
    if (str ~ /^[0-9]+$/)
        return 1
    return 0
}

{
    cleaned = safe_input($1)

    # Reject anything that is not a plain non-negative integer.
    if (!validate_number(cleaned)) {
        print "Invalid input:", cleaned > "/dev/stderr"
        next
    }

    print "Valid number:", cleaned
}' input.txt
|
2. 沙盒模式
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
| # Running in sandbox mode (gawk only: -S is short for --sandbox)
# With a script file:  gawk -S -f script.awk data.txt
# (without -f, gawk would treat "script.awk" as the program text itself)
# Sandbox mode disables system(), getline and print/printf redirections
# (files, pipes, coprocesses) and the loading of extensions.

gawk -S '
BEGIN {
    # These are rejected in sandbox mode:
    # system("rm -rf /")            # fatal error: system() is disabled
    # "/bin/sh" | getline line      # fatal error: redirection is disabled
    print "Running in sandbox mode"
}

{
    print $0
}' data.txt
|
十、高级实用示例
1. 实时日志监控系统
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37
| #!/usr/bin/awk -f
# Real-time log analyzer: counts ERROR/FATAL lines per time window and raises
# an alert when the count exceeds a threshold.
# Requires an awk that provides systime() and strftime() (e.g. gawk).
BEGIN {
    # Configuration
    ERROR_THRESHOLD = 10
    WINDOW_SIZE = 60   # window length in seconds

    # Statistics for the current window
    start_time = systime()
    error_count = 0
}

{
    # Record error lines seen in the current window.
    if ($0 ~ /ERROR|FATAL/) {
        error_count++
        errors[error_count] = $0
    }

    # The window is checked only when a line arrives; once it has elapsed,
    # report and start a new one.
    current_time = systime()
    if (current_time - start_time >= WINDOW_SIZE) {
        report_stats()
        start_time = current_time
        delete errors   # clear the window data
        error_count = 0
    }
}

END {
    # Report the final, partial window so errors are not silently dropped
    # when the input ends before the window elapses.
    if (error_count > 0) {
        report_stats()
    }
}

# Print the error count for the current window and an alert when it exceeds
# ERROR_THRESHOLD.
function report_stats() {
    print strftime("%Y-%m-%d %H:%M:%S"), "- Errors in last minute:", error_count

    if (error_count > ERROR_THRESHOLD) {
        print "ALERT: Error threshold exceeded!"
        # hook for sending an alert mail etc.
    }
}
|
2. 数据可视化工具
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27
| # Simple text bar-chart generator: one bar per input line, from column 1
awk '
BEGIN {
    max_value = 0
}

{
    # Remember every value and track the largest one.
    data[NR] = $1
    if ($1 > max_value) max_value = $1
}

END {
    print "Data Visualization:"
    print "=================="

    # Scale so the largest value spans 50 characters. With empty input or no
    # positive value max_value stays 0; guard against dividing by zero (a
    # fatal error in awk) and draw no bars instead.
    scale = (max_value > 0) ? 50 / max_value : 0

    for (i = 1; i <= NR; i++) {
        bar_length = int(data[i] * scale)
        printf "%3d |", data[i]
        for (j = 1; j <= bar_length; j++) {
            printf "*"
        }
        print ""
    }
}' numbers.txt
|
3. 配置管理器
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
| # INI-style configuration parser with ${name} variable expansion
awk '
BEGIN {
    # Keys that appear before any [section] header go here.
    current_section = "global"
}

# Skip comment lines.
/^[ \t]*#/ { next }

# Skip blank lines.
/^[ \t]*$/ { next }

# Section header: [section]
/^\[.*\]$/ {
    current_section = substr($0, 2, length($0) - 2)
    next
}

# key = value
/^[a-zA-Z_]/ && /=/ {
    # Split at the FIRST "=" only, so values may themselves contain "=".
    eq = index($0, "=")
    key = trim(substr($0, 1, eq - 1))
    value = expand_vars(trim(substr($0, eq + 1)))

    # Store the setting and make it available to later ${key} references.
    config[current_section "/" key] = value
    var_map[key] = value
}

END {
    # Print every setting (awk visits array keys in an unspecified order).
    for (key in config) {
        print key " = " config[key]
    }
}

# Replace each ${name} in str with the value of an earlier key called name;
# unknown names expand to the empty string. This loops over match() because
# RSTART and RLENGTH are set by match(), not by gsub(), and a gsub()
# replacement is evaluated only once, before any matching happens.
function expand_vars(str,    out, name) {
    out = ""
    while (match(str, /\$\{[^}]+\}/)) {
        name = substr(str, RSTART + 2, RLENGTH - 3)
        out = out substr(str, 1, RSTART - 1) ((name in var_map) ? var_map[name] : "")
        str = substr(str, RSTART + RLENGTH)
    }
    return out str
}

# Strip leading and trailing spaces and tabs.
function trim(str) {
    gsub(/^[ \t]+|[ \t]+$/, "", str)
    return str
}' config.ini
|
这些高级应用展示了 AWK 在系统编程、网络通信、数据库集成等方面的强大能力。掌握这些技能后,你可以用 AWK 构建复杂的系统工具和自动化脚本。
AWK 高级应用手册, AWK 网络编程教程, TCP 客户端通信 awk, AWK 套接字编程指南, 高级 AWK 技术详解, AWK 实战案例分析, 网络编程中的 AWK 应用, AWK 高级技巧与实践, Linux AWK 网络开发, AWK 通信程序开发教程