正在加载,请稍候…

Rust 实用系统编程:CLI 工具、文件 I/O 与错误处理

通过构建实际程序学习 Rust:使用 clap 创建命令行工具、文件 I/O、thiserror/anyhow 错误处理、Tokio 异步编程,以及编写地道的生产级 Rust 代码。

Rust 实用系统编程:CLI 工具、文件 I/O 与错误处理

通过构建实际项目学习 Rust

仅靠理论在 Rust 中走不了多远。所有权系统、生命周期和错误处理模式只有在解决实际问题时才能真正掌握。

本指南将逐步构建一个文件处理 CLI 工具,并在用到时引入相应的 Rust 惯用模式。学完之后,你将得到一套可以运行、体现地道 Rust 写法的示例代码。

Rust 实用系统编程:CLI 工具、文件 I/O 与错误处理 插图

项目设置

# Create a new binary project
cargo new file-processor
cd file-processor

# Add the dependencies to Cargo.toml
cargo add clap --features derive
cargo add anyhow
cargo add thiserror
cargo add serde --features derive
cargo add serde_json
cargo add tokio --features full
cargo add rayon  # parallel iterators
cargo add reqwest  # HTTP client used by the async download example
cargo add futures  # provides join_all for the concurrent file-reading example
cargo add --dev tempfile  # temporary files for the unit tests
# Cargo.toml
[package]
name = "file-processor"
version = "0.1.0"
edition = "2021"

[dependencies]
clap = { version = "4", features = ["derive"] }
anyhow = "1"
thiserror = "1"
serde = { version = "1", features = ["derive"] }
serde_json = "1"
tokio = { version = "1", features = ["full"] }
rayon = "1"
reqwest = "0.12"
futures = "0.3"

[dev-dependencies]
tempfile = "3"

使用 Clap 构建命令行界面

// src/main.rs
use clap::{Parser, Subcommand};
use std::path::PathBuf;

// Top-level command-line arguments for the tool, parsed via clap's derive API.
// NOTE: the `///` lines inside this struct are not ordinary comments: clap's derive
// macro turns them into the help text shown to users, so they are left as written.
// NOTE(review): the version is hard-coded to "1.0" here while Cargo.toml declares
// 0.1.0 — confirm which is intended (`#[command(version)]` would reuse the Cargo value).
#[derive(Parser)]
#[command(name = "file-processor")]
#[command(about = "一个用于处理文本文件的工具", long_about = None)]
#[command(version = "1.0")]
struct Cli {
    /// 详细输出
    // Counted flag (`ArgAction::Count`): repeating the flag raises `verbose`.
    #[arg(short, long, action = clap::ArgAction::Count)]
    verbose: u8,
    
    // The subcommand chosen by the user; `main` dispatches on it.
    #[command(subcommand)]
    command: Commands,
}

// The subcommands understood by the tool. As with `Cli`, the `///` lines are
// consumed by clap's derive macro as user-facing help text, so they stay as written.
#[derive(Subcommand)]
enum Commands {
    /// 统计文件的行数、单词数和字符数
    Count {
        /// 要处理的文件
        // At least one path must be supplied (`required = true`).
        #[arg(required = true)]
        files: Vec<PathBuf>,
        
        /// 仅显示行数
        #[arg(short, long)]
        lines_only: bool,
    },
    
    /// 在文件中搜索模式
    Search {
        /// 要搜索的模式
        // Positional argument: declared before `files`, so it is parsed first.
        pattern: String,
        
        /// 要搜索的文件
        #[arg(required = true)]
        files: Vec<PathBuf>,
        
        /// 不区分大小写搜索
        #[arg(short, long)]
        ignore_case: bool,
    },
    
    /// 转换文件(CSV 转 JSON 等)
    Convert {
        // Source and destination paths, both positional.
        input: PathBuf,
        output: PathBuf,
        
        // Free-form format name; defaults to the literal string "auto".
        // NOTE(review): the set of accepted values is not visible here — presumably
        // validated inside `convert_command`; an enum would make it checkable by clap.
        #[arg(long, default_value = "auto")]
        format: String,
    },
}

fn main() -> anyhow::Result<()> {
    let cli = Cli::parse();
    
    match cli.command {
        Commands::Count { files, lines_only } => {
            count_command(files, lines_only, cli.verbose)?;
        }
        Commands::Search { pattern, files, ignore_case } => {
            search_command(pattern, files, ignore_case)?;
        }
        Commands::Convert { input, output, format } => {
            convert_command(input, output, format)?;
        }
    }
    
    Ok(())
}

Rust 实用系统编程:CLI 工具、文件 I/O 与错误处理 插图

错误处理:thiserror 和 anyhow

// src/errors.rs
use thiserror::Error;
use std::path::PathBuf;

// Domain errors defined with thiserror
/// Errors produced while reading and parsing input files.
///
/// The `#[error("...")]` attributes supply the `Display` text for each variant;
/// those strings are runtime output and are kept as written.
#[derive(Error, Debug)]
pub enum ProcessingError {
    /// The file at `path` does not exist.
    #[error("文件未找到: {path}")]
    FileNotFound { path: PathBuf },
    
    /// The file at `path` could not be accessed due to permissions.
    #[error("权限被拒绝: {path}")]
    PermissionDenied { path: PathBuf },
    
    /// The contents of `file` were rejected; `reason` carries the explanation.
    #[error("文件 {file} 格式无效: {reason}")]
    InvalidFormat { file: PathBuf, reason: String },
    
    /// Any other I/O failure.
    #[error("IO 错误: {0}")]
    Io(#[from] std::io::Error), // `#[from]` generates the `From<std::io::Error>` impl, so `?` converts automatically
    
    /// A JSON (de)serialization failure reported by serde_json.
    #[error("JSON 错误: {0}")]
    Json(#[from] serde_json::Error),
}

// 在库代码中:使用具体错误类型
pub fn parse_file(path: &PathBuf) -> Result<Vec<Record>, ProcessingError> {
    let content = std::fs::read_to_string(path).map_err(|e| {
        if e.kind() == std::io::ErrorKind::NotFound {
            ProcessingError::FileNotFound { path: path.clone() }
        } else if e.kind() == std::io::ErrorKind::PermissionDenied {
            ProcessingError::PermissionDenied { path: path.clone() }
        } else {
            ProcessingError::Io(e)
        }
    })?;
    
    // 解析 CSV
    let records: Vec<Record> = content
        .lines()
        .skip(1) // 跳过表头
        .map(|line| parse_csv_line(line))
        .collect::<Result<Vec<_>, _>>()?;
    
    Ok(records)
}

// 在应用代码中:使用 anyhow 简化错误传播
fn main() -> anyhow::Result<()> {
    let records = parse_file(&PathBuf::from("data.csv"))
        .with_context(|| "解析 data.csv 失败")?;
    
    println!("解析了 {} 条记录", records.len());
    Ok(())
}

文件 I/O 模式

use std::fs::{self, File};
use std::io::{self, BufRead, BufReader, BufWriter, Write};
use std::path::{Path, PathBuf};

/// Counts the lines of the file at `path` using buffered reads.
///
/// A line is a run of bytes ending in `\n`; a final run without a trailing
/// newline also counts as a line. The bytes are not decoded, so files that are
/// not valid UTF-8 are counted as well.
///
/// # Errors
///
/// Returns the error from opening the file or from any read. Propagating read
/// errors matters: counting the items of `lines()` treats every `Err` as a line
/// and never stops if the reader keeps failing (for example when `path` is a
/// directory).
fn count_lines(path: &Path) -> io::Result<usize> {
    let file = File::open(path)?;
    let mut reader = BufReader::new(file); // buffered reads keep large files cheap

    // One buffer reused for every line instead of allocating a String per line.
    let mut line = Vec::new();
    let mut count = 0;
    while reader.read_until(b'\n', &mut line)? != 0 {
        count += 1;
        line.clear();
    }

    Ok(count)
}

/// Streams the file at `path` one line at a time and hands each line to
/// `process_line`, so only a single line is held in memory.
///
/// Prints a progress message for every 100 000th line index (starting with
/// index 0).
///
/// # Errors
///
/// Stops at, and returns, the first failure from opening the file, reading a
/// line, or `process_line`.
fn process_large_file(path: &Path) -> anyhow::Result<()> {
    let reader = BufReader::new(File::open(path)?);

    reader
        .lines()
        .enumerate()
        .try_for_each(|(index, text)| -> anyhow::Result<()> {
            // A failed read surfaces here as an I/O error.
            let text = text?;

            if index % 100_000 == 0 {
                println!("正在处理第 {} 行", index);
            }

            process_line(&text)?;
            Ok(())
        })
}

/// Writes `data` to `path` as comma-separated text: an `id,name,value` header
/// followed by one row per record.
///
/// The file is created (or truncated) and written through a buffer.
///
/// # Errors
///
/// Returns the first I/O error from creating, writing or flushing the file.
fn write_results(path: &Path, data: &[Record]) -> io::Result<()> {
    let mut out = BufWriter::new(File::create(path)?);

    // Header row.
    out.write_all(b"id,name,value\n")?;

    data.iter()
        .try_for_each(|row| writeln!(out, "{},{},{}", row.id, row.name, row.value))?;

    // Flush explicitly: dropping the writer would also flush, but would hide a failure.
    out.flush()
}

/// Recursively collects the paths under `dir` whose extension equals `extension`
/// (given without the leading dot).
///
/// Best-effort traversal: a directory that cannot be listed, or an entry that
/// cannot be read, is skipped silently rather than reported.
fn find_files(dir: &Path, extension: &str) -> Vec<PathBuf> {
    let entries = match fs::read_dir(dir) {
        Ok(entries) => entries,
        Err(_) => return Vec::new(),
    };

    let mut found = Vec::new();
    for candidate in entries.filter_map(Result::ok).map(|entry| entry.path()) {
        if candidate.is_dir() {
            // Descend and splice the sub-directory's matches in at this position.
            found.append(&mut find_files(&candidate, extension));
            continue;
        }

        let has_wanted_ext = candidate.extension().and_then(|ext| ext.to_str()) == Some(extension);
        if has_wanted_ext {
            found.push(candidate);
        }
    }

    found
}

Rust 实用系统编程:CLI 工具、文件 I/O 与错误处理 插图

使用 Rayon 进行并行处理

use rayon::prelude::*;
use std::time::Instant;

/// Runs `analyze_file` over every path in `files` using rayon's parallel
/// iterator and returns one result per input path.
///
/// Failures are returned in place rather than aborting the whole batch.
fn process_files_parallel(files: &[PathBuf]) -> Vec<Result<FileStats, ProcessingError>> {
    // `par_iter()` is the parallel counterpart of `iter()`.
    let analyses = files.par_iter().map(|file| analyze_file(file.as_path()));
    analyses.collect()
}

/// Reads the whole file at `path` as UTF-8 text and computes its size statistics:
/// line, whitespace-separated word, character (Unicode scalar value) and byte counts.
///
/// # Errors
///
/// Returns the read failure converted into [`ProcessingError`] by `?`.
fn analyze_file(path: &Path) -> Result<FileStats, ProcessingError> {
    let text = fs::read_to_string(path)?;

    let lines = text.lines().count();
    let words = text.split_whitespace().count();
    let chars = text.chars().count();
    let bytes = text.len();

    Ok(FileStats {
        path: path.to_path_buf(),
        lines,
        words,
        chars,
        bytes,
    })
}

fn main() {
    let files: Vec<PathBuf> = find_files(Path::new("."), "txt");
    
    println!("正在处理 {} 个文件...", files.len());
    let start = Instant::now();
    
    let results = process_files_parallel(&files);
    
    println!("完成,耗时 {:?}", start.elapsed());
    
    let (successes, errors): (Vec<_>, Vec<_>) = results.into_iter().partition(|r| r.is_ok());
    
    let total_lines: usize = successes
        .iter()
        .filter_map(|r| r.as_ref().ok())
        .map(|s| s.lines)
        .sum();
    
    println!("总行数: {}", total_lines);
    println!("错误数: {}", errors.len());
}

使用 Tokio 进行异步编程

use tokio::fs;
use tokio::io::{AsyncBufReadExt, BufReader};

// 异步文件读取
async fn read_file_async(path: &str) -> anyhow::Result<String> {
    let content = fs::read_to_string(path).await?;
    Ok(content)
}

/// Downloads `url` with an HTTP GET and writes the response body to `output_path`,
/// then prints how many bytes were saved.
///
/// # Errors
///
/// Fails when the request cannot be sent, when the response status is not a
/// success status, when the body cannot be read, or when the file cannot be written.
async fn download_and_save(url: &str, output_path: &str) -> anyhow::Result<()> {
    let response = reqwest::Client::new().get(url).send().await?;

    // Reject non-success responses before touching the body.
    let status = response.status();
    if !status.is_success() {
        anyhow::bail!("HTTP 错误: {}", status);
    }

    let body = response.bytes().await?;
    fs::write(output_path, &body).await?;

    println!("已下载 {} 字节到 {}", body.len(), output_path);
    Ok(())
}

/// Reads all `paths` concurrently and returns one result per path, in the same
/// order as the input.
///
/// Individual failures are returned in place; they do not cancel the other reads.
async fn process_files_concurrent(paths: &[&str]) -> Vec<anyhow::Result<String>> {
    // Futures are lazy: building them starts no work until `join_all` is awaited.
    let mut pending = Vec::with_capacity(paths.len());
    for &path in paths {
        pending.push(read_file_async(path));
    }

    futures::future::join_all(pending).await
}

// Tokio entry point
/// Reads three fixed files concurrently and prints, for each one, either its
/// character count or the error that occurred.
///
/// Per-file failures are reported on stderr and do not make `main` fail.
#[tokio::main]
async fn main() -> anyhow::Result<()> {
    let paths = ["file1.txt", "file2.txt", "file3.txt"];

    let results = process_files_concurrent(&paths).await;

    // Results come back in input order, so zipping pairs each path with its outcome.
    for (path, result) in paths.iter().zip(results) {
        match result {
            // The message reports characters, so count Unicode scalar values:
            // `len()` would give the UTF-8 byte length, which is larger for
            // any non-ASCII text.
            Ok(content) => println!("{}: {} 字符", path, content.chars().count()),
            Err(e) => eprintln!("{}: 错误 — {}", path, e),
        }
    }

    Ok(())
}

Rust 中的测试

// Unit tests living in the same file as the code under test
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// Three written lines must be counted as three.
    #[test]
    fn test_count_lines() {
        let mut file = NamedTempFile::new().unwrap();
        writeln!(file, "line 1").unwrap();
        writeln!(file, "line 2").unwrap();
        writeln!(file, "line 3").unwrap();

        let count = count_lines(file.path()).unwrap();
        assert_eq!(count, 3);
    }

    /// A well-formed CSV row is split into its three typed fields.
    #[test]
    fn test_parse_csv_record() {
        let line = "1,Alice,100.5";
        let record = parse_csv_line(line).unwrap();

        assert_eq!(record.id, 1);
        assert_eq!(record.name, "Alice");
        // Compare floats with a tolerance instead of `==`.
        assert!((record.value - 100.5).abs() < f64::EPSILON);
    }

    /// A missing file is reported through the dedicated `FileNotFound` variant.
    #[test]
    fn test_file_not_found_error() {
        let result = parse_file(&PathBuf::from("/nonexistent/file.csv"));

        assert!(matches!(result, Err(ProcessingError::FileNotFound { .. })));
    }

    // Async test
    /// Reads back a file the test created itself, so the test does not depend on
    /// any file that happens to exist on the host (such as `/etc/hostname`).
    #[tokio::test]
    async fn test_read_file_async() {
        let mut file = NamedTempFile::new().unwrap();
        write!(file, "hello").unwrap();
        let path = file
            .path()
            .to_str()
            .expect("temporary file path should be valid UTF-8");

        let content = read_file_async(path).await.unwrap();
        assert_eq!(content, "hello");
    }
}

Trait:面向 Rust 开发者的接口

// Trait definition
/// Common interface for the per-format file processors.
trait FileProcessor {
    /// Processes the file at `path` and returns the outcome.
    fn process(&self, path: &Path) -> anyhow::Result<ProcessResult>;

    /// Reports whether this processor handles files with the given `extension`.
    fn supports(&self, extension: &str) -> bool;

    /// Lists the file extensions this processor handles.
    fn supported_extensions(&self) -> Vec<&str>;

    /// Human-readable summary built from `supported_extensions`.
    ///
    /// Provided method: implementors get it for free and may override it.
    fn description(&self) -> String {
        let extensions = self.supported_extensions();
        format!("处理器,支持 {:?}", extensions)
    }
}

// Implementations
/// Field-less type that implements `FileProcessor` for CSV files.
struct CsvProcessor;
/// Field-less type that implements `FileProcessor` for JSON files.
struct JsonProcessor;

impl FileProcessor for CsvProcessor {
    /// Placeholder for the CSV-specific processing: currently ignores the path
    /// and reports a count of zero.
    fn process(&self, _path: &Path) -> anyhow::Result<ProcessResult> {
        let result = ProcessResult { count: 0 };
        Ok(result)
    }

    /// True only for the exact extension `csv`.
    fn supports(&self, ext: &str) -> bool {
        matches!(ext, "csv")
    }

    /// The single extension handled by this processor.
    fn supported_extensions(&self) -> Vec<&str> {
        ["csv"].to_vec()
    }
}

impl FileProcessor for JsonProcessor {
    /// Placeholder for the JSON-specific processing: currently ignores the path
    /// and reports a count of zero.
    fn process(&self, _path: &Path) -> anyhow::Result<ProcessResult> {
        let result = ProcessResult { count: 0 };
        Ok(result)
    }

    /// True only for the exact extension `json`.
    fn supports(&self, ext: &str) -> bool {
        matches!(ext, "json")
    }

    /// The single extension handled by this processor.
    fn supported_extensions(&self) -> Vec<&str> {
        ["json"].to_vec()
    }
}

// Dynamic dispatch through Box<dyn Trait>
/// Picks the processor that supports the extension of `path`.
///
/// Returns `None` when the path has no extension, when the extension is not
/// valid UTF-8, or when no known processor supports it.
fn get_processor(path: &Path) -> Option<Box<dyn FileProcessor>> {
    let ext = path.extension().and_then(|raw| raw.to_str())?;

    // Candidates are tried in this order; the first one that matches wins.
    let candidates: Vec<Box<dyn FileProcessor>> =
        vec![Box::new(CsvProcessor), Box::new(JsonProcessor)];

    for candidate in candidates {
        if candidate.supports(ext) {
            return Some(candidate);
        }
    }

    None
}

构建实际的 Rust 程序能让你体会到为什么陡峭的学习曲线是值得的:编译时检查会在程序运行之前排除一整类错误(例如空指针解引用、释放后使用和数据竞争),因此能通过编译的程序往往已经相当可靠。逻辑错误仍然需要靠测试来发现,但与内存不安全的语言相比,在生产环境中调试的体验要好得多。

→ 使用 Text to Binary 转换器将文本转换为二进制表示。