通过构建实际项目学习 Rust
仅靠理论在 Rust 中走不了多远。所有权系统、生命周期和错误处理模式只有在解决实际问题时才能真正掌握。
本指南将逐步构建一个文件处理 CLI 工具,并在需要时引入相应的 Rust 模式。最终你将得到一组可运行的示例,展示地道的 Rust 代码写法。

项目设置
# 创建新的二进制项目
cargo new file-processor
cd file-processor
# 向 Cargo.toml 添加依赖
cargo add clap --features derive
cargo add anyhow
cargo add thiserror
cargo add serde --features derive
cargo add serde_json
cargo add tokio --features full
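cargo add rayon # 并行迭代器
cargo add reqwest # 异步 HTTP 客户端(下载示例使用)
cargo add futures # join_all(并发读取示例使用)
cargo add tempfile --dev # 测试中使用的临时文件
# Cargo.toml
[package]
name = "file-processor"
version = "0.1.0"
edition = "2021"
[dependencies]
clap = { version = "4", features = ["derive"] }
anyhow = "1"
thiserror = "1"
serde = { version = "1", features = ["derive"] }
serde_json = "1"
tokio = { version = "1", features = ["full"] }
rayon = "1"
reqwest = "0.12"
futures = "0.3"
[dev-dependencies]
tempfile = "3"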
使用 Clap 构建命令行界面
// src/main.rs
use clap::{Parser, Subcommand};
use std::path::PathBuf;
// Top-level command-line arguments for the `file-processor` binary.
//
// NOTE: `///` comments inside clap-derived types are emitted as `--help`
// text at runtime, so they are kept verbatim; explanatory notes use `//`.
#[derive(Parser)]
#[command(
    name = "file-processor",
    about = "一个用于处理文本文件的工具",
    long_about = None,
    version = "1.0"
)]
struct Cli {
    /// 详细输出
    // Stored with `ArgAction::Count`, hence the integer type.
    #[arg(short, long, action = clap::ArgAction::Count)]
    verbose: u8,

    // The subcommand chosen on the command line.
    #[command(subcommand)]
    command: Commands,
}
// Subcommands understood by the tool.
//
// NOTE: the `///` comments below are clap help text shown to the user at
// runtime; they are reproduced verbatim. Explanatory notes use `//`.
#[derive(Subcommand)]
enum Commands {
    /// 统计文件的行数、单词数和字符数
    Count {
        /// 要处理的文件
        // At least one file must be supplied.
        #[arg(required = true)]
        files: Vec<PathBuf>,

        /// 仅显示行数
        #[arg(short, long)]
        lines_only: bool,
    },

    /// 在文件中搜索模式
    Search {
        /// 要搜索的模式
        pattern: String,

        /// 要搜索的文件
        // At least one file must be supplied.
        #[arg(required = true)]
        files: Vec<PathBuf>,

        /// 不区分大小写搜索
        #[arg(short, long)]
        ignore_case: bool,
    },

    /// 转换文件(CSV 转 JSON 等)
    Convert {
        // Source file.
        input: PathBuf,

        // Destination file.
        output: PathBuf,

        // Format name; falls back to "auto" when the flag is omitted.
        #[arg(long, default_value = "auto")]
        format: String,
    },
}
fn main() -> anyhow::Result<()> {
let cli = Cli::parse();
match cli.command {
Commands::Count { files, lines_only } => {
count_command(files, lines_only, cli.verbose)?;
}
Commands::Search { pattern, files, ignore_case } => {
search_command(pattern, files, ignore_case)?;
}
Commands::Convert { input, output, format } => {
convert_command(input, output, format)?;
}
}
Ok(())
}
错误处理:thiserror 和 anyhow
// src/errors.rs
use thiserror::Error;
use std::path::PathBuf;
// 使用 thiserror 定义领域错误
#[derive(Error, Debug)]
pub enum ProcessingError {
#[error("文件未找到: {path}")]
FileNotFound { path: PathBuf },
#[error("权限被拒绝: {path}")]
PermissionDenied { path: PathBuf },
#[error("文件 {file} 格式无效: {reason}")]
InvalidFormat { file: PathBuf, reason: String },
#[error("IO 错误: {0}")]
Io(#[from] std::io::Error), // 自动实现 From
#[error("JSON 错误: {0}")]
Json(#[from] serde_json::Error),
}
// 在库代码中:使用具体错误类型
pub fn parse_file(path: &PathBuf) -> Result<Vec<Record>, ProcessingError> {
let content = std::fs::read_to_string(path).map_err(|e| {
if e.kind() == std::io::ErrorKind::NotFound {
ProcessingError::FileNotFound { path: path.clone() }
} else if e.kind() == std::io::ErrorKind::PermissionDenied {
ProcessingError::PermissionDenied { path: path.clone() }
} else {
ProcessingError::Io(e)
}
})?;
// 解析 CSV
let records: Vec<Record> = content
.lines()
.skip(1) // 跳过表头
.map(|line| parse_csv_line(line))
.collect::<Result<Vec<_>, _>>()?;
Ok(records)
}
// 在应用代码中:使用 anyhow 简化错误传播
fn main() -> anyhow::Result<()> {
let records = parse_file(&PathBuf::from("data.csv"))
.with_context(|| "解析 data.csv 失败")?;
println!("解析了 {} 条记录", records.len());
Ok(())
}
文件 I/O 模式
use std::fs::{self, File};
use std::io::{self, BufRead, BufReader, BufWriter, Write};
use std::path::{Path, PathBuf};
/// Counts the lines in the file at `path` using buffered reads.
///
/// Lines are delimited by `\n`; a final line without a trailing newline is
/// still counted. Bytes are not required to be valid UTF-8.
///
/// # Errors
///
/// Returns the underlying I/O error if the file cannot be opened or a read
/// fails (for example when `path` refers to a directory).
fn count_lines(path: &Path) -> io::Result<usize> {
    let mut reader = BufReader::new(File::open(path)?);
    let mut line = Vec::new();
    let mut count = 0;

    // `read_until` yields `Ok(0)` only at end of input. Propagating its error
    // with `?` matters: counting the items of `lines()` would treat every
    // read error as a line and never terminate if the error persists.
    while reader.read_until(b'\n', &mut line)? != 0 {
        count += 1;
        line.clear(); // reuse the same buffer for the next line
    }

    Ok(count)
}
/// Streams the file at `path` line by line, handing each line to
/// `process_line`, so only one line is held in memory at a time.
///
/// A progress message is printed whenever the zero-based line index is a
/// multiple of 100,000 (which includes the very first line).
///
/// # Errors
///
/// Stops at, and returns, the first I/O error or the first error reported by
/// `process_line`.
fn process_large_file(path: &Path) -> anyhow::Result<()> {
    let lines = BufReader::new(File::open(path)?).lines();

    lines
        .enumerate()
        .try_for_each(|(index, entry)| -> anyhow::Result<()> {
            // Surface read errors before doing anything else with the line.
            let text = entry?;
            if index % 100_000 == 0 {
                println!("正在处理第 {} 行", index);
            }
            process_line(&text)?;
            Ok(())
        })
}
/// Writes `data` to `path` as CSV: a fixed `id,name,value` header followed by
/// one row per record. Any existing file at `path` is replaced.
///
/// # Errors
///
/// Returns the first I/O error hit while creating, writing or flushing.
fn write_results(path: &Path, data: &[Record]) -> io::Result<()> {
    // Buffer the output so each row does not become its own write call.
    let mut out = BufWriter::new(File::create(path)?);

    writeln!(out, "id,name,value")?;
    data.iter()
        .try_for_each(|row| writeln!(out, "{},{},{}", row.id, row.name, row.value))?;

    // Flush explicitly so a failure is reported instead of being lost when
    // the writer is dropped.
    out.flush()
}
/// Recursively collects every file under `dir` whose extension equals
/// `extension` (given without the leading dot).
///
/// Directories that cannot be listed and entries that cannot be read are
/// skipped silently, so the result is best-effort.
fn find_files(dir: &Path, extension: &str) -> Vec<PathBuf> {
    let Ok(listing) = fs::read_dir(dir) else {
        return Vec::new();
    };

    let mut found = Vec::new();
    for candidate in listing.filter_map(Result::ok).map(|entry| entry.path()) {
        if candidate.is_dir() {
            // Descend and splice the sub-directory's matches in place.
            let mut nested = find_files(&candidate, extension);
            found.append(&mut nested);
            continue;
        }

        let has_wanted_ext = candidate.extension().and_then(|e| e.to_str()) == Some(extension);
        if has_wanted_ext {
            found.push(candidate);
        }
    }
    found
}
使用 Rayon 进行并行处理
use rayon::prelude::*;
use std::time::Instant;
/// Analyzes every path in `files` in parallel using rayon.
///
/// Returns one `Result` per input, so a failure on one file does not abort
/// the others.
fn process_files_parallel(files: &[PathBuf]) -> Vec<Result<FileStats, ProcessingError>> {
    // `par_iter` is the parallel counterpart of `iter`.
    let per_file = files.par_iter().map(|file| analyze_file(file.as_path()));
    per_file.collect()
}
/// Reads the whole file at `path` and computes its line, word, character and
/// byte counts.
///
/// # Errors
///
/// Returns the read failure converted into a `ProcessingError` by `?`.
fn analyze_file(path: &Path) -> Result<FileStats, ProcessingError> {
    let text = fs::read_to_string(path)?;

    let lines = text.lines().count();
    let words = text.split_whitespace().count();
    // `chars` counts Unicode scalar values, `bytes` the UTF-8 encoded length.
    let chars = text.chars().count();
    let bytes = text.len();

    Ok(FileStats {
        path: path.to_path_buf(),
        lines,
        words,
        chars,
        bytes,
    })
}
fn main() {
let files: Vec<PathBuf> = find_files(Path::new("."), "txt");
println!("正在处理 {} 个文件...", files.len());
let start = Instant::now();
let results = process_files_parallel(&files);
println!("完成,耗时 {:?}", start.elapsed());
let (successes, errors): (Vec<_>, Vec<_>) = results.into_iter().partition(|r| r.is_ok());
let total_lines: usize = successes
.iter()
.filter_map(|r| r.as_ref().ok())
.map(|s| s.lines)
.sum();
println!("总行数: {}", total_lines);
println!("错误数: {}", errors.len());
}
使用 Tokio 进行异步编程
use tokio::fs;
use tokio::io::{AsyncBufReadExt, BufReader};
// 异步文件读取
async fn read_file_async(path: &str) -> anyhow::Result<String> {
let content = fs::read_to_string(path).await?;
Ok(content)
}
/// Downloads `url` with an HTTP GET and writes the response body to
/// `output_path`, then prints how many bytes were saved.
///
/// # Errors
///
/// Fails if the request cannot be sent, the response status is not a success
/// status, the body cannot be read, or the file cannot be written.
async fn download_and_save(url: &str, output_path: &str) -> anyhow::Result<()> {
    let response = reqwest::Client::new().get(url).send().await?;

    // Reject non-success responses before touching the filesystem.
    let status = response.status();
    if !status.is_success() {
        anyhow::bail!("HTTP 错误: {}", status);
    }

    let body = response.bytes().await?;
    fs::write(output_path, &body).await?;

    println!("已下载 {} 字节到 {}", body.len(), output_path);
    Ok(())
}
/// Reads all of `paths` concurrently and returns one result per path, in the
/// same order as the input.
async fn process_files_concurrent(paths: &[&str]) -> Vec<anyhow::Result<String>> {
    // Build one read future per path; `join_all` then drives them together
    // and gathers the outputs.
    let pending = paths.iter().copied().map(|path| read_file_async(path));
    futures::future::join_all(pending).await
}
/// Tokio entry point: reads three files concurrently and reports, per file,
/// either its character count or the error that occurred.
#[tokio::main]
async fn main() -> anyhow::Result<()> {
    let paths = ["file1.txt", "file2.txt", "file3.txt"];
    let results = process_files_concurrent(&paths).await;

    for (path, result) in paths.iter().zip(results) {
        match result {
            // The message reports characters, so count Unicode scalar values;
            // `String::len` would give the UTF-8 byte length instead.
            Ok(content) => println!("{}: {} 字符", path, content.chars().count()),
            Err(e) => eprintln!("{}: 错误 — {}", path, e),
        }
    }

    Ok(())
}
Rust 中的测试
// Unit tests living in the same file as the code under test.
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// Three written lines must be counted as three.
    #[test]
    fn test_count_lines() {
        let mut file = NamedTempFile::new().unwrap();
        writeln!(file, "line 1").unwrap();
        writeln!(file, "line 2").unwrap();
        writeln!(file, "line 3").unwrap();

        let count = count_lines(file.path()).unwrap();
        assert_eq!(count, 3);
    }

    /// A well-formed CSV line is split into its three fields.
    #[test]
    fn test_parse_csv_record() {
        let line = "1,Alice,100.5";
        let record = parse_csv_line(line).unwrap();

        assert_eq!(record.id, 1);
        assert_eq!(record.name, "Alice");
        assert!((record.value - 100.5).abs() < f64::EPSILON);
    }

    /// A missing file maps to the dedicated `FileNotFound` variant.
    #[test]
    fn test_file_not_found_error() {
        let result = parse_file(&PathBuf::from("/nonexistent/file.csv"));
        assert!(matches!(result, Err(ProcessingError::FileNotFound { .. })));
    }

    /// Async test: reads back a file the test itself created, so it does not
    /// depend on any file that happens to exist on the host system.
    #[tokio::test]
    async fn test_read_file_async() {
        let mut file = NamedTempFile::new().unwrap();
        write!(file, "hello").unwrap();
        let path = file
            .path()
            .to_str()
            .expect("temporary file path should be valid UTF-8");

        let content = read_file_async(path).await.unwrap();
        assert_eq!(content, "hello");
    }
}
Trait:面向 Rust 开发者的接口
/// Common interface implemented by every file processor.
trait FileProcessor {
    /// Processes the file at `path` and returns the outcome.
    fn process(&self, path: &Path) -> anyhow::Result<ProcessResult>;

    /// Reports whether this processor handles files with `extension`.
    fn supports(&self, extension: &str) -> bool;

    /// Lists the extensions this processor handles.
    fn supported_extensions(&self) -> Vec<&str>;

    /// Human-readable description built from the supported extensions.
    /// Provided as a default so implementors only override it when needed.
    fn description(&self) -> String {
        let extensions = self.supported_extensions();
        format!("处理器,支持 {:?}", extensions)
    }
}
// Implementations: stateless unit structs, one per supported file type.
/// Processor for files with the `csv` extension.
struct CsvProcessor;
/// Processor for files with the `json` extension.
struct JsonProcessor;
impl FileProcessor for CsvProcessor {
    /// Placeholder for CSV-specific processing: the path is not read yet and
    /// the reported count is always zero.
    fn process(&self, _path: &Path) -> anyhow::Result<ProcessResult> {
        let outcome = ProcessResult { count: 0 };
        Ok(outcome)
    }

    /// Accepts exactly the `csv` extension (case-sensitive).
    fn supports(&self, ext: &str) -> bool {
        matches!(ext, "csv")
    }

    /// Returns the single supported extension, `csv`.
    fn supported_extensions(&self) -> Vec<&str> {
        Vec::from(["csv"])
    }
}
impl FileProcessor for JsonProcessor {
    /// Placeholder for JSON-specific processing: the path is not read yet and
    /// the reported count is always zero.
    fn process(&self, _path: &Path) -> anyhow::Result<ProcessResult> {
        let outcome = ProcessResult { count: 0 };
        Ok(outcome)
    }

    /// Accepts exactly the `json` extension (case-sensitive).
    fn supports(&self, ext: &str) -> bool {
        matches!(ext, "json")
    }

    /// Returns the single supported extension, `json`.
    fn supported_extensions(&self) -> Vec<&str> {
        Vec::from(["json"])
    }
}
/// Picks a processor for `path` based on its file extension, using
/// `Box<dyn FileProcessor>` for dynamic dispatch.
///
/// Returns `None` when the path has no extension, the extension is not valid
/// UTF-8, or no processor supports it.
fn get_processor(path: &Path) -> Option<Box<dyn FileProcessor>> {
    let ext = path.extension().and_then(|raw| raw.to_str())?;

    // Candidates are tried in order; the first one that supports the
    // extension wins.
    let candidates: [Box<dyn FileProcessor>; 2] =
        [Box::new(CsvProcessor), Box::new(JsonProcessor)];

    candidates
        .into_iter()
        .find(|candidate| candidate.supports(ext))
}
构建实际的 Rust 程序会让你明白为什么这条学习曲线是值得的:编译期检查能在程序运行之前排除内存安全问题和数据竞争等一整类错误(逻辑错误仍需依靠测试来发现)。因此,与内存不安全的语言相比,在生产环境中调试的体验要好得多。
→ 使用 Text to Binary 转换器将文本转换为二进制表示。