Node.js 性能优化：性能分析、集群与缓存

学习如何识别并修复 Node.js 性能瓶颈，涵盖 CPU 性能分析、内存泄漏、事件循环阻塞、集群、Redis 缓存及数据库查询优化。

Node.js 可以处理数万个并发连接——但前提是你要避免常见的陷阱。本指南将介绍如何识别瓶颈并修复它们。

理解事件循环

Node.js 使用单线程事件循环。这是它在 I/O 密集型工作负载下的超能力，但在 CPU 密集型任务中却是其致命弱点。

// ✅ Non-blocking: the handler awaits I/O, so the event loop stays free for other requests
app.get('/users', async (req, res, next) => {
  try {
    const users = await db.query('SELECT * FROM users'); // I/O: yields to the event loop
    res.json(users);
  } catch (err) {
    // Express 4 does not observe promises returned by handlers; forward the
    // failure to the error middleware so the request does not hang.
    next(err);
  }
});

// ❌ Blocking: the event loop is frozen while this handler runs
app.get('/compute', (req, res) => {
  const result = heavyComputation(); // blocks every other request!
  res.json(result);
});

Node.js 性能优化：性能分析、集群与缓存插图

检测事件循环延迟

const { monitorEventLoopDelay } = require('perf_hooks');

// Histogram of event-loop delay, sampled with a 20 ms resolution.
const h = monitorEventLoopDelay({ resolution: 20 });
h.enable();

// Every 5 seconds: print the current window's statistics, then start a new window.
setInterval(() => {
  // Histogram values are divided by 1e6 and suffixed with "ms" for display.
  const asMs = (value) => `${value / 1e6}ms`;

  const snapshot = {
    min: asMs(h.min),
    max: asMs(h.max),
    mean: asMs(h.mean),
    p99: asMs(h.percentile(99)),
  };
  console.log(snapshot);

  h.reset();
}, 5000);

如果 P99 持续大于 100ms，则存在事件循环阻塞。

CPU 性能分析

内置 Node.js 性能分析器

# 以启用性能分析的方式启动应用
node --prof app.js

# 施加负载后，终止进程
# 处理性能分析文件
node --prof-process isolate-0xXXXXXX-v8.log > profile.txt

# 查找 "Bottom up" 部分以找到热点函数

Clinic.js —— 可视化性能分析

npm install -g clinic

# 分析瓶颈
clinic doctor -- node app.js

# 详细的火焰图
clinic flame -- node app.js

# 异步操作分析（定位异步 I/O 延迟）
clinic bubbleprof -- node app.js

CPU 密集型工作：工作线程

// worker-thread.js
const { parentPort, workerData } = require('worker_threads');

/**
 * CPU-bound demo workload: adds up Math.sqrt(i) for every integer i from 0
 * up to (but not including) data.iterations.
 *
 * @param {{ iterations: number }} data - Work description; `iterations` is the loop bound.
 * @returns {number} The accumulated sum (0 when the loop body never runs).
 */
function heavyCompute(data) {
  let total = 0;
  let step = 0;
  while (step < data.iterations) {
    total += Math.sqrt(step);
    step += 1;
  }
  return total;
}

parentPort.postMessage(heavyCompute(workerData));

// main.js
const { Worker } = require('worker_threads');

/**
 * Runs ./worker-thread.js on a worker thread and resolves with the first
 * message it posts back.
 *
 * @param {*} data - Passed to the worker as `workerData`.
 * @returns {Promise<*>} Resolves with the worker's first message. Rejects if
 *   the worker emits 'error' or exits (with any code) before posting a message.
 */
function runInWorker(data) {
  return new Promise((resolve, reject) => {
    const worker = new Worker('./worker-thread.js', { workerData: data });
    worker.once('message', resolve);
    worker.once('error', reject);
    worker.once('exit', (code) => {
      // A promise settles only once, so this is a no-op after 'message' or
      // 'error'. It matters when the worker exits without posting anything:
      // previously a clean exit (code 0) left the promise pending forever.
      const reason = code === 0
        ? 'Worker exited without posting a result'
        : `Worker exited with code ${code}`;
      reject(new Error(reason));
    });
  });
}

// The computation runs on a worker thread, so the event loop is no longer blocked
app.get('/compute', async (req, res, next) => {
  try {
    const result = await runInWorker({ iterations: 1_000_000 });
    res.json({ result });
  } catch (err) {
    // A failed worker rejects the promise; hand it to the error middleware
    // instead of leaving the request without a response.
    next(err);
  }
});

Node.js 性能优化：性能分析、集群与缓存插图

集群：使用所有 CPU 核心

// cluster.js
const cluster = require('cluster');
const os = require('os');
const numCPUs = os.cpus().length;

if (cluster.isPrimary) {
  // Primary process: fork one worker per CPU and replace workers that crash.
  console.log(`Primary ${process.pid} running`);
  console.log(`Forking ${numCPUs} workers...`);

  for (let i = 0; i < numCPUs; i++) {
    cluster.fork();
  }

  cluster.on('exit', (worker) => {
    // exitedAfterDisconnect is true when the worker left via .disconnect() or
    // .kill(), i.e. an intentional shutdown. Re-forking in that case would
    // make a graceful stop impossible.
    if (worker.exitedAfterDisconnect) {
      console.log(`Worker ${worker.process.pid} exited after disconnect. Not restarting.`);
      return;
    }

    console.log(`Worker ${worker.process.pid} died. Restarting...`);
    cluster.fork(); // automatic restart
  });
} else {
  // Worker process: runs the actual server
  const app = require('./app');
  app.listen(3000, () => {
    console.log(`Worker ${process.pid} started`);
  });
}

在生产环境中，改用 PM2：

npm install -g pm2

# 以集群模式启动（自动检测 CPU 数量）
pm2 start app.js -i max

# 监控
pm2 monit
pm2 logs

使用 Redis 缓存

// cache.ts
import { createClient } from 'redis';

const redis = createClient({ url: process.env.REDIS_URL });
// Register an 'error' listener before connecting: an 'error' event emitted
// with no listener attached is thrown and takes the whole process down.
redis.on('error', (err) => console.error('Redis Client Error', err));
await redis.connect();

/**
 * Cache middleware factory.
 *
 * The returned Express middleware serves a JSON response from Redis when one
 * is stored under `cache:<originalUrl>`; otherwise it wraps `res.json` so the
 * payload sent by the downstream handler is stored for `ttlSeconds`.
 *
 * The cache is best-effort: a Redis read or write failure is logged and the
 * request proceeds as a cache miss. Only 2xx responses are stored, so error
 * payloads are never replayed to later callers.
 *
 * @param {number} [ttlSeconds=60] - Lifetime of a cached entry, in seconds.
 * @returns {(req, res, next) => Promise<*>} Express middleware.
 */
export function cache(ttlSeconds = 60) {
  return async (req, res, next) => {
    const key = `cache:${req.originalUrl}`;

    // Wrapped in an object so a cached falsy payload (0, false, null) still counts as a hit.
    let hit;
    try {
      const cached = await redis.get(key);
      if (cached) hit = { payload: JSON.parse(cached) };
    } catch (err) {
      // Unreachable Redis or a corrupt entry: fall through to the real handler.
      console.error(`Cache read failed for ${key}:`, err);
    }

    if (hit) {
      res.setHeader('X-Cache', 'HIT');
      return res.json(hit.payload);
    }

    // Store without blocking the response; failures are logged, never thrown.
    const persist = async (data) => {
      try {
        await redis.setEx(key, ttlSeconds, JSON.stringify(data));
      } catch (err) {
        console.error(`Cache write failed for ${key}:`, err);
      }
    };

    // Override res.json to capture the payload. It stays synchronous and
    // returns whatever the original res.json returns, so chaining still works.
    const originalJson = res.json.bind(res);
    res.json = (data) => {
      res.setHeader('X-Cache', 'MISS');
      if (res.statusCode >= 200 && res.statusCode < 300) {
        void persist(data);
      }
      return originalJson(data);
    };

    return next();
  };
}

// 使用
app.get('/products', cache(300), getProducts); // 缓存 5 分钟
app.get('/user/:id', cache(60), getUserById);  // 缓存 1 分钟

缓存失效

/**
 * Persists a product change, then drops the cache entries that could still
 * hold the old data.
 *
 * @param {*} id - Product identifier; also used to build the detail cache key.
 * @param {*} data - New field values, passed through to db.update.
 * @returns {Promise<void>}
 */
async function updateProduct(id, data) {
  await db.update('products', id, data);

  // Detail entry first, then the list entry; deleted one after the other.
  const staleKeys = [`cache:/products/${id}`, 'cache:/products'];
  for (const staleKey of staleKeys) {
    await redis.del(staleKey);
  }
}

// Pattern-based invalidation.
// SCAN walks the keyspace incrementally in small batches. KEYS does the same
// walk in one command and blocks the Redis server for its whole duration,
// which stalls every other client on a large dataset.
const keys = [];
for await (const found of redis.scanIterator({ MATCH: 'cache:/products*', COUNT: 100 })) {
  // NOTE(review): node-redis v4 yields one key per iteration, while v5 is
  // documented to yield arrays of keys; flat() accepts both shapes.
  keys.push(...[found].flat());
}
if (keys.length) await redis.del(keys);

数据库查询优化

N+1 问题 —— 头号性能杀手

// ❌ N+1 problem: 1 query for the orders + N queries, one per order, to fetch each user
const orders = await Order.findAll();  // 1 query
for (const order of orders) {
  const user = await User.findById(order.userId);  // N queries!
  order.user = user;
}

// ✅ Fix it with a JOIN / eager loading (include or populate)
const orders = await Order.findAll({
  include: [{ model: User, attributes: ['name', 'email'] }]
}); // 1 query (or 2 when separate SELECTs are used)

// ✅ 或者使用 DataLoader 批量加载
const DataLoader = require('dataloader');
// Batch function: loads all requested users with one query and returns them
// in the same order as `ids` (undefined where no user matched).
const userLoader = new DataLoader(async (ids) => {
  const users = await User.findAll({ where: { id: ids } });
  // Index once so re-ordering is a Map lookup per id, not a users.find() scan per id.
  const usersById = new Map(users.map((user) => [user.id, user]));
  return ids.map((id) => usersById.get(id));
});

// 现在自动批量处理
const user = await userLoader.load(order.userId); // 批量处理！

Node.js 性能优化：性能分析、集群与缓存插图

数据库连接池

// 使用 pg 的 PostgreSQL
const { Pool } = require('pg');

const pool = new Pool({
  connectionString: process.env.DATABASE_URL,
  max: 20,           // maximum number of connections in the pool
  idleTimeoutMillis: 30000,
  connectionTimeoutMillis: 2000,
});

// Errors on idle pooled clients (e.g. the database restarts) are re-emitted on
// the pool. Without a listener that 'error' event is thrown and crashes the process.
pool.on('error', (err) => {
  console.error('Unexpected error on idle PostgreSQL client', err);
});

// 所有查询共享连接池（无重新连接开销）
const result = await pool.query('SELECT * FROM users WHERE id = $1', [userId]);

HTTP 响应优化

压缩

import compression from 'compression';

// Compress responses larger than 1 KB.
// Requests whose Accept header is exactly 'text/event-stream' (SSE streams)
// are never compressed; everything else is decided by compression's default filter.
const shouldCompress = (req, res) => {
  const wantsEventStream = req.headers['accept'] === 'text/event-stream';
  return wantsEventStream ? false : compression.filter(req, res);
};

app.use(compression({
  level: 6,          // 1-9: higher means better compression but more CPU
  threshold: 1024,   // only compress bodies above 1 KB
  filter: shouldCompress,
}));

流式传输大响应

// ❌ Loads the entire result set into memory
app.get('/export', async (req, res) => {
  const allUsers = await User.findAll(); // could be millions of rows!
  res.json(allUsers);
});

// ✅ Stream the response as a JSON array, one user at a time
app.get('/export', async (req, res, next) => {
  // Writes one chunk. When res.write() reports a full buffer, waits for
  // 'drain' (or 'close', if the client goes away) before resolving, so rows
  // are not pulled from the database faster than the socket can send them.
  const writeChunk = async (chunk) => {
    if (res.write(chunk)) return;
    await new Promise((resolve) => {
      const settle = () => {
        res.off('drain', settle);
        res.off('close', settle);
        resolve();
      };
      res.on('drain', settle);
      res.on('close', settle);
    });
  };

  try {
    // Created before any byte is written, so a synchronous failure here can
    // still be reported through the normal error middleware.
    const stream = User.findAllStream(); // cursor-based streaming

    res.setHeader('Content-Type', 'application/json');
    await writeChunk('[');

    let first = true;
    for await (const user of stream) {
      // Client disconnected: stop reading. Returning from for-await calls the
      // iterator's return(), giving it a chance to clean up.
      if (res.destroyed) return;
      await writeChunk(`${first ? '' : ','}${JSON.stringify(user)}`);
      first = false;
    }

    if (!res.destroyed) res.end(']');
  } catch (err) {
    if (res.headersSent) {
      // Part of the body is already on the wire; the only honest signal left
      // is to abort the connection.
      res.destroy(err);
    } else {
      next(err);
    }
  }
});

内存泄漏检测

// Log the process's memory usage every 30 seconds, rounded to whole megabytes.
setInterval(() => {
  const inMB = (bytes) => `${Math.round(bytes / 1024 / 1024)}MB`;
  const { rss, heapTotal, heapUsed, external } = process.memoryUsage();

  console.log({
    rss: inMB(rss),
    heapTotal: inMB(heapTotal),
    heapUsed: inMB(heapUsed),
    external: inMB(external),
  });
}, 30000);

常见内存泄漏原因

// ❌ Leak: the event listener is never removed
class Server {
  constructor() {
    process.on('message', this.handleMessage.bind(this));
    // The bound handler is not stored anywhere, so it can never be passed to process.off()!
  }
}

// ✅ Keep a reference to the listener so it can be removed again
class Server {
  /**
   * Subscribes this instance's handleMessage to the process 'message' event.
   * Calling start() again while already subscribed does nothing; otherwise
   * the earlier listener's reference would be overwritten and leaked.
   */
  start() {
    if (this.messageHandler) return;
    this.messageHandler = this.handleMessage.bind(this);
    process.on('message', this.messageHandler);
  }

  /**
   * Removes the listener registered by start(). Safe to call when not
   * started, and safe to call more than once.
   */
  stop() {
    if (!this.messageHandler) return;
    process.off('message', this.messageHandler);
    this.messageHandler = null;
  }
}

// ❌ Leak: the cache keeps growing and nothing is ever evicted
const cache = new Map();
function cacheData(key, value) {
  cache.set(key, value); // grows forever!
}

// ✅ 使用具有最大大小的 LRU 缓存
import LRU from 'lru-cache';
const cache = new LRU({ max: 500, ttl: 1000 * 60 * 5 });

性能检查清单

请求处理程序中无同步文件操作（fs.readFileSync → fs.promises.readFile）
请求处理程序中无阻塞循环（移至工作线程）
数据库查询使用索引（对慢查询运行 EXPLAIN ANALYZE）
消除 N+1 查询（使用 JOIN 或 DataLoader）
为数据库配置连接池
使用 gzip/brotli 压缩响应
对昂贵且频繁读取的端点使用 Redis 缓存
使用集群模式 / PM2 实现多核利用
监控内存使用，确保内存不会无限增长

→ 使用 Benchmark Builder 对你的工具和算法进行基准测试。

页面加载失败

Node.js 性能优化：性能分析、集群与缓存

Node.js 性能优化：性能分析、集群与缓存

理解事件循环

检测事件循环延迟

CPU 性能分析

内置 Node.js 性能分析器

Clinic.js —— 可视化性能分析

CPU 密集型工作：工作线程

集群：使用所有 CPU 核心

使用 Redis 缓存

缓存失效

数据库查询优化

N+1 问题 —— 头号性能杀手

数据库连接池

HTTP 响应优化

压缩

流式传输大响应

内存泄漏检测

常见内存泄漏原因

性能检查清单