正在加载,请稍候…

SQL 窗口函数:RANK、LAG、LEAD 与运行总计

掌握 SQL 窗口函数以进行分析查询。学习 RANK、ROW_NUMBER、LAG/LEAD、使用 SUM OVER 的运行总计、移动平均以及实际业务用例。

SQL 窗口函数:RANK、LAG、LEAD 与运行总计

SQL 窗口函数:让分析变得简单

窗口函数对与当前行相关的一组行执行计算。

排名函数

-- ROW_NUMBER:唯一的顺序编号
SELECT
  employee_id,
  name,
  department,
  salary,
  ROW_NUMBER() OVER (PARTITION BY department ORDER BY salary DESC) AS dept_rank
FROM employees;

-- RANK:并列排名相同,之后有间隔
SELECT
  name,
  score,
  RANK() OVER (ORDER BY score DESC) AS rank,
  DENSE_RANK() OVER (ORDER BY score DESC) AS dense_rank
FROM game_scores;

-- 分数:100, 100, 90 -> RANK:1, 1, 3 | DENSE_RANK:1, 1, 2

-- NTILE:将行分成 N 个相等的组
SELECT
  name,
  salary,
  NTILE(4) OVER (ORDER BY salary) AS salary_quartile
FROM employees;
-- 1=底部 25%, 2=25-50%, 3=50-75%, 4=顶部 25%

SQL 窗口函数:RANK、LAG、LEAD 与运行总计 插图

LAG 和 LEAD

-- 与上一行/下一行比较
SELECT
  order_date,
  daily_revenue,
  LAG(daily_revenue) OVER (ORDER BY order_date) AS prev_day_revenue,
  LEAD(daily_revenue) OVER (ORDER BY order_date) AS next_day_revenue,
  daily_revenue - LAG(daily_revenue) OVER (ORDER BY order_date) AS day_over_day_change,
  ROUND(
    100.0 * (daily_revenue - LAG(daily_revenue) OVER (ORDER BY order_date))
    / NULLIF(LAG(daily_revenue) OVER (ORDER BY order_date), 0),
    2
  ) AS pct_change
FROM daily_sales
ORDER BY order_date;

-- 月度环比比较
SELECT
  DATE_TRUNC('month', order_date) AS month,
  SUM(amount) AS monthly_revenue,
  LAG(SUM(amount)) OVER (ORDER BY DATE_TRUNC('month', order_date)) AS prev_month_revenue
FROM orders
GROUP BY DATE_TRUNC('month', order_date)
ORDER BY month;

SQL 窗口函数:RANK、LAG、LEAD 与运行总计 插图

运行总计与累计统计

-- 运行总计
SELECT
  order_date,
  amount,
  SUM(amount) OVER (ORDER BY order_date) AS running_total,
  AVG(amount) OVER (ORDER BY order_date) AS running_avg,
  COUNT(*) OVER (ORDER BY order_date) AS running_count
FROM orders;

-- 累计百分比
SELECT
  product_name,
  revenue,
  SUM(revenue) OVER () AS total_revenue,
  ROUND(100.0 * revenue / SUM(revenue) OVER (), 2) AS pct_of_total,
  ROUND(100.0 * SUM(revenue) OVER (ORDER BY revenue DESC) / SUM(revenue) OVER (), 2) AS cumulative_pct
FROM product_revenue
ORDER BY revenue DESC;
-- 帕累托分析:识别贡献 80% 收入的产品

SQL 窗口函数:RANK、LAG、LEAD 与运行总计 插图

移动平均

-- 7 日移动平均
SELECT
  date,
  daily_users,
  AVG(daily_users) OVER (
    ORDER BY date
    ROWS BETWEEN 6 PRECEDING AND CURRENT ROW  -- 当前行 + 前 6 行 = 7 天
  ) AS ma_7day,
  AVG(daily_users) OVER (
    ORDER BY date
    ROWS BETWEEN 29 PRECEDING AND CURRENT ROW  -- 30 日移动平均
  ) AS ma_30day
FROM daily_metrics;

-- 基于范围的窗口(非行数)
SELECT
  sale_date,
  amount,
  AVG(amount) OVER (
    ORDER BY sale_date
    RANGE BETWEEN INTERVAL '6 days' PRECEDING AND CURRENT ROW
  ) AS rolling_7day_avg
FROM sales;

组内首/末值

-- 每位客户的首次购买日期和最近购买日期
SELECT DISTINCT
  customer_id,
  FIRST_VALUE(order_date) OVER (
    PARTITION BY customer_id
    ORDER BY order_date
  ) AS first_purchase,
  LAST_VALUE(order_date) OVER (
    PARTITION BY customer_id
    ORDER BY order_date
    ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
  ) AS last_purchase,
  COUNT(*) OVER (PARTITION BY customer_id) AS total_orders
FROM orders;

实际业务示例

-- 每个类别的前 N 名
WITH ranked AS (
  SELECT
    category,
    product_name,
    revenue,
    ROW_NUMBER() OVER (PARTITION BY category ORDER BY revenue DESC) AS rn
  FROM product_sales
)
SELECT category, product_name, revenue
FROM ranked
WHERE rn <= 3;  -- 每个类别前 3 名产品

-- 会话分析:间隙与孤岛问题
WITH sessions AS (
  SELECT
    user_id,
    event_time,
    LEAD(event_time) OVER (PARTITION BY user_id ORDER BY event_time) AS next_event,
    CASE
      WHEN LEAD(event_time) OVER (PARTITION BY user_id ORDER BY event_time)
           - event_time > INTERVAL '30 minutes'
      THEN 1 ELSE 0
    END AS session_end
  FROM events
)
SELECT user_id, COUNT(*) AS session_count
FROM sessions
WHERE session_end = 1
GROUP BY user_id;

窗口函数消除了大多数分析型 SQL 中对自连接和子查询的需求。