
SQL 窗口函数:让分析变得简单
窗口函数对与当前行相关的一组行执行计算。
排名函数
-- ROW_NUMBER:唯一的顺序编号
SELECT
employee_id,
name,
department,
salary,
ROW_NUMBER() OVER (PARTITION BY department ORDER BY salary DESC) AS dept_rank
FROM employees;
-- RANK:并列排名相同,之后有间隔
SELECT
name,
score,
RANK() OVER (ORDER BY score DESC) AS rank,
DENSE_RANK() OVER (ORDER BY score DESC) AS dense_rank
FROM game_scores;
-- 分数:100, 100, 90 -> RANK:1, 1, 3 | DENSE_RANK:1, 1, 2
-- NTILE:将行分成 N 个相等的组
SELECT
name,
salary,
NTILE(4) OVER (ORDER BY salary) AS salary_quartile
FROM employees;
-- 1=底部 25%, 2=25-50%, 3=50-75%, 4=顶部 25%

LAG 和 LEAD
-- 与上一行/下一行比较
SELECT
order_date,
daily_revenue,
LAG(daily_revenue) OVER (ORDER BY order_date) AS prev_day_revenue,
LEAD(daily_revenue) OVER (ORDER BY order_date) AS next_day_revenue,
daily_revenue - LAG(daily_revenue) OVER (ORDER BY order_date) AS day_over_day_change,
ROUND(
100.0 * (daily_revenue - LAG(daily_revenue) OVER (ORDER BY order_date))
/ NULLIF(LAG(daily_revenue) OVER (ORDER BY order_date), 0),
2
) AS pct_change
FROM daily_sales
ORDER BY order_date;
-- 月度环比比较
SELECT
DATE_TRUNC('month', order_date) AS month,
SUM(amount) AS monthly_revenue,
LAG(SUM(amount)) OVER (ORDER BY DATE_TRUNC('month', order_date)) AS prev_month_revenue
FROM orders
GROUP BY DATE_TRUNC('month', order_date)
ORDER BY month;

运行总计与累计统计
-- 运行总计
SELECT
order_date,
amount,
SUM(amount) OVER (ORDER BY order_date) AS running_total,
AVG(amount) OVER (ORDER BY order_date) AS running_avg,
COUNT(*) OVER (ORDER BY order_date) AS running_count
FROM orders;
-- 累计百分比
SELECT
product_name,
revenue,
SUM(revenue) OVER () AS total_revenue,
ROUND(100.0 * revenue / SUM(revenue) OVER (), 2) AS pct_of_total,
ROUND(100.0 * SUM(revenue) OVER (ORDER BY revenue DESC) / SUM(revenue) OVER (), 2) AS cumulative_pct
FROM product_revenue
ORDER BY revenue DESC;
-- 帕累托分析:识别贡献 80% 收入的产品

移动平均
-- 7 日移动平均
SELECT
date,
daily_users,
AVG(daily_users) OVER (
ORDER BY date
ROWS BETWEEN 6 PRECEDING AND CURRENT ROW -- 当前行 + 前 6 行 = 7 天
) AS ma_7day,
AVG(daily_users) OVER (
ORDER BY date
ROWS BETWEEN 29 PRECEDING AND CURRENT ROW -- 30 日移动平均
) AS ma_30day
FROM daily_metrics;
-- 基于范围的窗口(非行数)
SELECT
sale_date,
amount,
AVG(amount) OVER (
ORDER BY sale_date
RANGE BETWEEN INTERVAL '6 days' PRECEDING AND CURRENT ROW
) AS rolling_7day_avg
FROM sales;
组内首/末值
-- 每位客户的首次购买日期和最近购买日期
SELECT DISTINCT
customer_id,
FIRST_VALUE(order_date) OVER (
PARTITION BY customer_id
ORDER BY order_date
) AS first_purchase,
LAST_VALUE(order_date) OVER (
PARTITION BY customer_id
ORDER BY order_date
ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
) AS last_purchase,
COUNT(*) OVER (PARTITION BY customer_id) AS total_orders
FROM orders;
实际业务示例
-- 每个类别的前 N 名
WITH ranked AS (
SELECT
category,
product_name,
revenue,
ROW_NUMBER() OVER (PARTITION BY category ORDER BY revenue DESC) AS rn
FROM product_sales
)
SELECT category, product_name, revenue
FROM ranked
WHERE rn <= 3; -- 每个类别前 3 名产品
-- 会话分析:间隙与孤岛问题
WITH sessions AS (
SELECT
user_id,
event_time,
LEAD(event_time) OVER (PARTITION BY user_id ORDER BY event_time) AS next_event,
CASE
WHEN LEAD(event_time) OVER (PARTITION BY user_id ORDER BY event_time)
- event_time > INTERVAL '30 minutes'
THEN 1 ELSE 0
END AS session_end
FROM events
)
SELECT user_id, COUNT(*) AS session_count
FROM sessions
WHERE session_end = 1
GROUP BY user_id;
窗口函数消除了大多数分析型 SQL 中对自连接和子查询的需求。