Skip to content

MySQL 窗口函数详解

窗口函数(Window Functions)是 MySQL 8.0 引入的重要特性,它允许我们在不改变结果集行数的情况下进行复杂的计算。本文将详细介绍 MySQL 窗口函数的概念、语法和应用场景。

1. 窗口函数基础概念

1.1 什么是窗口函数

窗口函数是一种特殊的函数,它可以对结果集中的一组相关行进行计算,同时保留原始行的详细信息。与普通聚合函数不同,窗口函数不会减少结果集的行数。

sql
-- 普通聚合函数 - 减少行数
SELECT department, AVG(salary) as avg_salary
FROM employees
GROUP BY department;

-- 窗口函数 - 保持行数
SELECT 
    employee_name,
    department,
    salary,
    AVG(salary) OVER (PARTITION BY department) as dept_avg_salary
FROM employees;

1.2 窗口函数语法结构

sql
function_name([arguments]) OVER (
    [PARTITION BY partition_expression]
    [ORDER BY sort_expression [ASC|DESC]]
    [frame_clause]
)
  • PARTITION BY: 将结果集分成多个分区
  • ORDER BY: 在每个分区内对行进行排序
  • frame_clause: 定义窗口的边界

2. 常用窗口函数

2.1 聚合窗口函数

sql
-- 创建示例表
CREATE TABLE sales_data (
    id INT PRIMARY KEY AUTO_INCREMENT,
    employee_name VARCHAR(50),
    department VARCHAR(50),
    sales_amount DECIMAL(10,2),
    sales_date DATE
);

-- 插入测试数据
INSERT INTO sales_data (employee_name, department, sales_amount, sales_date) VALUES
('张三', '销售部', 15000.00, '2024-01-01'),
('李四', '销售部', 18000.00, '2024-01-02'),
('王五', '销售部', 12000.00, '2024-01-03'),
('赵六', '市场部', 20000.00, '2024-01-01'),
('钱七', '市场部', 16000.00, '2024-01-02'),
('孙八', '市场部', 19000.00, '2024-01-03');

-- 使用聚合窗口函数
SELECT 
    employee_name,
    department,
    sales_amount,
    -- 计算部门总销售额
    SUM(sales_amount) OVER (PARTITION BY department) as dept_total,
    -- 计算部门平均销售额
    AVG(sales_amount) OVER (PARTITION BY department) as dept_avg,
    -- 计算部门最大销售额
    MAX(sales_amount) OVER (PARTITION BY department) as dept_max,
    -- 计算部门最小销售额
    MIN(sales_amount) OVER (PARTITION BY department) as dept_min,
    -- 计算部门销售计数
    COUNT(*) OVER (PARTITION BY department) as dept_count
FROM sales_data
ORDER BY department, sales_amount DESC;

2.2 排名窗口函数

sql
-- 排名窗口函数示例
SELECT 
    employee_name,
    department,
    sales_amount,
    -- 排名 - 相同值排名相同,后续排名跳过
    RANK() OVER (PARTITION BY department ORDER BY sales_amount DESC) as rank,
    -- 密集排名 - 相同值排名相同,后续排名不跳过
    DENSE_RANK() OVER (PARTITION BY department ORDER BY sales_amount DESC) as dense_rank,
    -- 行号 - 每行都有唯一的行号
    ROW_NUMBER() OVER (PARTITION BY department ORDER BY sales_amount DESC) as row_num,
    -- 百分比排名
    PERCENT_RANK() OVER (PARTITION BY department ORDER BY sales_amount DESC) as percent_rank,
    -- 累积分布
    CUME_DIST() OVER (PARTITION BY department ORDER BY sales_amount DESC) as cume_dist
FROM sales_data;

2.3 值窗口函数

sql
-- 值窗口函数示例
SELECT 
    employee_name,
    department,
    sales_amount,
    -- 获取第一行的值
    FIRST_VALUE(sales_amount) OVER (PARTITION BY department ORDER BY sales_amount DESC) as first_value,
    -- 获取最后一行的值
    LAST_VALUE(sales_amount) OVER (PARTITION BY department ORDER BY sales_amount DESC) as last_value,
    -- 获取第N行的值
    NTH_VALUE(sales_amount, 2) OVER (PARTITION BY department ORDER BY sales_amount DESC) as second_value,
    -- 获取前一行的值
    LAG(sales_amount, 1) OVER (PARTITION BY department ORDER BY sales_amount DESC) as prev_value,
    -- 获取后一行的值
    LEAD(sales_amount, 1) OVER (PARTITION BY department ORDER BY sales_amount DESC) as next_value,
    -- 获取前两行的值
    LAG(sales_amount, 2, 0) OVER (PARTITION BY department ORDER BY sales_amount DESC) as prev_2_value
FROM sales_data;

2.4 分析窗口函数

sql
-- 分析窗口函数示例
SELECT 
    employee_name,
    department,
    sales_amount,
    -- 累积和
    SUM(sales_amount) OVER (
        PARTITION BY department 
        ORDER BY sales_amount DESC 
        ROWS UNBOUNDED PRECEDING
    ) as cumulative_sum,
    -- 移动平均(3行窗口)
    AVG(sales_amount) OVER (
        PARTITION BY department 
        ORDER BY sales_amount DESC 
        ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING
    ) as moving_avg_3
FROM sales_data;

3. 窗口框架子句

3.1 ROWS 框架

sql
-- ROWS 框架示例
SELECT 
    employee_name,
    department,
    sales_amount,
    sales_date,
    -- 累积和 - 从分区开始到当前行
    SUM(sales_amount) OVER (
        PARTITION BY department 
        ORDER BY sales_date 
        ROWS UNBOUNDED PRECEDING
    ) as cumulative_sum,
    -- 前3行的和
    SUM(sales_amount) OVER (
        PARTITION BY department 
        ORDER BY sales_date 
        ROWS BETWEEN 3 PRECEDING AND CURRENT ROW
    ) as sum_last_3,
    -- 前1行和后1行的和
    AVG(sales_amount) OVER (
        PARTITION BY department 
        ORDER BY sales_date 
        ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING
    ) as avg_around
FROM sales_data;

3.2 RANGE 框架

sql
-- RANGE 框架示例(按值范围)
SELECT 
    employee_name,
    department,
    sales_amount,
    -- 相同金额范围的计数
    COUNT(*) OVER (
        PARTITION BY department 
        ORDER BY sales_amount 
        RANGE BETWEEN 1000 PRECEDING AND 1000 FOLLOWING
    ) as similar_amount_count,
    -- 相同或更低金额的和
    SUM(sales_amount) OVER (
        PARTITION BY department 
        ORDER BY sales_amount 
        RANGE UNBOUNDED PRECEDING
    ) as sum_up_to_amount
FROM sales_data;

4. 实际应用场景

4.1 计算同比和环比

sql
-- 创建月度销售表
CREATE TABLE monthly_sales (
    id INT PRIMARY KEY AUTO_INCREMENT,
    sales_month DATE,
    total_sales DECIMAL(12,2)
);

-- 插入数据
INSERT INTO monthly_sales (sales_month, total_sales) VALUES
('2024-01-01', 150000.00),
('2024-02-01', 180000.00),
('2024-03-01', 165000.00),
('2024-04-01', 190000.00),
('2024-05-01', 210000.00),
('2024-06-01', 195000.00);

-- 计算环比和同比
SELECT 
    sales_month,
    total_sales,
    -- 环比(与上月比较)
    LAG(total_sales, 1) OVER (ORDER BY sales_month) as prev_month_sales,
    ROUND(
        (total_sales - LAG(total_sales, 1) OVER (ORDER BY sales_month)) / 
        LAG(total_sales, 1) OVER (ORDER BY sales_month) * 100, 2
    ) as month_over_month_growth,
    -- 同比(与去年同期比较)
    LAG(total_sales, 12) OVER (ORDER BY sales_month) as prev_year_sales,
    ROUND(
        (total_sales - LAG(total_sales, 12) OVER (ORDER BY sales_month)) / 
        LAG(total_sales, 12) OVER (ORDER BY sales_month) * 100, 2
    ) as year_over_year_growth
FROM monthly_sales;

4.2 计算移动平均

sql
-- 计算7天移动平均
SELECT 
    sales_date,
    daily_sales,
    AVG(daily_sales) OVER (
        ORDER BY sales_date 
        ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
    ) as moving_avg_7_days,
    -- 计算30天移动平均
    AVG(daily_sales) OVER (
        ORDER BY sales_date 
        ROWS BETWEEN 29 PRECEDING AND CURRENT ROW
    ) as moving_avg_30_days
FROM daily_sales_data;

4.3 计算百分比排名

sql
-- 计算销售额在部门内的百分比排名
SELECT 
    employee_name,
    department,
    sales_amount,
    -- 百分比排名
    ROUND(PERCENT_RANK() OVER (PARTITION BY department ORDER BY sales_amount) * 100, 2) as percentile_rank,
    -- 累积分布
    ROUND(CUME_DIST() OVER (PARTITION BY department ORDER BY sales_amount) * 100, 2) as cumulative_percent,
    -- 排名
    RANK() OVER (PARTITION BY department ORDER BY sales_amount DESC) as rank_in_dept
FROM sales_data;

4.4 计算首尾差异

sql
-- 计算与部门最高和最低销售额的差异
SELECT 
    employee_name,
    department,
    sales_amount,
    -- 与最高销售额的差异
    FIRST_VALUE(sales_amount) OVER (PARTITION BY department ORDER BY sales_amount DESC) - sales_amount as diff_from_top,
    -- 与最低销售额的差异
    sales_amount - FIRST_VALUE(sales_amount) OVER (PARTITION BY department ORDER BY sales_amount ASC) as diff_from_bottom,
    -- 占最高销售额的百分比
    ROUND(sales_amount / FIRST_VALUE(sales_amount) OVER (PARTITION BY department ORDER BY sales_amount DESC) * 100, 2) as percent_of_top
FROM sales_data;

5. 性能优化建议

5.1 使用适当的索引

sql
-- 为窗口函数的 ORDER BY 列创建索引
CREATE INDEX idx_sales_date ON sales_data(sales_date);
CREATE INDEX idx_dept_sales ON sales_data(department, sales_amount);

5.2 避免重复计算

sql
-- 不好的做法 - 重复计算
SELECT 
    employee_name,
    department,
    sales_amount,
    AVG(sales_amount) OVER (PARTITION BY department) as avg_dept,
    sales_amount - AVG(sales_amount) OVER (PARTITION BY department) as diff_from_avg
FROM sales_data;

-- 好的做法 - 使用子查询避免重复
SELECT 
    employee_name,
    department,
    sales_amount,
    dept_avg,
    sales_amount - dept_avg as diff_from_avg
FROM (
    SELECT 
        employee_name,
        department,
        sales_amount,
        AVG(sales_amount) OVER (PARTITION BY department) as dept_avg
    FROM sales_data
) as subquery;

总结

MySQL 窗口函数提供了强大的数据分析能力,主要优点包括:

  1. 保持行数 - 在计算聚合值的同时保留详细数据
  2. 灵活分区 - 支持按多个维度进行数据分组计算
  3. 排序支持 - 可以在分区内对数据进行排序
  4. 框架灵活 - 支持多种窗口框架定义
  5. 性能优化 - 比自连接或子查询更高效

窗口函数特别适用于:排名分析、同比环比计算、移动平均、累积计算、数据对比分析等场景。

用心写作,用技术改变世界