Appearance
MySQL 窗口函数详解
窗口函数(Window Functions)是 MySQL 8.0 引入的重要特性,它允许我们在不改变结果集行数的情况下进行复杂的计算。本文将详细介绍 MySQL 窗口函数的概念、语法和应用场景。
1. 窗口函数基础概念
1.1 什么是窗口函数
窗口函数是一种特殊的函数,它可以对结果集中的一组相关行进行计算,同时保留原始行的详细信息。与普通聚合函数不同,窗口函数不会减少结果集的行数。
sql
-- 普通聚合函数 - 减少行数
SELECT department, AVG(salary) as avg_salary
FROM employees
GROUP BY department;
-- 窗口函数 - 保持行数
SELECT
employee_name,
department,
salary,
AVG(salary) OVER (PARTITION BY department) as dept_avg_salary
FROM employees;1.2 窗口函数语法结构
sql
function_name([arguments]) OVER (
[PARTITION BY partition_expression]
[ORDER BY sort_expression [ASC|DESC]]
[frame_clause]
)- PARTITION BY: 将结果集分成多个分区
- ORDER BY: 在每个分区内对行进行排序
- frame_clause: 定义窗口的边界
2. 常用窗口函数
2.1 聚合窗口函数
sql
-- 创建示例表
CREATE TABLE sales_data (
id INT PRIMARY KEY AUTO_INCREMENT,
employee_name VARCHAR(50),
department VARCHAR(50),
sales_amount DECIMAL(10,2),
sales_date DATE
);
-- 插入测试数据
INSERT INTO sales_data (employee_name, department, sales_amount, sales_date) VALUES
('张三', '销售部', 15000.00, '2024-01-01'),
('李四', '销售部', 18000.00, '2024-01-02'),
('王五', '销售部', 12000.00, '2024-01-03'),
('赵六', '市场部', 20000.00, '2024-01-01'),
('钱七', '市场部', 16000.00, '2024-01-02'),
('孙八', '市场部', 19000.00, '2024-01-03');
-- 使用聚合窗口函数
SELECT
employee_name,
department,
sales_amount,
-- 计算部门总销售额
SUM(sales_amount) OVER (PARTITION BY department) as dept_total,
-- 计算部门平均销售额
AVG(sales_amount) OVER (PARTITION BY department) as dept_avg,
-- 计算部门最大销售额
MAX(sales_amount) OVER (PARTITION BY department) as dept_max,
-- 计算部门最小销售额
MIN(sales_amount) OVER (PARTITION BY department) as dept_min,
-- 计算部门销售计数
COUNT(*) OVER (PARTITION BY department) as dept_count
FROM sales_data
ORDER BY department, sales_amount DESC;2.2 排名窗口函数
sql
-- 排名窗口函数示例
SELECT
employee_name,
department,
sales_amount,
-- 排名 - 相同值排名相同,后续排名跳过
RANK() OVER (PARTITION BY department ORDER BY sales_amount DESC) as rank,
-- 密集排名 - 相同值排名相同,后续排名不跳过
DENSE_RANK() OVER (PARTITION BY department ORDER BY sales_amount DESC) as dense_rank,
-- 行号 - 每行都有唯一的行号
ROW_NUMBER() OVER (PARTITION BY department ORDER BY sales_amount DESC) as row_num,
-- 百分比排名
PERCENT_RANK() OVER (PARTITION BY department ORDER BY sales_amount DESC) as percent_rank,
-- 累积分布
CUME_DIST() OVER (PARTITION BY department ORDER BY sales_amount DESC) as cume_dist
FROM sales_data;2.3 值窗口函数
sql
-- 值窗口函数示例
SELECT
employee_name,
department,
sales_amount,
-- 获取第一行的值
FIRST_VALUE(sales_amount) OVER (PARTITION BY department ORDER BY sales_amount DESC) as first_value,
-- 获取最后一行的值
LAST_VALUE(sales_amount) OVER (PARTITION BY department ORDER BY sales_amount DESC) as last_value,
-- 获取第N行的值
NTH_VALUE(sales_amount, 2) OVER (PARTITION BY department ORDER BY sales_amount DESC) as second_value,
-- 获取前一行的值
LAG(sales_amount, 1) OVER (PARTITION BY department ORDER BY sales_amount DESC) as prev_value,
-- 获取后一行的值
LEAD(sales_amount, 1) OVER (PARTITION BY department ORDER BY sales_amount DESC) as next_value,
-- 获取前两行的值
LAG(sales_amount, 2, 0) OVER (PARTITION BY department ORDER BY sales_amount DESC) as prev_2_value
FROM sales_data;2.4 分析窗口函数
sql
-- 分析窗口函数示例
SELECT
employee_name,
department,
sales_amount,
-- 累积和
SUM(sales_amount) OVER (
PARTITION BY department
ORDER BY sales_amount DESC
ROWS UNBOUNDED PRECEDING
) as cumulative_sum,
-- 移动平均(3行窗口)
AVG(sales_amount) OVER (
PARTITION BY department
ORDER BY sales_amount DESC
ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING
) as moving_avg_3
FROM sales_data;3. 窗口框架子句
3.1 ROWS 框架
sql
-- ROWS 框架示例
SELECT
employee_name,
department,
sales_amount,
sales_date,
-- 累积和 - 从分区开始到当前行
SUM(sales_amount) OVER (
PARTITION BY department
ORDER BY sales_date
ROWS UNBOUNDED PRECEDING
) as cumulative_sum,
-- 前3行的和
SUM(sales_amount) OVER (
PARTITION BY department
ORDER BY sales_date
ROWS BETWEEN 3 PRECEDING AND CURRENT ROW
) as sum_last_3,
-- 前1行和后1行的和
AVG(sales_amount) OVER (
PARTITION BY department
ORDER BY sales_date
ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING
) as avg_around
FROM sales_data;3.2 RANGE 框架
sql
-- RANGE 框架示例(按值范围)
SELECT
employee_name,
department,
sales_amount,
-- 相同金额范围的计数
COUNT(*) OVER (
PARTITION BY department
ORDER BY sales_amount
RANGE BETWEEN 1000 PRECEDING AND 1000 FOLLOWING
) as similar_amount_count,
-- 相同或更低金额的和
SUM(sales_amount) OVER (
PARTITION BY department
ORDER BY sales_amount
RANGE UNBOUNDED PRECEDING
) as sum_up_to_amount
FROM sales_data;4. 实际应用场景
4.1 计算同比和环比
sql
-- 创建月度销售表
CREATE TABLE monthly_sales (
id INT PRIMARY KEY AUTO_INCREMENT,
sales_month DATE,
total_sales DECIMAL(12,2)
);
-- 插入数据
INSERT INTO monthly_sales (sales_month, total_sales) VALUES
('2024-01-01', 150000.00),
('2024-02-01', 180000.00),
('2024-03-01', 165000.00),
('2024-04-01', 190000.00),
('2024-05-01', 210000.00),
('2024-06-01', 195000.00);
-- 计算环比和同比
SELECT
sales_month,
total_sales,
-- 环比(与上月比较)
LAG(total_sales, 1) OVER (ORDER BY sales_month) as prev_month_sales,
ROUND(
(total_sales - LAG(total_sales, 1) OVER (ORDER BY sales_month)) /
LAG(total_sales, 1) OVER (ORDER BY sales_month) * 100, 2
) as month_over_month_growth,
-- 同比(与去年同期比较)
LAG(total_sales, 12) OVER (ORDER BY sales_month) as prev_year_sales,
ROUND(
(total_sales - LAG(total_sales, 12) OVER (ORDER BY sales_month)) /
LAG(total_sales, 12) OVER (ORDER BY sales_month) * 100, 2
) as year_over_year_growth
FROM monthly_sales;4.2 计算移动平均
sql
-- 计算7天移动平均
SELECT
sales_date,
daily_sales,
AVG(daily_sales) OVER (
ORDER BY sales_date
ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
) as moving_avg_7_days,
-- 计算30天移动平均
AVG(daily_sales) OVER (
ORDER BY sales_date
ROWS BETWEEN 29 PRECEDING AND CURRENT ROW
) as moving_avg_30_days
FROM daily_sales_data;4.3 计算百分比排名
sql
-- 计算销售额在部门内的百分比排名
SELECT
employee_name,
department,
sales_amount,
-- 百分比排名
ROUND(PERCENT_RANK() OVER (PARTITION BY department ORDER BY sales_amount) * 100, 2) as percentile_rank,
-- 累积分布
ROUND(CUME_DIST() OVER (PARTITION BY department ORDER BY sales_amount) * 100, 2) as cumulative_percent,
-- 排名
RANK() OVER (PARTITION BY department ORDER BY sales_amount DESC) as rank_in_dept
FROM sales_data;4.4 计算首尾差异
sql
-- 计算与部门最高和最低销售额的差异
SELECT
employee_name,
department,
sales_amount,
-- 与最高销售额的差异
FIRST_VALUE(sales_amount) OVER (PARTITION BY department ORDER BY sales_amount DESC) - sales_amount as diff_from_top,
-- 与最低销售额的差异
sales_amount - FIRST_VALUE(sales_amount) OVER (PARTITION BY department ORDER BY sales_amount ASC) as diff_from_bottom,
-- 占最高销售额的百分比
ROUND(sales_amount / FIRST_VALUE(sales_amount) OVER (PARTITION BY department ORDER BY sales_amount DESC) * 100, 2) as percent_of_top
FROM sales_data;5. 性能优化建议
5.1 使用适当的索引
sql
-- 为窗口函数的 ORDER BY 列创建索引
CREATE INDEX idx_sales_date ON sales_data(sales_date);
CREATE INDEX idx_dept_sales ON sales_data(department, sales_amount);5.2 避免重复计算
sql
-- 不好的做法 - 重复计算
SELECT
employee_name,
department,
sales_amount,
AVG(sales_amount) OVER (PARTITION BY department) as avg_dept,
sales_amount - AVG(sales_amount) OVER (PARTITION BY department) as diff_from_avg
FROM sales_data;
-- 好的做法 - 使用子查询避免重复
SELECT
employee_name,
department,
sales_amount,
dept_avg,
sales_amount - dept_avg as diff_from_avg
FROM (
SELECT
employee_name,
department,
sales_amount,
AVG(sales_amount) OVER (PARTITION BY department) as dept_avg
FROM sales_data
) as subquery;总结
MySQL 窗口函数提供了强大的数据分析能力,主要优点包括:
- 保持行数 - 在计算聚合值的同时保留详细数据
- 灵活分区 - 支持按多个维度进行数据分组计算
- 排序支持 - 可以在分区内对数据进行排序
- 框架灵活 - 支持多种窗口框架定义
- 性能优化 - 比自连接或子查询更高效
窗口函数特别适用于:排名分析、同比环比计算、移动平均、累积计算、数据对比分析等场景。