SQL 查询优化实战
大约 14 分钟约 4091 字
SQL 查询优化实战
简介
SQL 查询优化是数据库性能调优的核心环节,通过分析执行计划、合理使用索引、改写低效 SQL 以及优化分页策略等手段,可以显著提升数据库的查询响应速度和系统吞吐量。本文将系统介绍 SQL 查询优化的实用方法和技巧。
特点
EXPLAIN 执行计划分析
MySQL EXPLAIN
-- 基本 EXPLAIN 用法
EXPLAIN SELECT o.order_no, u.username, o.total_amount
FROM orders o
JOIN users u ON o.user_id = u.id
WHERE o.status = 'paid' AND o.created_at >= '2026-01-01';
-- EXPLAIN 输出关键字段解读
-- | 字段 | 说明 |
-- |------------|-------------------------------|
-- | id | 查询标识符,越大越先执行 |
-- | type | 访问类型(system/const/ref/range/index/ALL) |
-- | key | 实际使用的索引 |
-- | rows | 预估扫描行数 |
-- | Extra | 额外信息(Using index/Using filesort/Using temporary) |
-- MySQL 8.0+ 使用 EXPLAIN ANALYZE 获取实际执行统计
EXPLAIN ANALYZE
SELECT * FROM orders WHERE user_id = 100 AND status = 'shipped';
-- 查看优化器_TRACE(MySQL 5.6+)
SET optimizer_trace = 'enabled=on';
SELECT * FROM orders WHERE user_id = 100;
SELECT * FROM information_schema.OPTIMIZER_TRACE;
SET optimizer_trace = 'enabled=off';PostgreSQL EXPLAIN
-- PostgreSQL EXPLAIN
EXPLAIN SELECT * FROM orders WHERE user_id = 100;
-- 带实际执行时间和行数统计
EXPLAIN ANALYZE SELECT * FROM orders WHERE user_id = 100;
-- 显示详细成本信息
EXPLAIN (ANALYZE, BUFFERS, FORMAT TEXT)
SELECT o.order_no, u.username
FROM orders o
JOIN users u ON o.user_id = u.id
WHERE o.created_at >= '2026-01-01';
-- 输出关键指标解读
-- | 指标 | 说明 |
-- |------------------|--------------------------|
-- | cost=0.00..25.00 | 估算成本(启动..总成本) |
-- | rows=1000 | 估算返回行数 |
-- | actual time=0.01 | 实际执行时间(毫秒) |
-- | loops=1 | 循环执行次数 |
-- | Buffers: shared hit | 共享缓存命中次数 |
-- SQL Server 查看执行计划
SET STATISTICS IO ON;
SET STATISTICS TIME ON;
SELECT * FROM orders WHERE user_id = 100;索引使用优化
合理使用索引
-- 场景:订单表查询优化
CREATE TABLE orders (
id BIGINT PRIMARY KEY AUTO_INCREMENT,
order_no VARCHAR(32) NOT NULL,
user_id BIGINT NOT NULL,
status VARCHAR(20) NOT NULL,
total_amount DECIMAL(10,2),
created_at DATETIME NOT NULL,
INDEX idx_user_status (user_id, status),
INDEX idx_created_at (created_at),
INDEX idx_order_no (order_no)
);
-- 优化前:全表扫描(在索引列上使用函数)
SELECT * FROM orders WHERE DATE(created_at) = '2026-04-12';
-- type: ALL, rows: 1000000
-- 优化后:范围查询(可以利用索引)
SELECT * FROM orders
WHERE created_at >= '2026-04-12 00:00:00'
AND created_at < '2026-04-13 00:00:00';
-- type: range, key: idx_created_at, rows: 1200
-- 优化前:索引列上使用运算
SELECT * FROM orders WHERE user_id + 0 = 100;
-- type: ALL,索引失效
-- 优化后:直接等值查询
SELECT * FROM orders WHERE user_id = 100;
-- type: ref,使用索引
-- 避免隐式类型转换
-- order_no 是 VARCHAR 类型
SELECT * FROM orders WHERE order_no = 20260412001;
-- 索引失效!应该使用字符串
SELECT * FROM orders WHERE order_no = '20260412001';
-- LIKE 查询优化
-- 前缀通配符导致索引失效
SELECT * FROM orders WHERE order_no LIKE '%20260412%';
-- 改用前缀匹配可以利用索引
SELECT * FROM orders WHERE order_no LIKE '20260412%';覆盖索引
-- 覆盖索引:查询的所有列都包含在索引中
-- 创建覆盖索引
CREATE INDEX idx_user_status_created_amount
ON orders(user_id, status, created_at, total_amount);
-- 查询只需要索引中的列,无需回表
SELECT user_id, status, created_at, total_amount
FROM orders
WHERE user_id = 100 AND status = 'paid';
-- Extra: Using index(覆盖索引扫描)
-- 对比:需要回表的查询
SELECT user_id, status, created_at, total_amount, order_no
FROM orders
WHERE user_id = 100 AND status = 'paid';
-- Extra: NULL(需要回表获取 order_no)
-- 使用 INCLUDE 子句(SQL Server / PostgreSQL)
CREATE INDEX idx_orders_covering
ON orders(user_id, status)
INCLUDE (total_amount, created_at);查询改写技巧
SQL 改写优化
-- 1. 避免 SELECT *
-- 优化前
SELECT * FROM orders WHERE user_id = 100;
-- 优化后:只查询需要的列
SELECT order_no, total_amount, status FROM orders WHERE user_id = 100;
-- 2. 用 EXISTS 替代 IN(子查询大数据量时)
-- 优化前
SELECT * FROM orders
WHERE user_id IN (SELECT id FROM users WHERE status = 'active');
-- 优化后
SELECT o.* FROM orders o
WHERE EXISTS (
SELECT 1 FROM users u WHERE u.id = o.user_id AND u.status = 'active'
);
-- 3. 用 JOIN 替代子查询
-- 优化前:相关子查询
SELECT o.*, (
SELECT COUNT(*) FROM order_items WHERE order_id = o.id
) AS item_count
FROM orders o WHERE o.user_id = 100;
-- 优化后:JOIN + 分组
SELECT o.*, COALESCE(oi_cnt.cnt, 0) AS item_count
FROM orders o
LEFT JOIN (
SELECT order_id, COUNT(*) AS cnt
FROM order_items GROUP BY order_id
) oi_cnt ON oi_cnt.order_id = o.id
WHERE o.user_id = 100;
-- 4. UNION ALL 替代 UNION(不需要去重时)
-- 优化前:UNION 会排序去重
SELECT name FROM products WHERE category = '电子'
UNION
SELECT name FROM products WHERE category = '办公';
-- 优化后:UNION ALL 不去重
SELECT name FROM products WHERE category = '电子'
UNION ALL
SELECT name FROM products WHERE category = '办公';批量操作优化
-- 1. 批量插入代替逐条插入
-- 优化前
INSERT INTO order_items (order_id, product_id, quantity) VALUES (1, 101, 2);
INSERT INTO order_items (order_id, product_id, quantity) VALUES (1, 102, 1);
INSERT INTO order_items (order_id, product_id, quantity) VALUES (1, 103, 3);
-- 优化后
INSERT INTO order_items (order_id, product_id, quantity) VALUES
(1, 101, 2), (1, 102, 1), (1, 103, 3);
-- 2. 使用 CASE WHEN 批量更新
UPDATE products SET price = CASE
WHEN id = 1 THEN 99.00
WHEN id = 2 THEN 199.00
WHEN id = 3 THEN 299.00
ELSE price
END
WHERE id IN (1, 2, 3);
-- 3. 大数据量删除分批处理
-- 优化前:大事务锁表
DELETE FROM logs WHERE created_at < '2025-01-01';
-- 优化后:分批删除
DELETE FROM logs WHERE created_at < '2025-01-01' LIMIT 1000;
-- 循环执行直到影响行数为 0分页查询优化
分页优化方案
-- 场景:100 万条数据的分页查询
-- 方案一:传统 LIMIT OFFSET(性能差)
SELECT * FROM orders ORDER BY id LIMIT 10 OFFSET 500000;
-- 需要扫描 500010 行,丢弃前 500000 行
-- 方案二:游标分页(推荐)
-- 第一页
SELECT * FROM orders ORDER BY id LIMIT 10;
-- 下一页(使用上一页最后一条记录的 id)
SELECT * FROM orders WHERE id > 500000 ORDER BY id LIMIT 10;
-- 只扫描 10 行,性能极佳
-- 方案三:子查询优化 OFFSET 分页
SELECT o.* FROM orders o
INNER JOIN (
SELECT id FROM orders ORDER BY id LIMIT 10 OFFSET 500000
) AS tmp ON o.id = tmp.id;
-- 子查询走覆盖索引,减少回表次数
-- 方案四:使用 BETWEEN(已知 ID 范围时)
SELECT * FROM orders
WHERE id BETWEEN 500001 AND 500010
ORDER BY id;
-- 方案五:SQL Server 分页(OFFSET FETCH)
SELECT * FROM orders
ORDER BY id
OFFSET 500000 ROWS FETCH NEXT 10 ROWS ONLY;
-- 方案六:PostgreSQL Keyset 分页
-- 创建复合排序索引
CREATE INDEX idx_orders_created_id ON orders(created_at DESC, id DESC);
-- 第一页
SELECT id, order_no, created_at FROM orders
ORDER BY created_at DESC, id DESC LIMIT 10;
-- 翻页(使用上一页最后一条记录的排序值)
SELECT id, order_no, created_at FROM orders
WHERE (created_at, id) < ('2026-04-12 10:30:00', 500000)
ORDER BY created_at DESC, id DESC LIMIT 10;分页与总数统计优化
-- 优化前:COUNT(*) 全表统计
SELECT COUNT(*) FROM orders WHERE status = 'paid';
-- 大表上非常慢
-- 方案一:使用近似计数(PostgreSQL)
SELECT reltuples::bigint AS estimate
FROM pg_class WHERE relname = 'orders';
-- 方案二:维护计数表
CREATE TABLE table_counts (
table_name VARCHAR(100) PRIMARY KEY,
row_count BIGINT NOT NULL DEFAULT 0
);
-- 通过触发器维护
CREATE OR REPLACE FUNCTION update_order_count() RETURNS TRIGGER AS $$
BEGIN
IF TG_OP = 'INSERT' THEN
UPDATE table_counts SET row_count = row_count + 1 WHERE table_name = 'orders';
ELSIF TG_OP = 'DELETE' THEN
UPDATE table_counts SET row_count = row_count - 1 WHERE table_name = 'orders';
END IF;
RETURN NULL;
END;
$$ LANGUAGE plpgsql;
-- 方案三:缓存总数,前端只显示"上一页/下一页"
-- 不查询总数,通过判断返回行数是否等于 LIMIT 判断是否有下一页
SELECT * FROM orders WHERE status = 'paid'
ORDER BY created_at DESC LIMIT 11; -- 多取一条判断是否有下一页优点
JOIN 优化深入
JOIN 类型选择
-- 1. INNER JOIN — 只返回匹配的行
-- 2. LEFT JOIN — 返回左表所有行 + 匹配的右表行
-- 3. RIGHT JOIN — 返回右表所有行 + 匹配的左表行
-- 4. CROSS JOIN — 笛卡尔积(慎用)
-- 5. SELF JOIN — 自连接
-- JOIN 优化要点
-- 避免 SELECT *,只取需要的字段
-- 差
SELECT * FROM orders o JOIN users u ON o.user_id = u.id;
-- 好
SELECT o.id, o.order_no, o.total_amount, u.name
FROM orders o INNER JOIN users u ON o.user_id = u.id;
-- 确保 JOIN 字段有索引
CREATE INDEX idx_orders_user_id ON orders(user_id);
CREATE INDEX idx_users_id ON users(id); -- 主键已有索引
-- 小表驱动大表
-- 优化器通常自动选择,但可以验证
EXPLAIN SELECT * FROM large_table l
JOIN small_table s ON l.id = s.fk_id;
-- 确认 small_table 是驱动表(Nested Loop 的外层)
-- 多表 JOIN 顺序
-- 3 张以上的 JOIN,考虑减少中间结果集
-- 差:先 JOIN 大表
SELECT * FROM orders o -- 1000 万行
JOIN order_items oi ON o.id = oi.order_id -- 3000 万行
JOIN products p ON oi.product_id = p.id -- 10 万行
WHERE p.category_id = 5;
-- 好:先过滤小表,减少 JOIN 数据量
SELECT * FROM products p -- 10 万行
WHERE p.category_id = 5 -- 先过滤到 5000 行
JOIN order_items oi ON p.id = oi.product_id -- 再 JOIN
JOIN orders o ON oi.order_id = o.id;
-- 避免不必要的 JOIN
-- 差:JOIN 了但只用到主表的字段
SELECT o.id, o.order_no
FROM orders o
JOIN users u ON o.user_id = u.id
WHERE o.status = 'paid';
-- 好:不需要 JOIN
SELECT id, order_no
FROM orders
WHERE status = 'paid';子查询优化
-- 1. 标量子查询改 JOIN
-- 差:标量子查询对每行执行一次
SELECT
o.id,
o.order_no,
(SELECT name FROM users WHERE id = o.user_id) AS user_name,
(SELECT SUM(quantity) FROM order_items WHERE order_id = o.id) AS item_count
FROM orders o;
-- 好:JOIN 一次性查询
SELECT
o.id,
o.order_no,
u.name AS user_name,
COALESCE(oi.item_count, 0) AS item_count
FROM orders o
LEFT JOIN users u ON o.user_id = u.id
LEFT JOIN (
SELECT order_id, COUNT(*) AS item_count
FROM order_items
GROUP BY order_id
) oi ON o.id = oi.order_id;
-- 2. IN vs EXISTS vs JOIN
-- 数据量小 → JOIN 通常最快
-- 子查询结果集小 → EXISTS 通常快
-- 主表结果集小 → IN 通常快
-- 3. 关联子查询 vs 非关联子查询
-- 非关联子查询(只执行一次)
SELECT * FROM orders
WHERE user_id IN (SELECT id FROM users WHERE status = 'active');
-- 关联子查询(对外层每行执行一次)
SELECT * FROM orders o
WHERE EXISTS (SELECT 1 FROM users u WHERE u.id = o.user_id AND u.status = 'active');
-- 4. NOT IN vs NOT EXISTS
-- NOT IN 遇到 NULL 值返回空结果
-- 差
SELECT * FROM orders WHERE user_id NOT IN (SELECT id FROM blacklist);
-- 如果 blacklist.id 有 NULL,结果永远为空
-- 好
SELECT * FROM orders o
WHERE NOT EXISTS (SELECT 1 FROM blacklist b WHERE b.user_id = o.user_id);
-- 5. UNION vs UNION ALL
-- UNION 自动去重(排序开销大)
-- UNION ALL 不去重(快得多)
-- 如果确定没有重复或不需要去重,用 UNION ALL
SELECT user_id FROM orders WHERE status = 'paid'
UNION ALL
SELECT user_id FROM returns WHERE status = 'completed';聚合优化
GROUP BY 优化
-- 1. GROUP BY 前先过滤
-- 差:先聚合再过滤
SELECT category_id, COUNT(*) AS cnt, SUM(amount) AS total
FROM orders
GROUP BY category_id
HAVING COUNT(*) > 100;
-- 好:先过滤再聚合
SELECT category_id, COUNT(*) AS cnt, SUM(amount) AS total
FROM orders
WHERE created_at >= '2026-01-01' -- 缩小数据范围
GROUP BY category_id
HAVING COUNT(*) > 100;
-- 2. GROUP BY 使用索引
-- 创建索引 (group_col, agg_col)
CREATE INDEX idx_orders_category_date ON orders(category_id, created_at);
-- 3. 避免在 SELECT 中使用非聚合列
-- 差(MySQL 默认可运行,其他数据库报错)
SELECT category_id, user_id, COUNT(*)
FROM orders
GROUP BY category_id;
-- user_id 不在 GROUP BY 中,结果不确定
-- 好
SELECT category_id, COUNT(*)
FROM orders
GROUP BY category_id;
-- 4. ROLLUP 多级汇总
SELECT
category_id,
product_id,
COUNT(*) AS order_count,
SUM(amount) AS total_amount
FROM orders
GROUP BY ROLLUP(category_id, product_id);
-- 产生小计和总计行
-- 5. CUBE 交叉汇总
SELECT
category_id,
region,
COUNT(*) AS order_count
FROM orders
GROUP BY CUBE(category_id, region);
-- 产生所有维度的组合汇总窗口函数替代自连接
-- 计算每个分类中价格排名前 3 的商品
-- 差:自连接(性能差)
SELECT p1.*
FROM products p1
WHERE (
SELECT COUNT(*) FROM products p2
WHERE p2.category_id = p1.category_id
AND p2.price > p1.price
) < 3;
-- 好:窗口函数
WITH ranked AS (
SELECT *,
DENSE_RANK() OVER (PARTITION BY category_id ORDER BY price DESC) AS price_rank
FROM products
)
SELECT * FROM ranked WHERE price_rank <= 3;
-- 计算累计百分比
SELECT
category_id,
product_name,
amount,
SUM(amount) OVER (PARTITION BY category_id ORDER BY amount DESC) AS cumulative,
SUM(amount) OVER (PARTITION BY category_id) AS category_total,
SUM(amount) OVER (PARTITION BY category_id ORDER BY amount DESC)
* 100.0 / SUM(amount) OVER (PARTITION BY category_id) AS cumulative_pct
FROM product_sales;
-- 去除连续重复数据
WITH grouped AS (
SELECT *,
ROW_NUMBER() OVER (PARTITION BY status, grp ORDER BY id) AS rn
FROM (
SELECT *,
id - ROW_NUMBER() OVER (ORDER BY id) AS grp
FROM status_log
) t
)
SELECT * FROM grouped WHERE rn = 1;批量操作优化
批量 INSERT
-- MySQL 批量插入
-- 差:逐条插入
INSERT INTO order_items (order_id, product_id, quantity) VALUES (1, 101, 2);
INSERT INTO order_items (order_id, product_id, quantity) VALUES (1, 102, 1);
INSERT INTO order_items (order_id, product_id, quantity) VALUES (1, 103, 5);
-- 好:批量插入(一条语句)
INSERT INTO order_items (order_id, product_id, quantity) VALUES
(1, 101, 2), (1, 102, 1), (1, 103, 5);
-- 更大批量建议分批(每批 1000-5000 条)
-- 避免单条 SQL 过大导致锁超时或 binlog 暴涨
-- SQL Server 批量插入
-- BULK INSERT(最快)
BULK INSERT order_items FROM '/tmp/items.csv'
WITH (FIELDTERMINATOR = ',', ROWTERMINATOR = '\n', FIRSTROW = 2);
-- INSERT ... SELECT(从其他表批量导入)
INSERT INTO order_items_archive (order_id, product_id, quantity)
SELECT order_id, product_id, quantity
FROM order_items
WHERE created_at < '2025-01-01';
-- PostgreSQL COPY 命令(最快的方式)
COPY order_items (order_id, product_id, quantity) FROM '/tmp/items.csv' WITH CSV HEADER;批量 UPDATE
-- MySQL 批量更新
-- 方案一:CASE WHEN(适合少量更新)
UPDATE products SET price = CASE id
WHEN 1 THEN 99.9
WHEN 2 THEN 199.9
WHEN 3 THEN 299.9
END WHERE id IN (1, 2, 3);
-- 方案二:临时表 JOIN
CREATE TEMPORARY TABLE temp_updates (id INT, price DECIMAL(18,2));
INSERT INTO temp_updates VALUES (1, 99.9), (2, 199.9), (3, 299.9);
UPDATE products p
INNER JOIN temp_updates t ON p.id = t.id
SET p.price = t.price;
DROP TEMPORARY TABLE temp_updates;
-- 方案三:INSERT ... ON DUPLICATE KEY UPDATE(MySQL)
INSERT INTO products (id, price) VALUES
(1, 99.9), (2, 199.9), (3, 299.9)
ON DUPLICATE KEY UPDATE price = VALUES(price);
-- SQL Server MERGE(批量同步)
MERGE INTO products AS target
USING (VALUES (1, 99.9), (2, 199.9), (3, 299.9)) AS source (id, price)
ON target.id = source.id
WHEN MATCHED THEN UPDATE SET target.price = source.price;缺点
总结
SQL 查询优化是数据库性能调优中性价比最高的手段,通过 EXPLAIN 分析执行计划、合理创建和使用索引、改写低效 SQL 以及优化分页策略,可以在不增加硬件成本的情况下显著提升系统性能。优化是一个持续的过程,需要结合实际的业务场景和数据特征制定合适的优化策略。
关键知识点
- 数据库主题一定要同时看数据模型、读写模式和执行代价。
- 很多性能问题不是 SQL 语法问题,而是索引、统计信息、事务和数据分布问题。
- 高可用、备份、迁移和治理与查询优化同样重要。
- 先把数据模型、访问模式和执行代价绑定起来理解。
项目落地视角
- 所有优化前后都保留执行计划、样本 SQL 和关键指标对比。
- 上线前准备回滚脚本、备份点和校验方案。
- 把连接池、锁等待、慢查询和容量增长纳入日常巡检。
- 保留执行计划、样本 SQL、索引定义和优化前后指标。
常见误区
- 脱离真实数据分布讨论索引或分片。
- 只看单条 SQL,不看整条业务链路的事务和锁。
- 把测试环境结论直接等同于生产环境结论。
- 脱离真实数据分布设计索引。
进阶路线
- 继续向执行计划、存储引擎、复制机制和数据治理层深入。
- 把主题与 ORM、缓存、消息队列和归档策略联动起来思考。
- 沉淀成数据库设计规范、SQL 审核规则和变更流程。
- 继续深入存储引擎、复制机制、归档与冷热分层治理。
适用场景
- 当你准备把《SQL 查询优化实战》真正落到项目里时,最适合先在一个独立模块或最小样例里验证关键路径。
- 适合数据建模、查询优化、事务控制、高可用和迁移治理。
- 当系统开始遇到慢查询、锁冲突、热点数据或容量增长时,这类主题价值最高。
落地建议
- 先分析真实查询模式、数据量级和写入特征,再决定索引或分片策略。
- 所有优化结论都结合执行计划、样本数据和监控指标验证。
- 高风险操作前准备备份、回滚脚本与校验 SQL。
排错清单
- 先确认瓶颈在 CPU、I/O、锁等待、网络还是 SQL 本身。
- 检查执行计划是否走错索引、是否发生排序或全表扫描。
- 排查长事务、隐式类型转换、统计信息过期和参数嗅探。
复盘问题
- 如果把《SQL 查询优化实战》放进你的当前项目,最先要验证的输入、输出和失败路径分别是什么?
- 《SQL 查询优化实战》最容易在什么规模、什么边界条件下暴露问题?你会用什么指标或日志去确认?
- 相比默认实现或替代方案,采用《SQL 查询优化实战》最大的收益和代价分别是什么?
