PostgreSQL 进阶
大约 11 分钟约 3275 字
PostgreSQL 进阶
简介
PostgreSQL 的强项不仅是"开源关系型数据库",更在于它把事务一致性、复杂查询、扩展能力、JSON 支持、窗口函数和丰富索引体系融合在一起。进入进阶阶段后,重点不再是基本 CRUD,而是如何利用 CTE、窗口函数、UPSERT、JSONB、锁机制和高级索引解决真实业务问题。
PostgreSQL 被誉为"世界上最先进的开源关系型数据库",其 MVCC(多版本并发控制)机制、丰富的数据类型(数组、JSONB、UUID、HStore 等)、强大的扩展生态(PostGIS、pgvector、TimescaleDB 等)使其在复杂业务系统中具备极高的适用性。进阶阶段需要掌握的核心能力包括:复杂 SQL 编写(CTE、窗口函数、递归)、并发控制(锁、隔离级别、 advisory lock)、半结构化数据处理(JSONB)、性能调优(索引类型、执行计划分析)和高级运维(VACUUM、连接池、逻辑复制)。
特点
实现
CTE、窗口函数与递归查询
-- CTE:先筛选支付成功订单,再做聚合
-- CTE (Common Table Expression) 本质上是临时命名的结果集
-- 可以被后续查询引用,提高 SQL 可读性
WITH paid_orders AS (
SELECT customer_id, amount, created_at
FROM orders
WHERE status = 'paid'
AND created_at >= DATE '2024-01-01'
)
SELECT customer_id,
COUNT(*) AS order_count,
SUM(amount) AS total_amount
FROM paid_orders
GROUP BY customer_id
ORDER BY total_amount DESC;-- 多个 CTE 链式使用
WITH
-- 第一步:筛选有效订单
paid_orders AS (
SELECT customer_id, amount, created_at, product_category
FROM orders
WHERE status = 'paid'
AND created_at >= DATE '2024-01-01'
),
-- 第二步:按客户汇总
customer_totals AS (
SELECT customer_id,
COUNT(*) AS order_count,
SUM(amount) AS total_amount,
MAX(created_at) AS last_order_date
FROM paid_orders
GROUP BY customer_id
),
-- 第三步:标记 VIP 客户
vip_customers AS (
SELECT *,
CASE WHEN total_amount >= 10000 THEN 'VIP'
WHEN total_amount >= 5000 THEN 'Gold'
ELSE 'Normal'
END AS customer_level
FROM customer_totals
)
SELECT customer_level,
COUNT(*) AS customer_count,
AVG(total_amount) AS avg_amount
FROM vip_customers
GROUP BY customer_level
ORDER BY avg_amount DESC;-- CTE 的两种形式:普通 CTE 和可写 CTE
-- 普通只读 CTE
WITH active_users AS (
SELECT id, email FROM users WHERE is_active = true
)
SELECT * FROM active_users;
-- 可写 CTE(PostgreSQL 独有特性):可以在 CTE 中执行 INSERT/UPDATE/DELETE
WITH updated_orders AS (
UPDATE orders
SET status = 'expired'
WHERE status = 'pending'
AND created_at < NOW() - INTERVAL '30 days'
RETURNING id, customer_id
)
INSERT INTO order_archive (order_id, customer_id, archived_at)
SELECT id, customer_id, NOW()
FROM updated_orders;
-- 一条 SQL 完成过期订单归档,事务内保证原子性-- 窗口函数:每个客户按金额排序取 Top 3 订单
SELECT *
FROM (
SELECT id,
customer_id,
amount,
ROW_NUMBER() OVER (PARTITION BY customer_id ORDER BY amount DESC) AS rn
FROM orders
WHERE status = 'paid'
) t
WHERE rn <= 3;-- 常用窗口函数分类
-- 1. 排名函数
SELECT
product_id,
sales_amount,
ROW_NUMBER() OVER (ORDER BY sales_amount DESC) AS row_num, -- 无并列,1,2,3,4
RANK() OVER (ORDER BY sales_amount DESC) AS rank, -- 有并列,1,1,3,4
DENSE_RANK() OVER (ORDER BY sales_amount DESC) AS dense_rank -- 有并列,1,1,2,3
FROM product_sales;
-- 2. 聚合窗口函数:累计求和、移动平均
SELECT
order_date,
daily_revenue,
SUM(daily_revenue) OVER (ORDER BY order_date) AS cumulative_revenue,
AVG(daily_revenue) OVER (
ORDER BY order_date
ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
) AS moving_avg_7day
FROM daily_sales
WHERE order_date >= DATE '2024-01-01';
-- 3. 取首尾值
SELECT
customer_id,
order_date,
amount,
FIRST_VALUE(amount) OVER (
PARTITION BY customer_id ORDER BY order_date
) AS first_order_amount,
LAST_VALUE(amount) OVER (
PARTITION BY customer_id ORDER BY order_date
ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
) AS last_order_amount,
LAG(order_date, 1) OVER (
PARTITION BY customer_id ORDER BY order_date
) AS prev_order_date
FROM orders
WHERE status = 'paid';
-- 4. 百分位统计
SELECT
employee_id,
department,
salary,
NTILE(4) OVER (PARTITION BY department ORDER BY salary DESC) AS salary_quartile,
PERCENT_RANK() OVER (PARTITION BY department ORDER BY salary) AS pct_rank
FROM employees;-- 递归查询:组织架构/分类树
WITH RECURSIVE category_tree AS (
-- 锚点查询:从根节点开始
SELECT id, parent_id, name, 1 AS level
FROM categories
WHERE parent_id IS NULL
UNION ALL
-- 递归部分:连接子节点
SELECT c.id, c.parent_id, c.name, ct.level + 1
FROM categories c
JOIN category_tree ct ON c.parent_id = ct.id
)
SELECT *
FROM category_tree
ORDER BY level, id;-- 递归查询实用案例:权限路径计算
WITH RECURSIVE permission_path AS (
-- 从叶子权限节点向上追溯到根
SELECT id, parent_id, name, ARRAY[id] AS path
FROM permissions
WHERE id = 42 -- 目标权限节点
UNION ALL
SELECT p.id, p.parent_id, p.name, pp.path || p.id
FROM permissions p
JOIN permission_path pp ON p.id = pp.parent_id
)
SELECT id, name, path
FROM permission_path
ORDER BY array_length(path, 1);
-- 递归查询实用案例:生成日期序列
WITH RECURSIVE date_series AS (
SELECT DATE '2024-01-01' AS dt
UNION ALL
SELECT dt + 1 FROM date_series WHERE dt < DATE '2024-12-31'
)
SELECT dt,
EXTRACT(DOW FROM dt) AS day_of_week,
CASE WHEN EXTRACT(DOW FROM dt) IN (0, 6) THEN 'weekend' ELSE 'workday' END AS day_type
FROM date_series;UPSERT、RETURNING 与 Advisory Lock
-- UPSERT:存在则更新,不存在则插入
INSERT INTO inventory (sku_id, stock, updated_at)
VALUES (1001, 50, NOW())
ON CONFLICT (sku_id)
DO UPDATE SET
stock = EXCLUDED.stock,
updated_at = NOW();-- UPSERT 的高级用法
-- 场景:幂等消费消息(按消息ID去重)
CREATE TABLE message_log (
message_id VARCHAR(64) PRIMARY KEY,
topic VARCHAR(128) NOT NULL,
payload JSONB,
consumed_at TIMESTAMPTZ DEFAULT NOW()
);
INSERT INTO message_log (message_id, topic, payload)
VALUES ('msg-001', 'order.created', '{"orderId": 1001}')
ON CONFLICT (message_id) DO NOTHING;
-- 已存在则什么都不做(幂等)
-- 场景:库存原子更新
INSERT INTO inventory (sku_id, stock, version)
VALUES (1001, 50, 1)
ON CONFLICT (sku_id)
DO UPDATE SET
stock = inventory.stock + EXCLUDED.stock,
version = inventory.version + 1
WHERE inventory.version = EXCLUDED.version
RETURNING sku_id, stock, version;
-- 乐观锁更新,版本不匹配则不更新-- RETURNING:写入后直接拿回结果
INSERT INTO orders (customer_id, amount, status)
VALUES (1001, 299.00, 'pending')
RETURNING id, created_at;
-- RETURNING 配合 CTE 实现复杂的写后读
WITH new_order AS (
INSERT INTO orders (customer_id, amount, status)
VALUES (1001, 299.00, 'pending')
RETURNING id, customer_id, amount
),
order_items AS (
INSERT INTO order_items (order_id, product_id, quantity, price)
SELECT id, 1001, 2, 149.50 FROM new_order
RETURNING order_id, product_id
)
SELECT n.id AS order_id, n.amount, oi.product_id
FROM new_order n
JOIN order_items oi ON n.id = oi.order_id;-- Advisory Lock:做分布式任务互斥
SELECT pg_try_advisory_lock(10001);
-- true 表示获得锁,false 表示已有其他会话持有锁
-- 执行任务后释放
SELECT pg_advisory_unlock(10001);
-- 使用事务级 Advisory Lock(事务结束自动释放)
SELECT pg_advisory_xact_lock(10001);
-- 使用哈希值作为锁ID(避免手动分配ID冲突)
SELECT pg_try_advisory_lock(hashtext('task:daily_report'));
SELECT pg_advisory_unlock(hashtext('task:daily_report'));
-- Advisory Lock 实际应用场景
-- 场景:定时任务防止并发执行
BEGIN;
SELECT pg_try_advisory_xact_lock(hashtext('cron:daily_report'));
-- 如果返回 false,说明另一个进程已在执行,直接退出
-- 如果返回 true,继续执行任务
-- 事务结束(COMMIT/ROLLBACK)自动释放锁
COMMIT;
-- 场景:防止重复提交
BEGIN;
SELECT pg_try_advisory_xact_lock(
hashtext('submit:' || user_id || ':' || form_id)
);
-- 同一用户同一表单只能有一个事务在处理
INSERT INTO submissions (user_id, form_id, data)
VALUES (1001, 'survey_01', '{"answer": "yes"}');
COMMIT;JSONB、索引与 EXPLAIN
-- JSONB 存储扩展属性
CREATE TABLE products (
id BIGSERIAL PRIMARY KEY,
name TEXT NOT NULL,
attributes JSONB NOT NULL DEFAULT '{}'
);
INSERT INTO products(name, attributes)
VALUES
('机械键盘', '{"brand":"Sunny","switch":"red","wireless":false}'),
('蓝牙耳机', '{"brand":"Sunny","battery_hours":24,"wireless":true}');-- JSONB 查询
SELECT id, name
FROM products
WHERE attributes ->> 'brand' = 'Sunny'
AND (attributes ->> 'wireless')::boolean = true;
-- JSONB 操作符速查
-- -> 返回 JSON 对象(保留类型)
-- ->> 返回文本值
-- #> 路径访问(返回 JSON 对象)
-- #>> 路径访问(返回文本值)
-- @> 包含检查
-- ? 键存在检查
-- ?| 任一键存在
-- ?& 所有键存在
-- 嵌套 JSONB 查询
SELECT id, name, attributes
FROM products
WHERE attributes #>> '{specs,weight}' > '500';
-- JSONB 数组查询
SELECT id, name
FROM products
WHERE attributes ? 'tags'; -- 检查是否有 tags 键
SELECT id, name
FROM products
WHERE attributes->'tags' ? '热销'; -- 检查数组中是否包含"热销"-- GIN 索引提升 JSONB 查询性能
CREATE INDEX idx_products_attributes_gin ON products USING GIN(attributes);
EXPLAIN ANALYZE
SELECT id, name
FROM products
WHERE attributes @> '{"brand":"Sunny"}';
-- JSONB 表达式索引(只索引特定字段)
CREATE INDEX idx_products_brand ON products ((attributes ->> 'brand'));
-- 适合只查 brand 字段的场景,索引更小
-- JSONB 路径表达式索引(PostgreSQL 12+)
CREATE INDEX idx_products_specs_weight
ON products ((attributes #>> '{specs,weight}'));-- JSONB 修改操作
UPDATE products
SET attributes = attributes || '{"color":"black"}'
WHERE id = 1;
-- 合并新属性
UPDATE products
SET attributes = attributes - 'color'
WHERE id = 1;
-- 删除属性
UPDATE products
SET attributes = jsonb_set(attributes, '{specs,weight}', '"600g"')
WHERE id = 1;
-- 设置嵌套路径的值EXPLAIN ANALYZE 执行计划分析
-- EXPLAIN 只看计划,不实际执行
EXPLAIN SELECT * FROM orders WHERE customer_id = 1001;
-- EXPLAIN ANALYZE 实际执行并返回耗时
EXPLAIN ANALYZE SELECT * FROM orders WHERE customer_id = 1001;
-- EXPLAIN ANALYZE BUFFERS 显示缓冲区命中情况
EXPLAIN (ANALYZE, BUFFERS, FORMAT TEXT)
SELECT customer_id, COUNT(*), SUM(amount)
FROM orders
WHERE status = 'paid'
GROUP BY customer_id;
-- 执行计划关键指标解读:
-- Seq Scan on orders → 全表顺序扫描(通常需要优化)
-- Index Scan using idx_x → 索引扫描(好)
-- Bitmap Heap Scan → 位图扫描(适合多条件 OR 查询)
-- Nested Loop → 嵌套循环(适合小表驱动大表)
-- Hash Join → 哈希连接(适合等值连接大表)
-- Merge Join → 归并连接(适合已排序数据)
-- actual time=0.05..0.12 → 实际执行时间(毫秒)
-- rows=42 → 实际返回行数
-- loops=1 → 执行次数
-- shared hit=120 → 共享缓冲区命中次数事务隔离级别与锁
-- 查看当前隔离级别
SHOW transaction_isolation;
-- 设置隔离级别
SET TRANSACTION ISOLATION LEVEL READ COMMITTED; -- 默认
SET TRANSACTION ISOLATION LEVEL REPEATABLE READ;
SET TRANSACTION ISOLATION LEVEL SERIALIZABLE;
-- 四种隔离级别的行为对比
-- Read Committed:每次语句看到已提交的数据(默认)
-- Repeatable Read:事务内看到事务开始时的快照
-- Serializable:完全串行化,最高隔离
-- 行级锁
SELECT * FROM orders WHERE id = 1001 FOR UPDATE; -- 排他锁
SELECT * FROM orders WHERE id = 1001 FOR NO KEY UPDATE; -- 不锁外键
SELECT * FROM orders WHERE id = 1001 FOR SHARE; -- 共享锁
SELECT * FROM orders WHERE status = 'pending' FOR UPDATE SKIP LOCKED; -- 跳过已锁定行
-- SKIP LOCKED 常用于任务队列
-- 多个消费者并发取任务,已锁定的行被跳过
SELECT * FROM task_queue
WHERE status = 'pending'
ORDER BY created_at
LIMIT 1
FOR UPDATE SKIP LOCKED;优点
缺点
总结
PostgreSQL 的进阶能力非常适合复杂业务系统:窗口函数适合报表,JSONB 适合半结构化字段,UPSERT 适合幂等写入,Advisory Lock 适合任务互斥。真正发挥其价值的关键,是基于真实业务问题选择合适特性,而不是把所有高级语法都堆到一条 SQL 上。
关键知识点
- CTE 和窗口函数是 PostgreSQL 复杂查询的核心工具
ON CONFLICT能显著简化幂等写入逻辑- Advisory Lock 适合做应用级互斥,但要注意锁释放
- JSONB 很强大,但复杂查询仍要配合合适索引
- MVCC 机制意味着 UPDATE 实际是 INSERT + DELETE,VACUUM 是必要的
- FOR UPDATE SKIP LOCKED 是实现并发任务队列的最佳方案
项目落地视角
- 统计报表、排行榜、TopN 查询很适合窗口函数
- 幂等消费、库存快照、配置同步适合 UPSERT
- 调度任务、分布式互斥可用 Advisory Lock 代替额外中间件
- 商品扩展属性、用户偏好、动态配置可用 JSONB 承载
- 复杂 SQL 建议用 CTE 拆分,每段加注释说明意图
常见误区
- 所有复杂逻辑都塞进一条 SQL,导致难以维护和排障
- 用 JSONB 替代所有建模,最后索引和约束失控
- 使用 Advisory Lock 却忘记异常释放路径
- 看了 PostgreSQL 很强就忽略执行计划与索引设计
- 忽略 VACUUM 导致表膨胀和查询性能下降
- 不理解 MVCC 导致长事务占用大量磁盘空间
进阶路线
- 深入学习 EXPLAIN ANALYZE、VACUUM、统计信息与执行计划
- 研究 GIN、GiST、BRIN 等不同索引类型
- 学习 pgvector、TimescaleDB、PostGIS 等扩展生态
- 将 PostgreSQL 与 CDC、只读副本、逻辑复制结合起来理解
- 学习连接池(PgBouncer)和连接管理最佳实践
适用场景
- 复杂业务系统、分析报表、规则引擎
- 需要关系模型与半结构化数据混合建模的项目
- 幂等写入、任务调度、排行榜与运营分析场景
- 对数据一致性和 SQL 表达能力要求较高的系统
落地建议
- 先基于真实查询设计 SQL,再引入对应高级特性
- 对窗口函数、JSONB、UPSERT 等使用场景建立团队规范
- 高复杂度 SQL 一定要配套 EXPLAIN 和测试数据验证
- 监控锁等待、慢查询、膨胀和 autovacuum 状态
排错清单
- 检查复杂 SQL 是否命中预期索引
- 检查
EXPLAIN ANALYZE中扫描行数和排序/聚合成本 - 检查 Advisory Lock 是否存在未释放或抢占失败问题
- 检查 JSONB 查询是否真的使用到了 GIN/表达式索引
- 检查是否存在长事务导致 VACUUM 无法回收空间
复盘问题
- 当前业务里最值得用 PostgreSQL 高级能力解决的是什么?
- 哪些复杂查询本可以拆成更清晰的多步处理?
- JSONB 的灵活性是否已经开始侵蚀数据治理?
- 你的 PostgreSQL 慢查询,根因是 SQL 写法、索引还是数据分布?
