PostgreSQL 入门
大约 13 分钟约 3977 字
PostgreSQL 入门
简介
PostgreSQL 是一款功能强大的开源对象关系型数据库管理系统,以其卓越的扩展性、标准兼容性和丰富的数据类型支持而闻名。它支持 JSONB 半结构化数据、全文搜索、地理空间数据处理(PostGIS)等高级特性,被广泛应用于企业级应用和数据密集型项目中。
特点
安装与配置
安装 PostgreSQL
# Ubuntu/Debian
sudo apt update
sudo apt install postgresql postgresql-contrib
# 启动服务
sudo systemctl start postgresql
sudo systemctl enable postgresql
# macOS (Homebrew)
brew install postgresql@16
brew services start postgresql@16
# Docker 方式安装
docker run -d \
--name postgres \
-e POSTGRES_USER=admin \
-e POSTGRES_PASSWORD=secret \
-e POSTGRES_DB=myapp \
-p 5432:5432 \
postgres:16基本配置
# 连接数据库
sudo -u postgres psql
# 创建用户和数据库
CREATE USER myuser WITH PASSWORD 'mypassword';
CREATE DATABASE myapp OWNER myuser;
GRANT ALL PRIVILEGES ON DATABASE myapp TO myuser;
# 修改配置文件 postgresql.conf
# 监听地址
listen_addresses = '*'
# 最大连接数
max_connections = 200
# 共享缓冲区(建议为物理内存的 25%)
shared_buffers = 256MB
# 工作内存
work_mem = 4MB数据类型
常用数据类型
-- 创建表演示各种数据类型
CREATE TABLE products (
id SERIAL PRIMARY KEY,
name VARCHAR(200) NOT NULL,
description TEXT,
price NUMERIC(10, 2) NOT NULL DEFAULT 0,
stock INTEGER NOT NULL DEFAULT 0,
is_active BOOLEAN DEFAULT TRUE,
tags TEXT[] DEFAULT '{}',
metadata JSONB DEFAULT '{}',
uuid UUID DEFAULT gen_random_uuid(),
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
);
-- 数组类型操作
INSERT INTO products (name, price, tags)
VALUES ('笔记本电脑', 5999.00, ARRAY['电子', '办公', '高性能']);
-- 数组查询
SELECT * FROM products WHERE '电子' = ANY(tags);
SELECT * FROM products WHERE tags @> ARRAY['办公'];
-- UUID 类型
CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
SELECT uuid_generate_v4();基本 CRUD 操作
增删改查
-- 插入数据
INSERT INTO products (name, description, price, stock, metadata)
VALUES (
'机械键盘',
'Cherry MX 轴体机械键盘',
699.00,
150,
'{"brand": "Cherry", "color": "黑色", "switch_type": "红轴"}'
);
-- 批量插入
INSERT INTO products (name, price, stock) VALUES
('鼠标', 199.00, 300),
('显示器', 2499.00, 80),
('耳机', 399.00, 200);
-- 查询数据
SELECT id, name, price, stock FROM products
WHERE is_active = TRUE AND price > 100
ORDER BY price DESC
LIMIT 10 OFFSET 0;
-- 更新数据
UPDATE products
SET price = 799.00, updated_at = NOW()
WHERE name = '机械键盘';
-- 删除数据
DELETE FROM products WHERE stock = 0 AND is_active = FALSE;
-- 使用 RETURNING 子句
INSERT INTO products (name, price)
VALUES ('新产品', 999.00)
RETURNING id, name, created_at;
UPDATE products SET price = price * 0.9
WHERE stock > 100
RETURNING id, name, price;高级查询
-- CTE(公共表表达式)
WITH expensive_products AS (
SELECT * FROM products WHERE price > 1000
),
product_stats AS (
SELECT
COUNT(*) AS total_count,
AVG(price) AS avg_price,
MAX(price) AS max_price
FROM expensive_products
)
SELECT * FROM product_stats;
-- 窗口函数
SELECT
name,
price,
RANK() OVER (ORDER BY price DESC) AS price_rank,
price - AVG(price) OVER () AS diff_from_avg,
price - LAG(price) OVER (ORDER BY price) AS price_diff
FROM products
WHERE is_active = TRUE;
-- UPSERT(插入或更新)
INSERT INTO products (name, price, stock)
VALUES ('机械键盘', 699.00, 160)
ON CONFLICT (name)
DO UPDATE SET
stock = products.stock + EXCLUDED.stock,
updated_at = NOW();JSONB 操作
JSONB 数据处理
-- 创建包含 JSONB 的表
CREATE TABLE user_profiles (
id SERIAL PRIMARY KEY,
username VARCHAR(50) UNIQUE NOT NULL,
profile JSONB NOT NULL DEFAULT '{}'
);
-- 插入 JSONB 数据
INSERT INTO user_profiles (username, profile) VALUES
('zhangsan', '{
"name": "张三",
"age": 28,
"address": {"city": "北京", "district": "海淀区"},
"hobbies": ["编程", "阅读", "游泳"],
"contacts": {
"email": "zhangsan@example.com",
"phone": "13800138000"
}
}');
-- JSONB 查询操作符
-- -> 获取 JSON 对象(返回 JSON 类型)
SELECT profile->'name' FROM user_profiles WHERE username = 'zhangsan';
-- ->> 获取 JSON 对象(返回文本类型)
SELECT profile->>'name' FROM user_profiles WHERE username = 'zhangsan';
-- 嵌套查询
SELECT profile->'address'->>'city' AS city FROM user_profiles;
-- 包含查询 @>
SELECT * FROM user_profiles
WHERE profile @> '{"name": "张三"}';
-- JSONB 路径查询
SELECT * FROM user_profiles
WHERE profile->'contacts'->>'email' LIKE '%@example.com';
-- 更新 JSONB 字段
UPDATE user_profiles
SET profile = jsonb_set(profile, '{age}', '29')
WHERE username = 'zhangsan';
-- 追加数组元素
UPDATE user_profiles
SET profile = jsonb_set(
profile,
'{hobbies}',
(profile->'hobbies') || '"旅游"'::jsonb
)
WHERE username = 'zhangsan';
-- JSONB 索引(GIN 索引)
CREATE INDEX idx_profile_gin ON user_profiles USING GIN (profile);
CREATE INDEX idx_profile_path ON user_profiles ((profile->'address'->>'city'));全文搜索
全文搜索功能
-- 创建测试数据
CREATE TABLE articles (
id SERIAL PRIMARY KEY,
title VARCHAR(200) NOT NULL,
content TEXT NOT NULL,
author VARCHAR(50),
created_at TIMESTAMP DEFAULT NOW()
);
INSERT INTO articles (title, content, author) VALUES
('PostgreSQL 入门教程', 'PostgreSQL 是一款功能强大的开源关系数据库,支持 JSONB、全文搜索等高级特性', '张三'),
('数据库优化指南', '本文介绍数据库索引优化、查询优化和参数调优的实用技巧', '李四'),
('全文搜索实战', '使用 PostgreSQL 内置的全文搜索功能,可以快速实现搜索引擎级别的文本检索', '王五');
-- 基本全文搜索
SELECT title,
ts_headline(content, websearch_to_tsquery('数据库 优化')) AS highlight
FROM articles
WHERE to_tsvector('simple', content) @@ websearch_to_tsquery('数据库 优化');
-- 创建全文搜索索引
ALTER TABLE articles ADD COLUMN search_vector TSVECTOR
GENERATED ALWAYS AS (to_tsvector('simple', coalesce(title,'') || ' ' || coalesce(content,''))) STORED;
CREATE INDEX idx_articles_search ON articles USING GIN (search_vector);
-- 使用索引进行全文搜索
SELECT title, ts_rank(search_vector, query) AS rank
FROM articles, websearch_to_tsquery('simple', 'PostgreSQL 全文搜索') query
WHERE search_vector @@ query
ORDER BY rank DESC;
-- ts_headline 高亮显示
SELECT title,
ts_headline('simple', content, websearch_to_tsquery('PostgreSQL')) AS highlight
FROM articles
WHERE search_vector @@ websearch_to_tsquery('PostgreSQL');索引与性能优化
PostgreSQL 索引类型
-- 1. B-Tree 索引(默认)
CREATE INDEX idx_products_name ON products(name);
CREATE INDEX idx_products_price ON products(price DESC);
-- 2. GIN 索引(适合 JSONB、数组、全文搜索)
CREATE INDEX idx_profile_gin ON user_profiles USING GIN(profile);
CREATE INDEX idx_tags_gin ON products USING GIN(tags);
-- 3. GiST 索引(适合地理空间、范围类型)
-- 需要 PostGIS 扩展
CREATE EXTENSION IF NOT EXISTS postgis;
CREATE INDEX idx_locations_gist ON locations USING GIST(geom);
-- 4. BRIN 索引(适合物理有序的大表)
-- 例如按时间排序的日志表
CREATE INDEX idx_logs_created ON logs USING BRIN(created_at)
WITH (pages_per_range = 32);
-- 5. 哈希索引(等值查询)
CREATE INDEX idx_users_email_hash ON users USING HASH(email);
-- 6. 部分索引(只索引满足条件的行)
CREATE INDEX idx_active_products ON products(name)
WHERE is_active = TRUE;
-- 7. 表达式索引
CREATE INDEX idx_products_lower_name ON products(LOWER(name));
CREATE INDEX idx_products_price_tax ON products(price * 1.13);
-- 8. 并发创建索引(不锁表)
CREATE INDEX CONCURRENTLY idx_products_name ON products(name);
-- 查看索引使用情况
SELECT schemaname, tablename, indexname, idx_scan
FROM pg_stat_user_indexes
ORDER BY idx_scan;
-- 查看未使用的索引
SELECT schemaname, tablename, indexname
FROM pg_stat_user_indexes
WHERE idx_scan = 0;执行计划分析
-- EXPLAIN 分析查询计划
EXPLAIN
SELECT * FROM products WHERE price > 1000;
-- EXPLAIN ANALYZE 实际执行并统计耗时
EXPLAIN (ANALYZE, BUFFERS, FORMAT TEXT)
SELECT p.name, p.price, c.name AS category
FROM products p
JOIN categories c ON p.category_id = c.id
WHERE p.price > 500
ORDER BY p.price DESC
LIMIT 20;
-- 关键信息解读:
-- Seq Scan — 顺序扫描(全表扫描,考虑加索引)
-- Index Scan — 索引扫描
-- Index Only Scan — 仅索引扫描(不需要回表)
-- Bitmap Heap Scan — 位图扫描
-- Nested Loop — 嵌套循环(适合小表)
-- Hash Join — 哈希连接(适合大表)
-- Merge Join — 合并连接(适合有序数据)
-- BUFFERS 选项:查看内存命中情况
-- shared hit — 缓冲池命中
-- shared read — 磁盘读取
-- 命中率越高越好高级 SQL 特性
窗口函数深入
-- 1. 累计求和
SELECT
order_date,
amount,
SUM(amount) OVER (ORDER BY order_date) AS running_total
FROM daily_sales;
-- 2. 移动平均
SELECT
order_date,
amount,
AVG(amount) OVER (
ORDER BY order_date
ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
) AS ma_7day
FROM daily_sales;
-- 3. 分组排名
SELECT
department,
employee_name,
salary,
ROW_NUMBER() OVER (PARTITION BY department ORDER BY salary DESC) AS row_num,
RANK() OVER (PARTITION BY department ORDER BY salary DESC) AS rank,
DENSE_RANK() OVER (PARTITION BY department ORDER BY salary DESC) AS dense_rank,
PERCENT_RANK() OVER (PARTITION BY department ORDER BY salary DESC) AS pct_rank
FROM employees;
-- 4. 首值/末值
SELECT
department,
employee_name,
salary,
FIRST_VALUE(employee_name) OVER (
PARTITION BY department ORDER BY salary DESC
) AS highest_paid,
LAST_VALUE(employee_name) OVER (
PARTITION BY department ORDER BY salary DESC
ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
) AS lowest_paid
FROM employees;
-- 5. 行号分页(替代 LIMIT OFFSET)
SELECT * FROM (
SELECT
*,
ROW_NUMBER() OVER (ORDER BY created_at DESC) AS row_num
FROM products
) t
WHERE row_num BETWEEN 21 AND 40;递归查询(CTE)
-- 组织架构树查询
CREATE TABLE employees (
id SERIAL PRIMARY KEY,
name VARCHAR(100) NOT NULL,
manager_id INTEGER REFERENCES employees(id),
department VARCHAR(50)
);
-- 递归查询某员工的所有下属
WITH RECURSIVE subordinates AS (
-- 起点:指定员工
SELECT id, name, manager_id, 1 AS level
FROM employees WHERE id = 1
UNION ALL
-- 递归:查找下属
SELECT e.id, e.name, e.manager_id, s.level + 1
FROM employees e
INNER JOIN subordinates s ON e.manager_id = s.id
)
SELECT * FROM subordinates ORDER BY level;
-- 递归查询某员工的所有上级
WITH RECURSIVE managers AS (
SELECT id, name, manager_id, 1 AS level
FROM employees WHERE id = 100
UNION ALL
SELECT e.id, e.name, e.manager_id, m.level + 1
FROM employees e
INNER JOIN managers m ON e.id = m.manager_id
)
SELECT * FROM managers ORDER BY level;物化视图
-- 创建物化视图(结果缓存到磁盘)
CREATE MATERIALIZED VIEW mv_product_sales AS
SELECT
p.id,
p.name,
p.price,
COALESCE(SUM(oi.quantity), 0) AS total_sold,
COALESCE(SUM(oi.quantity * oi.price), 0) AS total_revenue
FROM products p
LEFT JOIN order_items oi ON p.id = oi.product_id
GROUP BY p.id, p.name, p.price
WITH DATA;
-- 创建索引加速物化视图查询
CREATE INDEX idx_mv_product_sales_revenue
ON mv_product_sales(total_revenue DESC);
-- 刷新物化视图(全量刷新)
REFRESH MATERIALIZED VIEW mv_product_sales;
-- 并发刷新(PostgreSQL 9.4+,不锁查询)
REFRESH MATERIALIZED VIEW CONCURRENTLY mv_product_sales;
-- 适用场景:
-- 1. 复杂聚合报表
-- 2. 数据不需要实时更新
-- 3. 查询频率远高于更新频率
-- 4. 多个报表共享同一聚合逻辑事务与并发控制
MVCC 机制
-- PostgreSQL 使用 MVCC(多版本并发控制)
-- 读操作不会阻塞写操作,写操作不会阻塞读操作
-- 查看当前事务 ID
SELECT txid_current();
-- 查看事务隔离级别
SHOW transaction_isolation;
-- 设置隔离级别
SET TRANSACTION ISOLATION LEVEL READ COMMITTED; -- 默认
SET TRANSACTION ISOLATION LEVEL REPEATABLE READ;
SET TRANSACTION ISOLATION LEVEL SERIALIZABLE;
-- 各隔离级别对比:
-- READ COMMITTED:语句级快照,每次语句看到最新已提交数据
-- REPEATABLE READ:事务级快照,事务开始后看不到其他事务的修改
-- SERIALIZABLE:最高隔离级别,严格串行化,性能开销最大
-- 悲观锁
SELECT * FROM products WHERE id = 1 FOR UPDATE; -- 行锁
SELECT * FROM products WHERE id = 1 FOR NO KEY UPDATE; -- 不锁外键
SELECT * FROM products WHERE id = 1 FOR SHARE; -- 共享锁
SELECT * FROM products WHERE category_id = 1 FOR UPDATE SKIP LOCKED; -- 跳过已锁行
-- SKIP LOCKED 实现任务队列
SELECT * FROM task_queue
WHERE status = 'pending'
ORDER BY created_at
LIMIT 1
FOR UPDATE SKIP LOCKED;
-- 多个 worker 可以并发处理,不会互相等待VACUUM 与表膨胀
-- PostgreSQL 的 MVCC 机制导致更新和删除留下死元组
-- 需要定期 VACUUM 回收空间
-- 自动 VACUUM(默认开启)
SHOW autovacuum;
SHOW autovacuum_vacuum_scale_factor; -- 默认 0.2(20% 行变化触发)
-- 手动 VACUUM
VACUUM products; -- 回收空间给操作系统
VACUUM FULL products; -- 完全重建表(锁表!)
VACUUM ANALYZE products; -- 回收空间 + 更新统计信息
-- 查看表膨胀情况
SELECT
schemaname,
tablename,
pg_size_pretty(pg_total_relation_size(schemaname || '.' || tablename)) AS total_size,
pg_size_pretty(pg_relation_size(schemaname || '.' || tablename)) AS table_size,
n_dead_tup AS dead_tuples,
n_live_tup AS live_tuples,
ROUND(n_dead_tup::numeric / NULLIF(n_live_tup, 0) * 100, 2) AS bloat_pct
FROM pg_stat_user_tables
ORDER BY n_dead_tup DESC;
-- 按表调整自动 VACUUM 参数
ALTER TABLE products SET (autovacuum_vacuum_scale_factor = 0.05);
ALTER TABLE products SET (autovacuum_analyze_scale_factor = 0.02);
ALTER TABLE products SET (autovacuum_vacuum_cost_delay = 20);备份与恢复
逻辑备份
# pg_dump — 逻辑备份
pg_dump -U admin -d myapp -f backup_20260414.sql
pg_dump -U admin -d myapp -F c -f backup_20260414.dump # 自定义格式(支持并行恢复)
pg_dump -U admin -d myapp -t products -f products_only.sql # 只备份指定表
# pg_dumpall — 备份所有数据库(包括全局对象)
pg_dumpall -U admin -f full_backup.sql
# 并行备份(pg_dump -j)
pg_dump -U admin -d myapp -F d -f backup_dir -j 4 # 4 个并行任务
# 恢复
psql -U admin -d myapp -f backup_20260414.sql
pg_restore -U admin -d myapp -j 4 backup_20260414.dump # 并行恢复物理备份
# pg_basebackup — 在线热备份
pg_basebackup -U admin -D /data/backup -Ft -z -P
# -Ft: tar 格式
# -z: gzip 压缩
# -P: 显示进度
# 配置 WAL 归档(postgresql.conf)
wal_level = replica
archive_mode = on
archive_command = 'cp %p /data/wal_archive/%f'
# 使用 PITR(时间点恢复)
# 1. 恢复基础备份
# 2. 重放 WAL 日志到指定时间点
recovery_target_time = '2026-04-14 10:30:00'
recovery_target_action = 'promote'常用扩展
实用扩展
-- 1. pg_trgm — 模糊搜索(三元组)
CREATE EXTENSION pg_trgm;
CREATE INDEX idx_products_name_trgm ON products USING GIN(name gin_trgm_ops);
SELECT * FROM products WHERE name LIKE '%键盘%'; -- 走索引
SELECT * FROM products WHERE name SIMILAR TO '(机械|游戏)键盘';
SELECT *, similarity(name, '机械键盘') AS sim
FROM products WHERE name % '机械键盘' ORDER BY sim DESC;
-- 2. uuid-ossp — UUID 生成
CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
SELECT uuid_generate_v4(); -- 随机 UUID
SELECT uuid_generate_v1(); -- 基于 MAC 和时间戳
-- 3. citext — 大小写不敏感文本
CREATE EXTENSION citext;
CREATE TABLE users (
email CITEXT PRIMARY KEY -- 'A@B.COM' 和 'a@b.com' 视为相同
);
-- 4. hstore — 键值对存储
CREATE EXTENSION hstore;
CREATE TABLE product_attrs (
id SERIAL PRIMARY KEY,
attributes HSTORE
);
INSERT INTO product_attrs (attributes) VALUES
('color => "红色", weight => "500g", material => "金属"');
SELECT attributes->'color' FROM product_attrs;
SELECT * FROM product_attrs WHERE attributes ? 'color';
-- 5. pgvector — 向量搜索(AI 应用)
CREATE EXTENSION vector;
CREATE TABLE embeddings (
id SERIAL PRIMARY KEY,
content TEXT,
embedding VECTOR(1536)
);
CREATE INDEX idx_embeddings ON embeddings
USING hnsw (embedding vector_cosine_ops);
SELECT content, embedding <=> '[0.1, 0.2, ...]' AS distance
FROM embeddings
ORDER BY embedding <=> '[0.1, 0.2, ...]'
LIMIT 10;优点
缺点
总结
PostgreSQL 是一款功能强大且高度可扩展的开源关系型数据库,凭借其丰富的数据类型支持(特别是 JSONB)、内置全文搜索和 MVCC 并发控制机制,非常适合构建现代化的数据密集型应用。虽然配置和学习成本较高,但其强大的功能和活跃的社区生态使其成为企业级应用的优选方案。
关键知识点
- 数据库主题一定要同时看数据模型、读写模式和执行代价。
- 很多性能问题不是 SQL 语法问题,而是索引、统计信息、事务和数据分布问题。
- 高可用、备份、迁移和治理与查询优化同样重要。
- 先把数据模型、访问模式和执行代价绑定起来理解。
项目落地视角
- 所有优化前后都保留执行计划、样本 SQL 和关键指标对比。
- 上线前准备回滚脚本、备份点和校验方案。
- 把连接池、锁等待、慢查询和容量增长纳入日常巡检。
- 保留执行计划、样本 SQL、索引定义和优化前后指标。
常见误区
- 脱离真实数据分布讨论索引或分片。
- 只看单条 SQL,不看整条业务链路的事务和锁。
- 把测试环境结论直接等同于生产环境结论。
- 脱离真实数据分布设计索引。
进阶路线
- 继续向执行计划、存储引擎、复制机制和数据治理层深入。
- 把主题与 ORM、缓存、消息队列和归档策略联动起来思考。
- 沉淀成数据库设计规范、SQL 审核规则和变更流程。
- 继续深入存储引擎、复制机制、归档与冷热分层治理。
适用场景
- 当你准备把《PostgreSQL 入门》真正落到项目里时,最适合先在一个独立模块或最小样例里验证关键路径。
- 适合数据建模、查询优化、事务控制、高可用和迁移治理。
- 当系统开始遇到慢查询、锁冲突、热点数据或容量增长时,这类主题价值最高。
落地建议
- 先分析真实查询模式、数据量级和写入特征,再决定索引或分片策略。
- 所有优化结论都结合执行计划、样本数据和监控指标验证。
- 高风险操作前准备备份、回滚脚本与校验 SQL。
排错清单
- 先确认瓶颈在 CPU、I/O、锁等待、网络还是 SQL 本身。
- 检查执行计划是否走错索引、是否发生排序或全表扫描。
- 排查长事务、隐式类型转换、统计信息过期和参数嗅探。
复盘问题
- 如果把《PostgreSQL 入门》放进你的当前项目,最先要验证的输入、输出和失败路径分别是什么?
- 《PostgreSQL 入门》最容易在什么规模、什么边界条件下暴露问题?你会用什么指标或日志去确认?
- 相比默认实现或替代方案,采用《PostgreSQL 入门》最大的收益和代价分别是什么?
